/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		  routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		  reachable.  otherwise, round-robin the list.
 *	Ville Nuorvala
 *		Fixed routing subtrees.
 */

#define pr_fmt(fmt) "IPv6: " fmt

#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/export.h>
#include <linux/types.h>
#include <linux/times.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/route.h>
#include <linux/netdevice.h>
#include <linux/in6.h>
#include <linux/mroute6.h>
#include <linux/init.h>
#include <linux/if_arp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/nsproxy.h>
#include <linux/slab.h>
#include <linux/jhash.h>
#include <net/net_namespace.h>
#include <net/snmp.h>
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#include <net/ndisc.h>
#include <net/addrconf.h>
#include <net/tcp.h>
#include <linux/rtnetlink.h>
#include <net/dst.h>
#include <net/dst_metadata.h>
#include <net/xfrm.h>
#include <net/netevent.h>
#include <net/netlink.h>
#include <net/nexthop.h>
#include <net/lwtunnel.h>
#include <net/ip_tunnels.h>
#include <net/l3mdev.h>
#include <trace/events/fib6.h>

#include <linux/uaccess.h>

#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif

enum rt6_nud_state {
	RT6_NUD_FAIL_HARD = -3,
	RT6_NUD_FAIL_PROBE = -2,
	RT6_NUD_FAIL_DO_RR = -1,
	RT6_NUD_SUCCEED = 1
};

static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int ip6_default_advmss(const struct dst_entry *dst);
static unsigned int ip6_mtu(const struct dst_entry *dst);
static struct dst_entry *ip6_negative_advice(struct dst_entry *);
static void ip6_dst_destroy(struct dst_entry *);
static void ip6_dst_ifdown(struct dst_entry *,
			   struct net_device *dev, int how);
static int ip6_dst_gc(struct dst_ops *ops);

static int ip6_pkt_discard(struct sk_buff *skb);
static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
static void ip6_link_failure(struct sk_buff *skb);
static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
			       struct sk_buff *skb, u32 mtu);
static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
			    struct sk_buff *skb);
static void rt6_dst_from_metrics_check(struct rt6_info *rt);
static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
static size_t rt6_nlmsg_size(struct rt6_info *rt);
static int rt6_fill_node(struct net *net,
			 struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst, struct in6_addr *src,
			 int iif, int type, u32 portid, u32 seq,
			 unsigned int flags);
static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
					   struct in6_addr *daddr,
					   struct in6_addr *saddr);

#ifdef CONFIG_IPV6_ROUTE_INFO
static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr,
					   struct net_device *dev,
					   unsigned int pref);
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr,
					   struct net_device *dev);
#endif

struct uncached_list {
	spinlock_t		lock;
	struct list_head	head;
};

static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);

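/* Routes that are not attached to a fib6 node are kept on a per-cpu
 * "uncached" list so that rt6_uncached_list_flush_dev() can still find
 * them and re-point their idev/dev references at the loopback device
 * when the underlying netdevice is unregistered.
 */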
static void rt6_uncached_list_add(struct rt6_info *rt)
{
	struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);

	rt->rt6i_uncached_list = ul;

	spin_lock_bh(&ul->lock);
	list_add_tail(&rt->rt6i_uncached, &ul->head);
	spin_unlock_bh(&ul->lock);
}

static void rt6_uncached_list_del(struct rt6_info *rt)
{
	if (!list_empty(&rt->rt6i_uncached)) {
		struct uncached_list *ul = rt->rt6i_uncached_list;

		spin_lock_bh(&ul->lock);
		list_del(&rt->rt6i_uncached);
		spin_unlock_bh(&ul->lock);
	}
}

static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
{
	struct net_device *loopback_dev = net->loopback_dev;
	int cpu;

	if (dev == loopback_dev)
		return;

	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
		struct rt6_info *rt;

		spin_lock_bh(&ul->lock);
		list_for_each_entry(rt, &ul->head, rt6i_uncached) {
			struct inet6_dev *rt_idev = rt->rt6i_idev;
			struct net_device *rt_dev = rt->dst.dev;

			if (rt_idev->dev == dev) {
				rt->rt6i_idev = in6_dev_get(loopback_dev);
				in6_dev_put(rt_idev);
			}

			if (rt_dev == dev) {
				rt->dst.dev = loopback_dev;
				dev_hold(rt->dst.dev);
				dev_put(rt_dev);
			}
		}
		spin_unlock_bh(&ul->lock);
	}
}

static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
{
	return dst_metrics_write_ptr(rt->dst.from);
}

static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
{
	struct rt6_info *rt = (struct rt6_info *)dst;

	if (rt->rt6i_flags & RTF_PCPU)
		return rt6_pcpu_cow_metrics(rt);
	else if (rt->rt6i_flags & RTF_CACHE)
		return NULL;
	else
		return dst_cow_metrics_generic(dst, old);
}

static inline const void *choose_neigh_daddr(struct rt6_info *rt,
					     struct sk_buff *skb,
					     const void *daddr)
{
	struct in6_addr *p = &rt->rt6i_gateway;

	if (!ipv6_addr_any(p))
		return (const void *) p;
	else if (skb)
		return &ipv6_hdr(skb)->daddr;
	return daddr;
}

static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
					  struct sk_buff *skb,
					  const void *daddr)
{
	struct rt6_info *rt = (struct rt6_info *) dst;
	struct neighbour *n;

	daddr = choose_neigh_daddr(rt, skb, daddr);
	n = __ipv6_neigh_lookup(dst->dev, daddr);
	if (n)
		return n;
	return neigh_create(&nd_tbl, daddr, dst->dev);
}

static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
{
	struct net_device *dev = dst->dev;
	struct rt6_info *rt = (struct rt6_info *)dst;

	daddr = choose_neigh_daddr(rt, NULL, daddr);
	if (!daddr)
		return;
	if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
		return;
	if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
		return;
	__ipv6_confirm_neigh(dev, daddr);
}

static struct dst_ops ip6_dst_ops_template = {
	.family			= AF_INET6,
	.gc			= ip6_dst_gc,
	.gc_thresh		= 1024,
	.check			= ip6_dst_check,
	.default_advmss		= ip6_default_advmss,
	.mtu			= ip6_mtu,
	.cow_metrics		= ipv6_cow_metrics,
	.destroy		= ip6_dst_destroy,
	.ifdown			= ip6_dst_ifdown,
	.negative_advice	= ip6_negative_advice,
	.link_failure		= ip6_link_failure,
	.update_pmtu		= ip6_rt_update_pmtu,
	.redirect		= rt6_do_redirect,
	.local_out		= __ip6_local_out,
	.neigh_lookup		= ip6_neigh_lookup,
	.confirm_neigh		= ip6_confirm_neigh,
};

static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
{
	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);

	return mtu ? : dst->dev->mtu;
}

static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
					 struct sk_buff *skb, u32 mtu)
{
}

static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
}

static struct dst_ops ip6_dst_blackhole_ops = {
	.family			= AF_INET6,
	.destroy		= ip6_dst_destroy,
	.check			= ip6_dst_check,
	.mtu			= ip6_blackhole_mtu,
	.default_advmss		= ip6_default_advmss,
	.update_pmtu		= ip6_rt_blackhole_update_pmtu,
	.redirect		= ip6_rt_blackhole_redirect,
	.cow_metrics		= dst_cow_metrics_generic,
	.neigh_lookup		= ip6_neigh_lookup,
};

static const u32 ip6_template_metrics[RTAX_MAX] = {
	[RTAX_HOPLIMIT - 1] = 0,
};

static const struct rt6_info ip6_null_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -ENETUNREACH,
		.input		= ip6_pkt_discard,
		.output		= ip6_pkt_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static const struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

static const struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif

static void rt6_info_init(struct rt6_info *rt)
{
	struct dst_entry *dst = &rt->dst;

	memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
	INIT_LIST_HEAD(&rt->rt6i_siblings);
	INIT_LIST_HEAD(&rt->rt6i_uncached);
}

/* allocate dst with ip6_dst_ops */
static struct rt6_info *__ip6_dst_alloc(struct net *net,
					struct net_device *dev,
					int flags)
{
	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
					1, DST_OBSOLETE_FORCE_CHK, flags);

	if (rt)
		rt6_info_init(rt);

	return rt;
}

struct rt6_info *ip6_dst_alloc(struct net *net,
			       struct net_device *dev,
			       int flags)
{
	struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);

	if (rt) {
		rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
		if (rt->rt6i_pcpu) {
			int cpu;

			for_each_possible_cpu(cpu) {
				struct rt6_info **p;

				p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
				/* no one shares rt */
				*p = NULL;
			}
		} else {
			dst_release_immediate(&rt->dst);
			return NULL;
		}
	}

	return rt;
}
EXPORT_SYMBOL(ip6_dst_alloc);

static void ip6_dst_destroy(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct rt6_exception_bucket *bucket;
	struct dst_entry *from = dst->from;
	struct inet6_dev *idev;

	dst_destroy_metrics_generic(dst);
	free_percpu(rt->rt6i_pcpu);
	rt6_uncached_list_del(rt);

	idev = rt->rt6i_idev;
	if (idev) {
		rt->rt6i_idev = NULL;
		in6_dev_put(idev);
	}
	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, 1);
	if (bucket) {
		rt->rt6i_exception_bucket = NULL;
		kfree(bucket);
	}

	dst->from = NULL;
	dst_release(from);
}

static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
			   int how)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct inet6_dev *idev = rt->rt6i_idev;
	struct net_device *loopback_dev =
		dev_net(dev)->loopback_dev;

	if (idev && idev->dev != loopback_dev) {
		struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
		if (loopback_idev) {
			rt->rt6i_idev = loopback_idev;
			in6_dev_put(idev);
		}
	}
}

static bool __rt6_check_expired(const struct rt6_info *rt)
{
	if (rt->rt6i_flags & RTF_EXPIRES)
		return time_after(jiffies, rt->dst.expires);
	else
		return false;
}

static bool rt6_check_expired(const struct rt6_info *rt)
{
	if (rt->rt6i_flags & RTF_EXPIRES) {
		if (time_after(jiffies, rt->dst.expires))
			return true;
	} else if (rt->dst.from) {
		return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
			rt6_check_expired((struct rt6_info *)rt->dst.from);
	}
	return false;
}

static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
					     struct flowi6 *fl6, int oif,
					     int strict)
{
	struct rt6_info *sibling, *next_sibling;
	int route_choosen;

	/* We might have already computed the hash for ICMPv6 errors. In such
	 * case it will always be non-zero. Otherwise now is the time to do it.
	 */
	if (!fl6->mp_hash)
		fl6->mp_hash = rt6_multipath_hash(fl6, NULL);

	route_choosen = fl6->mp_hash % (match->rt6i_nsiblings + 1);
	/* Don't change the route, if route_choosen == 0
	 * (siblings does not include ourself)
	 */
	if (route_choosen)
		list_for_each_entry_safe(sibling, next_sibling,
				&match->rt6i_siblings, rt6i_siblings) {
			route_choosen--;
			if (route_choosen == 0) {
				if (rt6_score_route(sibling, oif, strict) < 0)
					break;
				match = sibling;
				break;
			}
		}
	return match;
}

/*
 *	Route lookup. Any table->tb6_lock is implied.
 */

static inline struct rt6_info *rt6_device_match(struct net *net,
						struct rt6_info *rt,
						const struct in6_addr *saddr,
						int oif,
						int flags)
{
	struct rt6_info *local = NULL;
	struct rt6_info *sprt;

	if (!oif && ipv6_addr_any(saddr))
		goto out;

	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
		struct net_device *dev = sprt->dst.dev;

		if (oif) {
			if (dev->ifindex == oif)
				return sprt;
			if (dev->flags & IFF_LOOPBACK) {
				if (!sprt->rt6i_idev ||
				    sprt->rt6i_idev->dev->ifindex != oif) {
					if (flags & RT6_LOOKUP_F_IFACE)
						continue;
					if (local &&
					    local->rt6i_idev->dev->ifindex == oif)
						continue;
				}
				local = sprt;
			}
		} else {
			if (ipv6_chk_addr(net, saddr, dev,
					  flags & RT6_LOOKUP_F_IFACE))
				return sprt;
		}
	}

	if (oif) {
		if (local)
			return local;

		if (flags & RT6_LOOKUP_F_IFACE)
			return net->ipv6.ip6_null_entry;
	}
out:
	return rt;
}

#ifdef CONFIG_IPV6_ROUTER_PREF
struct __rt6_probe_work {
	struct work_struct work;
	struct in6_addr target;
	struct net_device *dev;
};

static void rt6_probe_deferred(struct work_struct *w)
{
	struct in6_addr mcaddr;
	struct __rt6_probe_work *work =
		container_of(w, struct __rt6_probe_work, work);

	addrconf_addr_solict_mult(&work->target, &mcaddr);
	ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
	dev_put(work->dev);
	kfree(work);
}

static void rt6_probe(struct rt6_info *rt)
{
	struct __rt6_probe_work *work;
	struct neighbour *neigh;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
		return;
	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		if (neigh->nud_state & NUD_VALID)
			goto out;

		work = NULL;
		write_lock(&neigh->lock);
		if (!(neigh->nud_state & NUD_VALID) &&
		    time_after(jiffies,
			       neigh->updated +
			       rt->rt6i_idev->cnf.rtr_probe_interval)) {
			work = kmalloc(sizeof(*work), GFP_ATOMIC);
			if (work)
				__neigh_set_probe_once(neigh);
		}
		write_unlock(&neigh->lock);
	} else {
		work = kmalloc(sizeof(*work), GFP_ATOMIC);
	}

	if (work) {
		INIT_WORK(&work->work, rt6_probe_deferred);
		work->target = rt->rt6i_gateway;
		dev_hold(rt->dst.dev);
		work->dev = rt->dst.dev;
		schedule_work(&work->work);
	}

out:
	rcu_read_unlock_bh();
}
#else
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif

/*
 *	Default Router Selection (RFC 2461 6.3.6)
 */
static inline int rt6_check_dev(struct rt6_info *rt, int oif)
{
	struct net_device *dev = rt->dst.dev;
	if (!oif || dev->ifindex == oif)
		return 2;
	if ((dev->flags & IFF_LOOPBACK) &&
	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
		return 1;
	return 0;
}

static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
{
	struct neighbour *neigh;
	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;

	if (rt->rt6i_flags & RTF_NONEXTHOP ||
	    !(rt->rt6i_flags & RTF_GATEWAY))
		return RT6_NUD_SUCCEED;

	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		read_lock(&neigh->lock);
		if (neigh->nud_state & NUD_VALID)
			ret = RT6_NUD_SUCCEED;
#ifdef CONFIG_IPV6_ROUTER_PREF
		else if (!(neigh->nud_state & NUD_FAILED))
			ret = RT6_NUD_SUCCEED;
		else
			ret = RT6_NUD_FAIL_PROBE;
#endif
		read_unlock(&neigh->lock);
	} else {
		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
	}
	rcu_read_unlock_bh();

	return ret;
}

static int rt6_score_route(struct rt6_info *rt, int oif,
			   int strict)
{
	int m;

	m = rt6_check_dev(rt, oif);
	if (!m && (strict & RT6_LOOKUP_F_IFACE))
		return RT6_NUD_FAIL_HARD;
#ifdef CONFIG_IPV6_ROUTER_PREF
	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
#endif
	if (strict & RT6_LOOKUP_F_REACHABLE) {
		int n = rt6_check_neigh(rt);
		if (n < 0)
			return n;
	}
	return m;
}

static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
				   int *mpri, struct rt6_info *match,
				   bool *do_rr)
{
	int m;
	bool match_do_rr = false;
	struct inet6_dev *idev = rt->rt6i_idev;
	struct net_device *dev = rt->dst.dev;

	if (dev && !netif_carrier_ok(dev) &&
	    idev->cnf.ignore_routes_with_linkdown &&
	    !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
		goto out;

	if (rt6_check_expired(rt))
		goto out;

	m = rt6_score_route(rt, oif, strict);
	if (m == RT6_NUD_FAIL_DO_RR) {
		match_do_rr = true;
		m = 0; /* lowest valid score */
	} else if (m == RT6_NUD_FAIL_HARD) {
		goto out;
	}

	if (strict & RT6_LOOKUP_F_REACHABLE)
		rt6_probe(rt);

	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
	if (m > *mpri) {
		*do_rr = match_do_rr;
		*mpri = m;
		match = rt;
	}
out:
	return match;
}

static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
				     struct rt6_info *leaf,
				     struct rt6_info *rr_head,
				     u32 metric, int oif, int strict,
				     bool *do_rr)
{
	struct rt6_info *rt, *match, *cont;
	int mpri = -1;

	match = NULL;
	cont = NULL;
	for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
		if (rt->rt6i_metric != metric) {
			cont = rt;
			break;
		}

		match = find_match(rt, oif, strict, &mpri, match, do_rr);
	}

	for (rt = leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
		if (rt->rt6i_metric != metric) {
			cont = rt;
			break;
		}

		match = find_match(rt, oif, strict, &mpri, match, do_rr);
	}

	if (match || !cont)
		return match;

	for (rt = cont; rt; rt = rt->dst.rt6_next)
		match = find_match(rt, oif, strict, &mpri, match, do_rr);

	return match;
}

static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn,
				   int oif, int strict)
{
	struct rt6_info *leaf = fn->leaf;
	struct rt6_info *match, *rt0;
	bool do_rr = false;

	if (!leaf)
		return net->ipv6.ip6_null_entry;

	rt0 = fn->rr_ptr;
	if (!rt0)
		fn->rr_ptr = rt0 = leaf;

	match = find_rr_leaf(fn, leaf, rt0, rt0->rt6i_metric, oif, strict,
			     &do_rr);

	if (do_rr) {
		struct rt6_info *next = rt0->dst.rt6_next;

		/* no entries matched; do round-robin */
		if (!next || next->rt6i_metric != rt0->rt6i_metric)
			next = leaf;

		if (next != rt0)
			fn->rr_ptr = next;
	}

	return match ? match : net->ipv6.ip6_null_entry;
}

static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
{
	return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
}

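/* rt6_route_rcv() handles a Route Information option received in a
 * Router Advertisement (RFC 4191): it validates the option and then
 * adds, refreshes or deletes the corresponding RTF_ROUTEINFO route
 * depending on the advertised lifetime and preference.
 */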
#ifdef CONFIG_IPV6_ROUTE_INFO
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	if (rinfo->prefix_len == 0)
		rt = rt6_get_dflt_router(gwaddr, dev);
	else
		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev);

	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
					dev, pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime))
			rt6_clean_expires(rt);
		else
			rt6_set_expires(rt, jiffies + HZ * lifetime);

		ip6_rt_put(rt);
	}
	return 0;
}
#endif

static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
					struct in6_addr *saddr)
{
	struct fib6_node *pn;
	while (1) {
		if (fn->fn_flags & RTN_TL_ROOT)
			return NULL;
		pn = fn->parent;
		if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
			fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
		else
			fn = pn;
		if (fn->fn_flags & RTN_RTINFO)
			return fn;
	}
}

static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
			  bool null_fallback)
{
	struct rt6_info *rt = *prt;

	if (dst_hold_safe(&rt->dst))
		return true;
	if (null_fallback) {
		rt = net->ipv6.ip6_null_entry;
		dst_hold(&rt->dst);
	} else {
		rt = NULL;
	}
	*prt = rt;
	return false;
}

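/* Table lookup helper used via fib6_rule_lookup(): walk the fib6 tree for
 * the flow's daddr/saddr, filter the leaf by device/source address, apply
 * multipath selection, backtrack towards the root on a miss, and finally
 * prefer a matching entry from the route's exception (cached dst) table.
 */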
static struct rt6_info *ip6_pol_route_lookup(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6, int flags)
{
	struct rt6_info *rt, *rt_cache;
	struct fib6_node *fn;

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	rt = fn->leaf;
	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
		rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
	if (rt == net->ipv6.ip6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto restart;
	}
	/* Search through exception table */
	rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr);
	if (rt_cache)
		rt = rt_cache;

	if (ip6_hold_safe(net, &rt, true))
		dst_use_noref(&rt->dst, jiffies);

	read_unlock_bh(&table->tb6_lock);

	trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);

	return rt;

}

struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
				   int flags)
{
	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
}
EXPORT_SYMBOL_GPL(ip6_route_lookup);

struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
			    const struct in6_addr *saddr, int oif, int strict)
{
	struct flowi6 fl6 = {
		.flowi6_oif = oif,
		.daddr = *daddr,
	};
	struct dst_entry *dst;
	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;

	if (saddr) {
		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
		flags |= RT6_LOOKUP_F_HAS_SADDR;
	}

	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
	if (dst->error == 0)
		return (struct rt6_info *) dst;

	dst_release(dst);

	return NULL;
}
EXPORT_SYMBOL(rt6_lookup);

/* ip6_ins_rt is called with FREE table->tb6_lock.
 * It takes a new route entry; if the addition fails for any reason,
 * the route is released.
 * Caller must hold dst before calling it.
 */

static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
			struct mx6_config *mxc,
			struct netlink_ext_ack *extack)
{
	int err;
	struct fib6_table *table;

	table = rt->rt6i_table;
	write_lock_bh(&table->tb6_lock);
	err = fib6_add(&table->tb6_root, rt, info, mxc, extack);
	write_unlock_bh(&table->tb6_lock);

	return err;
}

int ip6_ins_rt(struct rt6_info *rt)
{
	struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
	struct mx6_config mxc = { .mx = NULL, };

	/* Hold dst to account for the reference from the fib6 tree */
	dst_hold(&rt->dst);
	return __ip6_ins_rt(rt, &info, &mxc, NULL);
}

/* called with rcu_lock held */
static struct net_device *ip6_rt_get_dev_rcu(struct rt6_info *rt)
{
	struct net_device *dev = rt->dst.dev;

	if (rt->rt6i_flags & RTF_LOCAL) {
		/* for copies of local routes, dst->dev needs to be the
		 * device if it is a master device, the master device if
		 * device is enslaved, and the loopback as the default
		 */
		if (netif_is_l3_slave(dev) &&
		    !rt6_need_strict(&rt->rt6i_dst.addr))
			dev = l3mdev_master_dev_rcu(dev);
		else if (!netif_is_l3_master(dev))
			dev = dev_net(dev)->loopback_dev;
		/* last case is netif_is_l3_master(dev) is true in which
		 * case we want dev returned to be dev
		 */
	}

	return dev;
}

static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
					   const struct in6_addr *daddr,
					   const struct in6_addr *saddr)
{
	struct net_device *dev;
	struct rt6_info *rt;

	/*
	 *	Clone the route.
	 */

	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
		ort = (struct rt6_info *)ort->dst.from;

	rcu_read_lock();
	dev = ip6_rt_get_dev_rcu(ort);
	rt = __ip6_dst_alloc(dev_net(dev), dev, 0);
	rcu_read_unlock();
	if (!rt)
		return NULL;

	ip6_rt_copy_init(rt, ort);
	rt->rt6i_flags |= RTF_CACHE;
	rt->rt6i_metric = 0;
	rt->dst.flags |= DST_HOST;
	rt->rt6i_dst.addr = *daddr;
	rt->rt6i_dst.plen = 128;

	if (!rt6_is_gw_or_nonexthop(ort)) {
		if (ort->rt6i_dst.plen != 128 &&
		    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
			rt->rt6i_flags |= RTF_ANYCAST;
#ifdef CONFIG_IPV6_SUBTREES
		if (rt->rt6i_src.plen && saddr) {
			rt->rt6i_src.addr = *saddr;
			rt->rt6i_src.plen = 128;
		}
#endif
	}

	return rt;
}

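/* Per-cpu route copies: each fib6 route keeps an RTF_PCPU clone per cpu
 * (rt->rt6i_pcpu) so that output lookups can take a reference without
 * bouncing the owner's refcount between cpus.  rt6_make_pcpu_route()
 * installs the clone with cmpxchg() and keeps whichever copy won a
 * concurrent race.
 */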
static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
{
	struct net_device *dev;
	struct rt6_info *pcpu_rt;

	rcu_read_lock();
	dev = ip6_rt_get_dev_rcu(rt);
	pcpu_rt = __ip6_dst_alloc(dev_net(dev), dev, rt->dst.flags);
	rcu_read_unlock();
	if (!pcpu_rt)
		return NULL;
	ip6_rt_copy_init(pcpu_rt, rt);
	pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
	pcpu_rt->rt6i_flags |= RTF_PCPU;
	return pcpu_rt;
}

/* It should be called with read_lock_bh(&tb6_lock) acquired */
static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
{
	struct rt6_info *pcpu_rt, **p;

	p = this_cpu_ptr(rt->rt6i_pcpu);
	pcpu_rt = *p;

	if (pcpu_rt && ip6_hold_safe(NULL, &pcpu_rt, false))
		rt6_dst_from_metrics_check(pcpu_rt);

	return pcpu_rt;
}

static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
{
	struct rt6_info *pcpu_rt, *prev, **p;

	pcpu_rt = ip6_rt_pcpu_alloc(rt);
	if (!pcpu_rt) {
		struct net *net = dev_net(rt->dst.dev);

		dst_hold(&net->ipv6.ip6_null_entry->dst);
		return net->ipv6.ip6_null_entry;
	}

	dst_hold(&pcpu_rt->dst);
	p = this_cpu_ptr(rt->rt6i_pcpu);
	prev = cmpxchg(p, NULL, pcpu_rt);
	if (prev) {
		/* If someone did it before us, return prev instead */
		/* release refcnt taken by ip6_rt_pcpu_alloc() */
		dst_release_immediate(&pcpu_rt->dst);
		/* release refcnt taken by above dst_hold() */
		dst_release_immediate(&pcpu_rt->dst);
		dst_hold(&prev->dst);
		pcpu_rt = prev;
	}

	rt6_dst_from_metrics_check(pcpu_rt);
	return pcpu_rt;
}

/* exception hash table implementation
 */
static DEFINE_SPINLOCK(rt6_exception_lock);

/* Remove rt6_ex from hash table and free the memory
 * Caller must hold rt6_exception_lock
 */
static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
				 struct rt6_exception *rt6_ex)
{
	if (!bucket || !rt6_ex)
		return;
	rt6_ex->rt6i->rt6i_node = NULL;
	hlist_del_rcu(&rt6_ex->hlist);
	rt6_release(rt6_ex->rt6i);
	kfree_rcu(rt6_ex, rcu);
	WARN_ON_ONCE(!bucket->depth);
	bucket->depth--;
}

/* Remove oldest rt6_ex in bucket and free the memory
 * Caller must hold rt6_exception_lock
 */
static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
{
	struct rt6_exception *rt6_ex, *oldest = NULL;

	if (!bucket)
		return;

	hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
		if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
			oldest = rt6_ex;
	}
	rt6_remove_exception(bucket, oldest);
}

static u32 rt6_exception_hash(const struct in6_addr *dst,
			      const struct in6_addr *src)
{
	static u32 seed __read_mostly;
	u32 val;

	net_get_random_once(&seed, sizeof(seed));
	val = jhash(dst, sizeof(*dst), seed);

#ifdef CONFIG_IPV6_SUBTREES
	if (src)
		val = jhash(src, sizeof(*src), val);
#endif
	return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
}

/* Helper function to find the cached rt in the hash table
 * and update bucket pointer to point to the bucket for this
 * (daddr, saddr) pair
 * Caller must hold rt6_exception_lock
 */
static struct rt6_exception *
__rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
			      const struct in6_addr *daddr,
			      const struct in6_addr *saddr)
{
	struct rt6_exception *rt6_ex;
	u32 hval;

	if (!(*bucket) || !daddr)
		return NULL;

	hval = rt6_exception_hash(daddr, saddr);
	*bucket += hval;

	hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
		struct rt6_info *rt6 = rt6_ex->rt6i;
		bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);

#ifdef CONFIG_IPV6_SUBTREES
		if (matched && saddr)
			matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
#endif
		if (matched)
			return rt6_ex;
	}
	return NULL;
}

/* Helper function to find the cached rt in the hash table
 * and update bucket pointer to point to the bucket for this
 * (daddr, saddr) pair
 * Caller must hold rcu_read_lock()
 */
static struct rt6_exception *
__rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
			 const struct in6_addr *daddr,
			 const struct in6_addr *saddr)
{
	struct rt6_exception *rt6_ex;
	u32 hval;

	WARN_ON_ONCE(!rcu_read_lock_held());

	if (!(*bucket) || !daddr)
		return NULL;

	hval = rt6_exception_hash(daddr, saddr);
	*bucket += hval;

	hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
		struct rt6_info *rt6 = rt6_ex->rt6i;
		bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);

#ifdef CONFIG_IPV6_SUBTREES
		if (matched && saddr)
			matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
#endif
		if (matched)
			return rt6_ex;
	}
	return NULL;
}

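/* Insert a cached clone @nrt into the exception table of its parent
 * route @ort: an existing entry for the same (daddr, saddr) key is
 * replaced, buckets are capped at FIB6_MAX_DEPTH by evicting the oldest
 * entry, and fn->fn_sernum is bumped so stale cached dsts get
 * revalidated.
 */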
1243static int rt6_insert_exception(struct rt6_info *nrt,
1244 struct rt6_info *ort)
1245{
1246 struct rt6_exception_bucket *bucket;
1247 struct in6_addr *src_key = NULL;
1248 struct rt6_exception *rt6_ex;
1249 int err = 0;
1250
1251 /* ort can't be a cache or pcpu route */
1252 if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
1253 ort = (struct rt6_info *)ort->dst.from;
1254 WARN_ON_ONCE(ort->rt6i_flags & (RTF_CACHE | RTF_PCPU));
1255
1256 spin_lock_bh(&rt6_exception_lock);
1257
1258 if (ort->exception_bucket_flushed) {
1259 err = -EINVAL;
1260 goto out;
1261 }
1262
1263 bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
1264 lockdep_is_held(&rt6_exception_lock));
1265 if (!bucket) {
1266 bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
1267 GFP_ATOMIC);
1268 if (!bucket) {
1269 err = -ENOMEM;
1270 goto out;
1271 }
1272 rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
1273 }
1274
1275#ifdef CONFIG_IPV6_SUBTREES
1276 /* rt6i_src.plen != 0 indicates ort is in subtree
1277 * and exception table is indexed by a hash of
1278 * both rt6i_dst and rt6i_src.
1279 * Otherwise, the exception table is indexed by
1280 * a hash of only rt6i_dst.
1281 */
1282 if (ort->rt6i_src.plen)
1283 src_key = &nrt->rt6i_src.addr;
1284#endif
Wei Wang60006a42017-10-06 12:05:58 -07001285
1286 /* Update rt6i_prefsrc as it could be changed
1287 * in rt6_remove_prefsrc()
1288 */
1289 nrt->rt6i_prefsrc = ort->rt6i_prefsrc;
Wei Wangf5bbe7e2017-10-06 12:05:59 -07001290 /* rt6_mtu_change() might lower mtu on ort.
1291 * Only insert this exception route if its mtu
1292 * is less than ort's mtu value.
1293 */
1294 if (nrt->rt6i_pmtu >= dst_mtu(&ort->dst)) {
1295 err = -EINVAL;
1296 goto out;
1297 }
Wei Wang60006a42017-10-06 12:05:58 -07001298
Wei Wang35732d02017-10-06 12:05:57 -07001299 rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
1300 src_key);
1301 if (rt6_ex)
1302 rt6_remove_exception(bucket, rt6_ex);
1303
1304 rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
1305 if (!rt6_ex) {
1306 err = -ENOMEM;
1307 goto out;
1308 }
1309 rt6_ex->rt6i = nrt;
1310 rt6_ex->stamp = jiffies;
1311 atomic_inc(&nrt->rt6i_ref);
1312 nrt->rt6i_node = ort->rt6i_node;
1313 hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
1314 bucket->depth++;
1315
1316 if (bucket->depth > FIB6_MAX_DEPTH)
1317 rt6_exception_remove_oldest(bucket);
1318
1319out:
1320 spin_unlock_bh(&rt6_exception_lock);
1321
1322 /* Update fn->fn_sernum to invalidate all cached dst */
1323 if (!err)
1324 fib6_update_sernum(ort);
1325
1326 return err;
1327}
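/* Caller pattern (sketch, mirroring __ip6_rt_update_pmtu() below):
 *
 *	nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
 *	if (nrt6) {
 *		rt6_do_update_pmtu(nrt6, mtu);
 *		if (rt6_insert_exception(nrt6, rt6))
 *			dst_release_immediate(&nrt6->dst);
 *	}
 *
 * On success the exception table takes its own reference on nrt and
 * fn->fn_sernum is bumped so stale cached dsts get revalidated; on
 * failure the caller still owns nrt and must release it.
 */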
1328
1329void rt6_flush_exceptions(struct rt6_info *rt)
1330{
1331 struct rt6_exception_bucket *bucket;
1332 struct rt6_exception *rt6_ex;
1333 struct hlist_node *tmp;
1334 int i;
1335
1336 spin_lock_bh(&rt6_exception_lock);
 1337	/* Prevent rt6_insert_exception() from recreating the bucket list */
1338 rt->exception_bucket_flushed = 1;
1339
1340 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1341 lockdep_is_held(&rt6_exception_lock));
1342 if (!bucket)
1343 goto out;
1344
1345 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1346 hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
1347 rt6_remove_exception(bucket, rt6_ex);
1348 WARN_ON_ONCE(bucket->depth);
1349 bucket++;
1350 }
1351
1352out:
1353 spin_unlock_bh(&rt6_exception_lock);
1354}
1355
1356/* Find cached rt in the hash table inside passed in rt
1357 * Caller has to hold rcu_read_lock()
1358 */
1359static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
1360 struct in6_addr *daddr,
1361 struct in6_addr *saddr)
1362{
1363 struct rt6_exception_bucket *bucket;
1364 struct in6_addr *src_key = NULL;
1365 struct rt6_exception *rt6_ex;
1366 struct rt6_info *res = NULL;
1367
1368 bucket = rcu_dereference(rt->rt6i_exception_bucket);
1369
1370#ifdef CONFIG_IPV6_SUBTREES
1371 /* rt6i_src.plen != 0 indicates rt is in subtree
1372 * and exception table is indexed by a hash of
1373 * both rt6i_dst and rt6i_src.
1374 * Otherwise, the exception table is indexed by
1375 * a hash of only rt6i_dst.
1376 */
1377 if (rt->rt6i_src.plen)
1378 src_key = saddr;
1379#endif
1380 rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
1381
1382 if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
1383 res = rt6_ex->rt6i;
1384
1385 return res;
1386}
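/* Note: an expired exception is skipped here but not unlinked; removal is
 * left to rt6_age_exceptions() or an explicit flush, which keeps this
 * lookup cheap under rcu_read_lock().
 */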
1387
1388/* Remove the passed in cached rt from the hash table that contains it */
1389int rt6_remove_exception_rt(struct rt6_info *rt)
1390{
1391 struct rt6_info *from = (struct rt6_info *)rt->dst.from;
1392 struct rt6_exception_bucket *bucket;
1393 struct in6_addr *src_key = NULL;
1394 struct rt6_exception *rt6_ex;
1395 int err;
1396
1397 if (!from ||
 1398	    !(rt->rt6i_flags & RTF_CACHE))
1399 return -EINVAL;
1400
1401 if (!rcu_access_pointer(from->rt6i_exception_bucket))
1402 return -ENOENT;
1403
1404 spin_lock_bh(&rt6_exception_lock);
1405 bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
1406 lockdep_is_held(&rt6_exception_lock));
1407#ifdef CONFIG_IPV6_SUBTREES
1408 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1409 * and exception table is indexed by a hash of
1410 * both rt6i_dst and rt6i_src.
1411 * Otherwise, the exception table is indexed by
1412 * a hash of only rt6i_dst.
1413 */
1414 if (from->rt6i_src.plen)
1415 src_key = &rt->rt6i_src.addr;
1416#endif
1417 rt6_ex = __rt6_find_exception_spinlock(&bucket,
1418 &rt->rt6i_dst.addr,
1419 src_key);
1420 if (rt6_ex) {
1421 rt6_remove_exception(bucket, rt6_ex);
1422 err = 0;
1423 } else {
1424 err = -ENOENT;
1425 }
1426
1427 spin_unlock_bh(&rt6_exception_lock);
1428 return err;
1429}
1430
1431/* Find rt6_ex which contains the passed in rt cache and
1432 * refresh its stamp
1433 */
1434static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
1435{
1436 struct rt6_info *from = (struct rt6_info *)rt->dst.from;
1437 struct rt6_exception_bucket *bucket;
1438 struct in6_addr *src_key = NULL;
1439 struct rt6_exception *rt6_ex;
1440
1441 if (!from ||
 1442	    !(rt->rt6i_flags & RTF_CACHE))
1443 return;
1444
1445 rcu_read_lock();
1446 bucket = rcu_dereference(from->rt6i_exception_bucket);
1447
1448#ifdef CONFIG_IPV6_SUBTREES
1449 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1450 * and exception table is indexed by a hash of
1451 * both rt6i_dst and rt6i_src.
1452 * Otherwise, the exception table is indexed by
1453 * a hash of only rt6i_dst.
1454 */
1455 if (from->rt6i_src.plen)
1456 src_key = &rt->rt6i_src.addr;
1457#endif
1458 rt6_ex = __rt6_find_exception_rcu(&bucket,
1459 &rt->rt6i_dst.addr,
1460 src_key);
1461 if (rt6_ex)
1462 rt6_ex->stamp = jiffies;
1463
1464 rcu_read_unlock();
1465}
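/* Called from __ip6_rt_update_pmtu() below when the PMTU of an existing
 * RTF_CACHE route changes, so the exception entry's timestamp reflects
 * the most recent PMTU event.
 */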
1466
Wei Wang60006a42017-10-06 12:05:58 -07001467static void rt6_exceptions_remove_prefsrc(struct rt6_info *rt)
1468{
1469 struct rt6_exception_bucket *bucket;
1470 struct rt6_exception *rt6_ex;
1471 int i;
1472
1473 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1474 lockdep_is_held(&rt6_exception_lock));
1475
1476 if (bucket) {
1477 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1478 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1479 rt6_ex->rt6i->rt6i_prefsrc.plen = 0;
1480 }
1481 bucket++;
1482 }
1483 }
1484}
1485
Wei Wangf5bbe7e2017-10-06 12:05:59 -07001486static void rt6_exceptions_update_pmtu(struct rt6_info *rt, int mtu)
1487{
1488 struct rt6_exception_bucket *bucket;
1489 struct rt6_exception *rt6_ex;
1490 int i;
1491
1492 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1493 lockdep_is_held(&rt6_exception_lock));
1494
1495 if (bucket) {
1496 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1497 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1498 struct rt6_info *entry = rt6_ex->rt6i;
1499 /* For RTF_CACHE with rt6i_pmtu == 0
1500 * (i.e. a redirected route),
 1501			 * the metrics of its rt->dst.from have already
1502 * been updated.
1503 */
1504 if (entry->rt6i_pmtu && entry->rt6i_pmtu > mtu)
1505 entry->rt6i_pmtu = mtu;
1506 }
1507 bucket++;
1508 }
1509 }
1510}
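/* Assumed calling context (sketch): like rt6_exceptions_remove_prefsrc()
 * above, this walks the bucket array under rt6_exception_lock (note the
 * lockdep_is_held() annotation), so it is only safe from paths that
 * already hold the lock, e.g. an MTU change on the parent route.
 */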
1511
Wei Wangb16cb452017-10-06 12:06:00 -07001512#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
1513
1514static void rt6_exceptions_clean_tohost(struct rt6_info *rt,
1515 struct in6_addr *gateway)
1516{
1517 struct rt6_exception_bucket *bucket;
1518 struct rt6_exception *rt6_ex;
1519 struct hlist_node *tmp;
1520 int i;
1521
1522 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1523 return;
1524
1525 spin_lock_bh(&rt6_exception_lock);
1526 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1527 lockdep_is_held(&rt6_exception_lock));
1528
1529 if (bucket) {
1530 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1531 hlist_for_each_entry_safe(rt6_ex, tmp,
1532 &bucket->chain, hlist) {
1533 struct rt6_info *entry = rt6_ex->rt6i;
1534
1535 if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
1536 RTF_CACHE_GATEWAY &&
1537 ipv6_addr_equal(gateway,
1538 &entry->rt6i_gateway)) {
1539 rt6_remove_exception(bucket, rt6_ex);
1540 }
1541 }
1542 bucket++;
1543 }
1544 }
1545
1546 spin_unlock_bh(&rt6_exception_lock);
1547}
1548
Wei Wangc757faa2017-10-06 12:06:01 -07001549static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
1550 struct rt6_exception *rt6_ex,
1551 struct fib6_gc_args *gc_args,
1552 unsigned long now)
1553{
1554 struct rt6_info *rt = rt6_ex->rt6i;
1555
1556 if (atomic_read(&rt->dst.__refcnt) == 1 &&
1557 time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
1558 RT6_TRACE("aging clone %p\n", rt);
1559 rt6_remove_exception(bucket, rt6_ex);
1560 return;
1561 } else if (rt->rt6i_flags & RTF_GATEWAY) {
1562 struct neighbour *neigh;
1563 __u8 neigh_flags = 0;
1564
1565 neigh = dst_neigh_lookup(&rt->dst, &rt->rt6i_gateway);
1566 if (neigh) {
1567 neigh_flags = neigh->flags;
1568 neigh_release(neigh);
1569 }
1570 if (!(neigh_flags & NTF_ROUTER)) {
1571 RT6_TRACE("purging route %p via non-router but gateway\n",
1572 rt);
1573 rt6_remove_exception(bucket, rt6_ex);
1574 return;
1575 }
1576 }
1577 gc_args->more++;
1578}
1579
1580void rt6_age_exceptions(struct rt6_info *rt,
1581 struct fib6_gc_args *gc_args,
1582 unsigned long now)
1583{
1584 struct rt6_exception_bucket *bucket;
1585 struct rt6_exception *rt6_ex;
1586 struct hlist_node *tmp;
1587 int i;
1588
1589 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1590 return;
1591
1592 spin_lock_bh(&rt6_exception_lock);
1593 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1594 lockdep_is_held(&rt6_exception_lock));
1595
1596 if (bucket) {
1597 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1598 hlist_for_each_entry_safe(rt6_ex, tmp,
1599 &bucket->chain, hlist) {
1600 rt6_age_examine_exception(bucket, rt6_ex,
1601 gc_args, now);
1602 }
1603 bucket++;
1604 }
1605 }
1606 spin_unlock_bh(&rt6_exception_lock);
1607}
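/* GC summary: an exception is dropped once its only reference is the
 * table's own (refcnt == 1) and it has been idle past gc_args->timeout,
 * or when it points at a gateway whose neighbour entry no longer has
 * NTF_ROUTER set.  Anything kept alive bumps gc_args->more so the fib6
 * garbage collector knows work remains.
 */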
1608
David Ahern9ff74382016-06-13 13:44:19 -07001609struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
1610 int oif, struct flowi6 *fl6, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001611{
Martin KaFai Lau367efcb2014-10-20 13:42:45 -07001612 struct fib6_node *fn, *saved_fn;
Wei Wang2b760fc2017-10-06 12:06:03 -07001613 struct rt6_info *rt, *rt_cache;
Thomas Grafc71099a2006-08-04 23:20:06 -07001614 int strict = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001615
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -07001616 strict |= flags & RT6_LOOKUP_F_IFACE;
David Ahernd5d32e42016-10-24 12:27:23 -07001617 strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
Martin KaFai Lau367efcb2014-10-20 13:42:45 -07001618 if (net->ipv6.devconf_all->forwarding == 0)
1619 strict |= RT6_LOOKUP_F_REACHABLE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001620
Thomas Grafc71099a2006-08-04 23:20:06 -07001621 read_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001622
David S. Miller4c9483b2011-03-12 16:22:43 -05001623 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
Martin KaFai Lau367efcb2014-10-20 13:42:45 -07001624 saved_fn = fn;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001625
David Ahernca254492015-10-12 11:47:10 -07001626 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1627 oif = 0;
1628
Martin KaFai Laua3c00e42014-10-20 13:42:43 -07001629redo_rt6_select:
Wei Wang8d1040e2017-10-06 12:06:08 -07001630 rt = rt6_select(net, fn, oif, strict);
Nicolas Dichtel52bd4c02013-06-28 17:35:48 +02001631 if (rt->rt6i_nsiblings)
Martin KaFai Lau367efcb2014-10-20 13:42:45 -07001632 rt = rt6_multipath_select(rt, fl6, oif, strict);
Martin KaFai Laua3c00e42014-10-20 13:42:43 -07001633 if (rt == net->ipv6.ip6_null_entry) {
1634 fn = fib6_backtrack(fn, &fl6->saddr);
1635 if (fn)
1636 goto redo_rt6_select;
Martin KaFai Lau367efcb2014-10-20 13:42:45 -07001637 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1638 /* also consider unreachable route */
1639 strict &= ~RT6_LOOKUP_F_REACHABLE;
1640 fn = saved_fn;
1641 goto redo_rt6_select;
Martin KaFai Lau367efcb2014-10-20 13:42:45 -07001642 }
Martin KaFai Laua3c00e42014-10-20 13:42:43 -07001643 }
1644
Wei Wang2b760fc2017-10-06 12:06:03 -07001645	/* Search through the exception table */
1646 rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr);
1647 if (rt_cache)
1648 rt = rt_cache;
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -08001649
Wei Wangd3843fe2017-10-06 12:06:06 -07001650 if (rt == net->ipv6.ip6_null_entry) {
Martin KaFai Laud52d3992015-05-22 20:56:06 -07001651 read_unlock_bh(&table->tb6_lock);
Wei Wangd3843fe2017-10-06 12:06:06 -07001652 dst_hold(&rt->dst);
1653 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
1654 return rt;
1655 } else if (rt->rt6i_flags & RTF_CACHE) {
1656 if (ip6_hold_safe(net, &rt, true)) {
1657 dst_use_noref(&rt->dst, jiffies);
1658 rt6_dst_from_metrics_check(rt);
1659 }
1660 read_unlock_bh(&table->tb6_lock);
David Ahernb8115802015-11-19 12:24:22 -08001661 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
Martin KaFai Laud52d3992015-05-22 20:56:06 -07001662 return rt;
Martin KaFai Lau3da59bd2015-05-22 20:56:03 -07001663 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1664 !(rt->rt6i_flags & RTF_GATEWAY))) {
1665 /* Create a RTF_CACHE clone which will not be
1666 * owned by the fib6 tree. It is for the special case where
1667 * the daddr in the skb during the neighbor look-up is different
1668 * from the fl6->daddr used to look-up route here.
1669 */
Thomas Grafc71099a2006-08-04 23:20:06 -07001670
Martin KaFai Lau3da59bd2015-05-22 20:56:03 -07001671 struct rt6_info *uncached_rt;
1672
Wei Wangd3843fe2017-10-06 12:06:06 -07001673 if (ip6_hold_safe(net, &rt, true)) {
1674 dst_use_noref(&rt->dst, jiffies);
1675 } else {
1676 read_unlock_bh(&table->tb6_lock);
1677 uncached_rt = rt;
1678 goto uncached_rt_out;
1679 }
Martin KaFai Laud52d3992015-05-22 20:56:06 -07001680 read_unlock_bh(&table->tb6_lock);
1681
Martin KaFai Lau3da59bd2015-05-22 20:56:03 -07001682 uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
1683 dst_release(&rt->dst);
1684
Wei Wang1cfb71e2017-06-17 10:42:33 -07001685 if (uncached_rt) {
1686 /* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
1687 * No need for another dst_hold()
1688 */
Martin KaFai Lau8d0b94a2015-05-22 20:56:04 -07001689 rt6_uncached_list_add(uncached_rt);
Wei Wang1cfb71e2017-06-17 10:42:33 -07001690 } else {
Martin KaFai Lau3da59bd2015-05-22 20:56:03 -07001691 uncached_rt = net->ipv6.ip6_null_entry;
Wei Wang1cfb71e2017-06-17 10:42:33 -07001692 dst_hold(&uncached_rt->dst);
1693 }
David Ahernb8115802015-11-19 12:24:22 -08001694
Wei Wangd3843fe2017-10-06 12:06:06 -07001695uncached_rt_out:
David Ahernb8115802015-11-19 12:24:22 -08001696 trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6);
Martin KaFai Lau3da59bd2015-05-22 20:56:03 -07001697 return uncached_rt;
Martin KaFai Lau3da59bd2015-05-22 20:56:03 -07001698
Martin KaFai Laud52d3992015-05-22 20:56:06 -07001699 } else {
1700 /* Get a percpu copy */
1701
1702 struct rt6_info *pcpu_rt;
1703
Wei Wangd3843fe2017-10-06 12:06:06 -07001704 dst_use_noref(&rt->dst, jiffies);
Martin KaFai Laud52d3992015-05-22 20:56:06 -07001705 pcpu_rt = rt6_get_pcpu_route(rt);
Martin KaFai Laud52d3992015-05-22 20:56:06 -07001706
Martin KaFai Lau9c7370a2015-08-14 11:05:54 -07001707 if (pcpu_rt) {
1708 read_unlock_bh(&table->tb6_lock);
1709 } else {
Wei Wanga94b9362017-10-06 12:06:04 -07001710 /* atomic_inc_not_zero() is needed when using rcu */
1711 if (atomic_inc_not_zero(&rt->rt6i_ref)) {
1712 /* We have to do the read_unlock first
1713 * because rt6_make_pcpu_route() may trigger
1714 * ip6_dst_gc() which will take the write_lock.
1715 *
1716 * No dst_hold() on rt is needed because grabbing
1717 * rt->rt6i_ref makes sure rt can't be released.
1718 */
1719 read_unlock_bh(&table->tb6_lock);
1720 pcpu_rt = rt6_make_pcpu_route(rt);
1721 rt6_release(rt);
1722 } else {
1723 /* rt is already removed from tree */
1724 read_unlock_bh(&table->tb6_lock);
1725 pcpu_rt = net->ipv6.ip6_null_entry;
1726 dst_hold(&pcpu_rt->dst);
1727 }
Martin KaFai Lau9c7370a2015-08-14 11:05:54 -07001728 }
Martin KaFai Laud52d3992015-05-22 20:56:06 -07001729
David Ahernb8115802015-11-19 12:24:22 -08001730 trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6);
Martin KaFai Laud52d3992015-05-22 20:56:06 -07001731 return pcpu_rt;
1732 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001733}
David Ahern9ff74382016-06-13 13:44:19 -07001734EXPORT_SYMBOL_GPL(ip6_pol_route);
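/* Lookup flow of ip6_pol_route() above, in short: fib6_lookup() finds the
 * fib node, rt6_select() picks a route (backtracking and, if needed,
 * retrying without RT6_LOOKUP_F_REACHABLE), the exception table is then
 * consulted for a cached clone, and the result is returned either as an
 * RTF_CACHE entry, an uncached clone (FLOWI_FLAG_KNOWN_NH without a
 * gateway), or a per-CPU copy, each with its own refcount handling.
 */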
Thomas Grafc71099a2006-08-04 23:20:06 -07001735
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001736static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
David S. Miller4c9483b2011-03-12 16:22:43 -05001737 struct flowi6 *fl6, int flags)
Pavel Emelyanov4acad722007-10-15 13:02:51 -07001738{
David S. Miller4c9483b2011-03-12 16:22:43 -05001739 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
Pavel Emelyanov4acad722007-10-15 13:02:51 -07001740}
1741
Mahesh Bandeward409b842016-09-16 12:59:08 -07001742struct dst_entry *ip6_route_input_lookup(struct net *net,
1743 struct net_device *dev,
1744 struct flowi6 *fl6, int flags)
Shmulik Ladkani72331bc2012-04-01 04:03:45 +00001745{
1746 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1747 flags |= RT6_LOOKUP_F_IFACE;
1748
1749 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1750}
Mahesh Bandeward409b842016-09-16 12:59:08 -07001751EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
Shmulik Ladkani72331bc2012-04-01 04:03:45 +00001752
Jakub Sitnicki23aebda2017-08-23 09:58:29 +02001753static void ip6_multipath_l3_keys(const struct sk_buff *skb,
1754 struct flow_keys *keys)
1755{
1756 const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
1757 const struct ipv6hdr *key_iph = outer_iph;
1758 const struct ipv6hdr *inner_iph;
1759 const struct icmp6hdr *icmph;
1760 struct ipv6hdr _inner_iph;
1761
1762 if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
1763 goto out;
1764
1765 icmph = icmp6_hdr(skb);
1766 if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
1767 icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
1768 icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
1769 icmph->icmp6_type != ICMPV6_PARAMPROB)
1770 goto out;
1771
1772 inner_iph = skb_header_pointer(skb,
1773 skb_transport_offset(skb) + sizeof(*icmph),
1774 sizeof(_inner_iph), &_inner_iph);
1775 if (!inner_iph)
1776 goto out;
1777
1778 key_iph = inner_iph;
1779out:
1780 memset(keys, 0, sizeof(*keys));
1781 keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1782 keys->addrs.v6addrs.src = key_iph->saddr;
1783 keys->addrs.v6addrs.dst = key_iph->daddr;
1784 keys->tags.flow_label = ip6_flowinfo(key_iph);
1785 keys->basic.ip_proto = key_iph->nexthdr;
1786}
1787
1788/* if skb is set it will be used and fl6 can be NULL */
1789u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb)
1790{
1791 struct flow_keys hash_keys;
1792
1793 if (skb) {
1794 ip6_multipath_l3_keys(skb, &hash_keys);
1795 return flow_hash_from_keys(&hash_keys);
1796 }
1797
1798 return get_hash_from_flowi6(fl6);
1799}
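/* Example (sketch): for ICMPv6 errors the hash is computed over the
 * embedded (inner) header, so an error generated for a flow follows the
 * same multipath leg as the flow itself.  ip6_route_input() below does:
 *
 *	if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
 *		fl6.mp_hash = rt6_multipath_hash(&fl6, skb);
 *
 * All other traffic falls back to get_hash_from_flowi6().
 */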
1800
Thomas Grafc71099a2006-08-04 23:20:06 -07001801void ip6_route_input(struct sk_buff *skb)
1802{
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001803 const struct ipv6hdr *iph = ipv6_hdr(skb);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001804 struct net *net = dev_net(skb->dev);
Thomas Grafadaa70b2006-10-13 15:01:03 -07001805 int flags = RT6_LOOKUP_F_HAS_SADDR;
Jiri Benc904af042015-08-20 13:56:31 +02001806 struct ip_tunnel_info *tun_info;
David S. Miller4c9483b2011-03-12 16:22:43 -05001807 struct flowi6 fl6 = {
David Aherne0d56fd2016-09-10 12:09:57 -07001808 .flowi6_iif = skb->dev->ifindex,
David S. Miller4c9483b2011-03-12 16:22:43 -05001809 .daddr = iph->daddr,
1810 .saddr = iph->saddr,
YOSHIFUJI Hideaki / 吉藤英明6502ca52013-01-13 05:01:51 +00001811 .flowlabel = ip6_flowinfo(iph),
David S. Miller4c9483b2011-03-12 16:22:43 -05001812 .flowi6_mark = skb->mark,
1813 .flowi6_proto = iph->nexthdr,
Thomas Grafc71099a2006-08-04 23:20:06 -07001814 };
Thomas Grafadaa70b2006-10-13 15:01:03 -07001815
Jiri Benc904af042015-08-20 13:56:31 +02001816 tun_info = skb_tunnel_info(skb);
Jiri Benc46fa0622015-08-28 20:48:19 +02001817 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
Jiri Benc904af042015-08-20 13:56:31 +02001818 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
Jakub Sitnicki23aebda2017-08-23 09:58:29 +02001819 if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
1820 fl6.mp_hash = rt6_multipath_hash(&fl6, skb);
Jiri Benc06e9d042015-08-20 13:56:26 +02001821 skb_dst_drop(skb);
Shmulik Ladkani72331bc2012-04-01 04:03:45 +00001822 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
Thomas Grafc71099a2006-08-04 23:20:06 -07001823}
1824
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001825static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
David S. Miller4c9483b2011-03-12 16:22:43 -05001826 struct flowi6 *fl6, int flags)
Thomas Grafc71099a2006-08-04 23:20:06 -07001827{
David S. Miller4c9483b2011-03-12 16:22:43 -05001828 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
Thomas Grafc71099a2006-08-04 23:20:06 -07001829}
1830
Paolo Abeni6f21c962016-01-29 12:30:19 +01001831struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
1832 struct flowi6 *fl6, int flags)
Thomas Grafc71099a2006-08-04 23:20:06 -07001833{
David Ahernd46a9d62015-10-21 08:42:22 -07001834 bool any_src;
Thomas Grafc71099a2006-08-04 23:20:06 -07001835
David Ahern4c1feac2016-09-10 12:09:56 -07001836 if (rt6_need_strict(&fl6->daddr)) {
1837 struct dst_entry *dst;
1838
1839 dst = l3mdev_link_scope_lookup(net, fl6);
1840 if (dst)
1841 return dst;
1842 }
David Ahernca254492015-10-12 11:47:10 -07001843
Pavel Emelyanov1fb94892012-08-08 21:53:36 +00001844 fl6->flowi6_iif = LOOPBACK_IFINDEX;
David McCullough4dc27d1c2012-06-25 15:42:26 +00001845
David Ahernd46a9d62015-10-21 08:42:22 -07001846 any_src = ipv6_addr_any(&fl6->saddr);
David Ahern741a11d2015-09-28 10:12:13 -07001847 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
David Ahernd46a9d62015-10-21 08:42:22 -07001848 (fl6->flowi6_oif && any_src))
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -07001849 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -07001850
David Ahernd46a9d62015-10-21 08:42:22 -07001851 if (!any_src)
Thomas Grafadaa70b2006-10-13 15:01:03 -07001852 flags |= RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideaki / 吉藤英明0c9a2ac2010-03-07 00:14:44 +00001853 else if (sk)
1854 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
Thomas Grafadaa70b2006-10-13 15:01:03 -07001855
David S. Miller4c9483b2011-03-12 16:22:43 -05001856 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001857}
Paolo Abeni6f21c962016-01-29 12:30:19 +01001858EXPORT_SYMBOL_GPL(ip6_route_output_flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001859
David S. Miller2774c132011-03-01 14:59:04 -08001860struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
David S. Miller14e50e52007-05-24 18:17:54 -07001861{
David S. Miller5c1e6aa2011-04-28 14:13:38 -07001862 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
Wei Wang1dbe32522017-06-17 10:42:26 -07001863 struct net_device *loopback_dev = net->loopback_dev;
David S. Miller14e50e52007-05-24 18:17:54 -07001864 struct dst_entry *new = NULL;
1865
Wei Wang1dbe32522017-06-17 10:42:26 -07001866 rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
Wei Wangb2a9c0e2017-06-17 10:42:41 -07001867 DST_OBSOLETE_NONE, 0);
David S. Miller14e50e52007-05-24 18:17:54 -07001868 if (rt) {
Martin KaFai Lau0a1f5962015-10-15 16:39:58 -07001869 rt6_info_init(rt);
1870
Changli Gaod8d1f302010-06-10 23:31:35 -07001871 new = &rt->dst;
David S. Miller14e50e52007-05-24 18:17:54 -07001872 new->__use = 1;
Herbert Xu352e5122007-11-13 21:34:06 -08001873 new->input = dst_discard;
Eric W. Biedermanede20592015-10-07 16:48:47 -05001874 new->output = dst_discard_out;
David S. Miller14e50e52007-05-24 18:17:54 -07001875
Martin KaFai Lau0a1f5962015-10-15 16:39:58 -07001876 dst_copy_metrics(new, &ort->dst);
David S. Miller14e50e52007-05-24 18:17:54 -07001877
Wei Wang1dbe32522017-06-17 10:42:26 -07001878 rt->rt6i_idev = in6_dev_get(loopback_dev);
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00001879 rt->rt6i_gateway = ort->rt6i_gateway;
Martin KaFai Lau0a1f5962015-10-15 16:39:58 -07001880 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
David S. Miller14e50e52007-05-24 18:17:54 -07001881 rt->rt6i_metric = 0;
1882
1883 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1884#ifdef CONFIG_IPV6_SUBTREES
1885 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1886#endif
David S. Miller14e50e52007-05-24 18:17:54 -07001887 }
1888
David S. Miller69ead7a2011-03-01 14:45:33 -08001889 dst_release(dst_orig);
1890 return new ? new : ERR_PTR(-ENOMEM);
David S. Miller14e50e52007-05-24 18:17:54 -07001891}
David S. Miller14e50e52007-05-24 18:17:54 -07001892
Linus Torvalds1da177e2005-04-16 15:20:36 -07001893/*
1894 * Destination cache support functions
1895 */
1896
Martin KaFai Lau4b32b5a2015-04-28 13:03:06 -07001897static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1898{
1899 if (rt->dst.from &&
1900 dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1901 dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1902}
1903
Martin KaFai Lau3da59bd2015-05-22 20:56:03 -07001904static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1905{
Steffen Klassert36143642017-08-25 09:05:42 +02001906 u32 rt_cookie = 0;
Wei Wangc5cff852017-08-21 09:47:10 -07001907
1908 if (!rt6_get_cookie_safe(rt, &rt_cookie) || rt_cookie != cookie)
Martin KaFai Lau3da59bd2015-05-22 20:56:03 -07001909 return NULL;
1910
1911 if (rt6_check_expired(rt))
1912 return NULL;
1913
1914 return &rt->dst;
1915}
1916
1917static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1918{
Martin KaFai Lau5973fb12015-11-11 11:51:07 -08001919 if (!__rt6_check_expired(rt) &&
1920 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
Martin KaFai Lau3da59bd2015-05-22 20:56:03 -07001921 rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1922 return &rt->dst;
1923 else
1924 return NULL;
1925}
1926
Linus Torvalds1da177e2005-04-16 15:20:36 -07001927static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1928{
1929 struct rt6_info *rt;
1930
1931 rt = (struct rt6_info *) dst;
1932
Nicolas Dichtel6f3118b2012-09-10 22:09:46 +00001933 /* All IPV6 dsts are created with ->obsolete set to the value
1934 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1935 * into this function always.
1936 */
Hannes Frederic Sowae3bc10b2013-10-24 07:48:24 +02001937
Martin KaFai Lau4b32b5a2015-04-28 13:03:06 -07001938 rt6_dst_from_metrics_check(rt);
1939
Martin KaFai Lau02bcf4e2015-11-11 11:51:08 -08001940 if (rt->rt6i_flags & RTF_PCPU ||
Wei Wanga4c2fd72017-06-17 10:42:42 -07001941 (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->dst.from))
Martin KaFai Lau3da59bd2015-05-22 20:56:03 -07001942 return rt6_dst_from_check(rt, cookie);
1943 else
1944 return rt6_check(rt, cookie);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001945}
1946
1947static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1948{
1949 struct rt6_info *rt = (struct rt6_info *) dst;
1950
1951 if (rt) {
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +00001952 if (rt->rt6i_flags & RTF_CACHE) {
1953 if (rt6_check_expired(rt)) {
1954 ip6_del_rt(rt);
1955 dst = NULL;
1956 }
1957 } else {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001958 dst_release(dst);
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +00001959 dst = NULL;
1960 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001961 }
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +00001962 return dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001963}
1964
1965static void ip6_link_failure(struct sk_buff *skb)
1966{
1967 struct rt6_info *rt;
1968
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +00001969 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001970
Eric Dumazetadf30902009-06-02 05:19:30 +00001971 rt = (struct rt6_info *) skb_dst(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001972 if (rt) {
Hannes Frederic Sowa1eb4f752013-07-10 23:00:57 +02001973 if (rt->rt6i_flags & RTF_CACHE) {
Wei Wangad65a2f2017-06-17 10:42:35 -07001974 if (dst_hold_safe(&rt->dst))
1975 ip6_del_rt(rt);
Wei Wangc5cff852017-08-21 09:47:10 -07001976 } else {
1977 struct fib6_node *fn;
1978
1979 rcu_read_lock();
1980 fn = rcu_dereference(rt->rt6i_node);
1981 if (fn && (rt->rt6i_flags & RTF_DEFAULT))
1982 fn->fn_sernum = -1;
1983 rcu_read_unlock();
Hannes Frederic Sowa1eb4f752013-07-10 23:00:57 +02001984 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001985 }
1986}
1987
Martin KaFai Lau45e4fd22015-05-22 20:56:00 -07001988static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1989{
1990 struct net *net = dev_net(rt->dst.dev);
1991
1992 rt->rt6i_flags |= RTF_MODIFIED;
1993 rt->rt6i_pmtu = mtu;
1994 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1995}
1996
Martin KaFai Lau0d3f6d22015-11-11 11:51:06 -08001997static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
1998{
1999 return !(rt->rt6i_flags & RTF_CACHE) &&
Wei Wang4e587ea2017-08-25 15:03:10 -07002000 (rt->rt6i_flags & RTF_PCPU ||
2001 rcu_access_pointer(rt->rt6i_node));
Martin KaFai Lau0d3f6d22015-11-11 11:51:06 -08002002}
2003
Martin KaFai Lau45e4fd22015-05-22 20:56:00 -07002004static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
2005 const struct ipv6hdr *iph, u32 mtu)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002006{
Julian Anastasov0dec8792017-02-06 23:14:16 +02002007 const struct in6_addr *daddr, *saddr;
Ian Morris67ba4152014-08-24 21:53:10 +01002008 struct rt6_info *rt6 = (struct rt6_info *)dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002009
Martin KaFai Lau45e4fd22015-05-22 20:56:00 -07002010 if (rt6->rt6i_flags & RTF_LOCAL)
2011 return;
2012
Xin Long19bda362016-10-28 18:18:01 +08002013 if (dst_metric_locked(dst, RTAX_MTU))
2014 return;
2015
Julian Anastasov0dec8792017-02-06 23:14:16 +02002016 if (iph) {
2017 daddr = &iph->daddr;
2018 saddr = &iph->saddr;
2019 } else if (sk) {
2020 daddr = &sk->sk_v6_daddr;
2021 saddr = &inet6_sk(sk)->saddr;
2022 } else {
2023 daddr = NULL;
2024 saddr = NULL;
2025 }
2026 dst_confirm_neigh(dst, daddr);
Martin KaFai Lau45e4fd22015-05-22 20:56:00 -07002027 mtu = max_t(u32, mtu, IPV6_MIN_MTU);
2028 if (mtu >= dst_mtu(dst))
2029 return;
David S. Miller81aded22012-06-15 14:54:11 -07002030
Martin KaFai Lau0d3f6d22015-11-11 11:51:06 -08002031 if (!rt6_cache_allowed_for_pmtu(rt6)) {
Martin KaFai Lau45e4fd22015-05-22 20:56:00 -07002032 rt6_do_update_pmtu(rt6, mtu);
Wei Wang2b760fc2017-10-06 12:06:03 -07002033 /* update rt6_ex->stamp for cache */
2034 if (rt6->rt6i_flags & RTF_CACHE)
2035 rt6_update_exception_stamp_rt(rt6);
Julian Anastasov0dec8792017-02-06 23:14:16 +02002036 } else if (daddr) {
Martin KaFai Lau45e4fd22015-05-22 20:56:00 -07002037 struct rt6_info *nrt6;
Hagen Paul Pfeifer9d289712015-01-15 22:34:25 +01002038
Martin KaFai Lau45e4fd22015-05-22 20:56:00 -07002039 nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
2040 if (nrt6) {
2041 rt6_do_update_pmtu(nrt6, mtu);
Wei Wang2b760fc2017-10-06 12:06:03 -07002042 if (rt6_insert_exception(nrt6, rt6))
2043 dst_release_immediate(&nrt6->dst);
Martin KaFai Lau45e4fd22015-05-22 20:56:00 -07002044 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002045 }
2046}
2047
Martin KaFai Lau45e4fd22015-05-22 20:56:00 -07002048static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
2049 struct sk_buff *skb, u32 mtu)
2050{
2051 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
2052}
2053
David S. Miller42ae66c2012-06-15 20:01:57 -07002054void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09002055 int oif, u32 mark, kuid_t uid)
David S. Miller81aded22012-06-15 14:54:11 -07002056{
2057 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2058 struct dst_entry *dst;
2059 struct flowi6 fl6;
2060
2061 memset(&fl6, 0, sizeof(fl6));
2062 fl6.flowi6_oif = oif;
Lorenzo Colitti1b3c61d2014-05-13 10:17:34 -07002063 fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
David S. Miller81aded22012-06-15 14:54:11 -07002064 fl6.daddr = iph->daddr;
2065 fl6.saddr = iph->saddr;
YOSHIFUJI Hideaki / 吉藤英明6502ca52013-01-13 05:01:51 +00002066 fl6.flowlabel = ip6_flowinfo(iph);
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09002067 fl6.flowi6_uid = uid;
David S. Miller81aded22012-06-15 14:54:11 -07002068
2069 dst = ip6_route_output(net, NULL, &fl6);
2070 if (!dst->error)
Martin KaFai Lau45e4fd22015-05-22 20:56:00 -07002071 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
David S. Miller81aded22012-06-15 14:54:11 -07002072 dst_release(dst);
2073}
2074EXPORT_SYMBOL_GPL(ip6_update_pmtu);
2075
2076void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
2077{
Martin KaFai Lau33c162a2016-04-11 15:29:36 -07002078 struct dst_entry *dst;
2079
David S. Miller81aded22012-06-15 14:54:11 -07002080 ip6_update_pmtu(skb, sock_net(sk), mtu,
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09002081 sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);
Martin KaFai Lau33c162a2016-04-11 15:29:36 -07002082
2083 dst = __sk_dst_get(sk);
2084 if (!dst || !dst->obsolete ||
2085 dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
2086 return;
2087
2088 bh_lock_sock(sk);
2089 if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
2090 ip6_datagram_dst_update(sk, false);
2091 bh_unlock_sock(sk);
David S. Miller81aded22012-06-15 14:54:11 -07002092}
2093EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
2094
Duan Jiongb55b76b2013-09-04 19:44:21 +08002095/* Handle redirects */
2096struct ip6rd_flowi {
2097 struct flowi6 fl6;
2098 struct in6_addr gateway;
2099};
2100
2101static struct rt6_info *__ip6_route_redirect(struct net *net,
2102 struct fib6_table *table,
2103 struct flowi6 *fl6,
2104 int flags)
2105{
2106 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
Wei Wang2b760fc2017-10-06 12:06:03 -07002107 struct rt6_info *rt, *rt_cache;
Duan Jiongb55b76b2013-09-04 19:44:21 +08002108 struct fib6_node *fn;
2109
2110 /* Get the "current" route for this destination and
Alexander Alemayhu67c408c2017-01-07 23:53:00 +01002111	 * check if the redirect has come from the appropriate router.
Duan Jiongb55b76b2013-09-04 19:44:21 +08002112 *
2113 * RFC 4861 specifies that redirects should only be
2114 * accepted if they come from the nexthop to the target.
2115 * Due to the way the routes are chosen, this notion
2116 * is a bit fuzzy and one might need to check all possible
2117 * routes.
2118 */
2119
2120 read_lock_bh(&table->tb6_lock);
2121 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
2122restart:
2123 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2124 if (rt6_check_expired(rt))
2125 continue;
2126 if (rt->dst.error)
2127 break;
2128 if (!(rt->rt6i_flags & RTF_GATEWAY))
2129 continue;
2130 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
2131 continue;
Wei Wang2b760fc2017-10-06 12:06:03 -07002132 /* rt_cache's gateway might be different from its 'parent'
2133 * in the case of an ip redirect.
2134 * So we keep searching in the exception table if the gateway
2135 * is different.
2136 */
2137 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway)) {
2138 rt_cache = rt6_find_cached_rt(rt,
2139 &fl6->daddr,
2140 &fl6->saddr);
2141 if (rt_cache &&
2142 ipv6_addr_equal(&rdfl->gateway,
2143 &rt_cache->rt6i_gateway)) {
2144 rt = rt_cache;
2145 break;
2146 }
Duan Jiongb55b76b2013-09-04 19:44:21 +08002147 continue;
Wei Wang2b760fc2017-10-06 12:06:03 -07002148 }
Duan Jiongb55b76b2013-09-04 19:44:21 +08002149 break;
2150 }
2151
2152 if (!rt)
2153 rt = net->ipv6.ip6_null_entry;
2154 else if (rt->dst.error) {
2155 rt = net->ipv6.ip6_null_entry;
Martin KaFai Laub0a1ba52015-01-20 19:16:02 -08002156 goto out;
2157 }
2158
2159 if (rt == net->ipv6.ip6_null_entry) {
Martin KaFai Laua3c00e42014-10-20 13:42:43 -07002160 fn = fib6_backtrack(fn, &fl6->saddr);
2161 if (fn)
2162 goto restart;
Duan Jiongb55b76b2013-09-04 19:44:21 +08002163 }
Martin KaFai Laua3c00e42014-10-20 13:42:43 -07002164
Martin KaFai Laub0a1ba52015-01-20 19:16:02 -08002165out:
Wei Wangd3843fe2017-10-06 12:06:06 -07002166 ip6_hold_safe(net, &rt, true);
Duan Jiongb55b76b2013-09-04 19:44:21 +08002167
2168 read_unlock_bh(&table->tb6_lock);
2169
David Ahernb8115802015-11-19 12:24:22 -08002170 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
Duan Jiongb55b76b2013-09-04 19:44:21 +08002171 return rt;
2172};
2173
2174static struct dst_entry *ip6_route_redirect(struct net *net,
2175 const struct flowi6 *fl6,
2176 const struct in6_addr *gateway)
2177{
2178 int flags = RT6_LOOKUP_F_HAS_SADDR;
2179 struct ip6rd_flowi rdfl;
2180
2181 rdfl.fl6 = *fl6;
2182 rdfl.gateway = *gateway;
2183
2184 return fib6_rule_lookup(net, &rdfl.fl6,
2185 flags, __ip6_route_redirect);
2186}
2187
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09002188void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
2189 kuid_t uid)
David S. Miller3a5ad2e2012-07-12 00:08:07 -07002190{
2191 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2192 struct dst_entry *dst;
2193 struct flowi6 fl6;
2194
2195 memset(&fl6, 0, sizeof(fl6));
Julian Anastasove374c612014-04-28 10:51:56 +03002196 fl6.flowi6_iif = LOOPBACK_IFINDEX;
David S. Miller3a5ad2e2012-07-12 00:08:07 -07002197 fl6.flowi6_oif = oif;
2198 fl6.flowi6_mark = mark;
David S. Miller3a5ad2e2012-07-12 00:08:07 -07002199 fl6.daddr = iph->daddr;
2200 fl6.saddr = iph->saddr;
YOSHIFUJI Hideaki / 吉藤英明6502ca52013-01-13 05:01:51 +00002201 fl6.flowlabel = ip6_flowinfo(iph);
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09002202 fl6.flowi6_uid = uid;
David S. Miller3a5ad2e2012-07-12 00:08:07 -07002203
Duan Jiongb55b76b2013-09-04 19:44:21 +08002204 dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
2205 rt6_do_redirect(dst, NULL, skb);
David S. Miller3a5ad2e2012-07-12 00:08:07 -07002206 dst_release(dst);
2207}
2208EXPORT_SYMBOL_GPL(ip6_redirect);
2209
Duan Jiongc92a59e2013-08-22 12:07:35 +08002210void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
2211 u32 mark)
2212{
2213 const struct ipv6hdr *iph = ipv6_hdr(skb);
2214 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
2215 struct dst_entry *dst;
2216 struct flowi6 fl6;
2217
2218 memset(&fl6, 0, sizeof(fl6));
Julian Anastasove374c612014-04-28 10:51:56 +03002219 fl6.flowi6_iif = LOOPBACK_IFINDEX;
Duan Jiongc92a59e2013-08-22 12:07:35 +08002220 fl6.flowi6_oif = oif;
2221 fl6.flowi6_mark = mark;
Duan Jiongc92a59e2013-08-22 12:07:35 +08002222 fl6.daddr = msg->dest;
2223 fl6.saddr = iph->daddr;
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09002224 fl6.flowi6_uid = sock_net_uid(net, NULL);
Duan Jiongc92a59e2013-08-22 12:07:35 +08002225
Duan Jiongb55b76b2013-09-04 19:44:21 +08002226 dst = ip6_route_redirect(net, &fl6, &iph->saddr);
2227 rt6_do_redirect(dst, NULL, skb);
Duan Jiongc92a59e2013-08-22 12:07:35 +08002228 dst_release(dst);
2229}
2230
David S. Miller3a5ad2e2012-07-12 00:08:07 -07002231void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
2232{
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09002233 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
2234 sk->sk_uid);
David S. Miller3a5ad2e2012-07-12 00:08:07 -07002235}
2236EXPORT_SYMBOL_GPL(ip6_sk_redirect);
2237
David S. Miller0dbaee32010-12-13 12:52:14 -08002238static unsigned int ip6_default_advmss(const struct dst_entry *dst)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002239{
David S. Miller0dbaee32010-12-13 12:52:14 -08002240 struct net_device *dev = dst->dev;
2241 unsigned int mtu = dst_mtu(dst);
2242 struct net *net = dev_net(dev);
2243
Linus Torvalds1da177e2005-04-16 15:20:36 -07002244 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
2245
Daniel Lezcano55786892008-03-04 13:47:47 -08002246 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
2247 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002248
2249 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002250 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
2251 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
2252 * IPV6_MAXPLEN is also valid and means: "any MSS,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002253 * rely only on pmtu discovery"
2254 */
2255 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
2256 mtu = IPV6_MAXPLEN;
2257 return mtu;
2258}
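/* Worked example (editorial): with a 1500 byte MTU the advertised MSS is
 * 1500 - sizeof(struct ipv6hdr) - sizeof(struct tcphdr) = 1500 - 40 - 20
 * = 1440, floored at ip6_rt_min_advmss; anything above
 * IPV6_MAXPLEN - tcp_header_size is treated as "rely on PMTU discovery".
 */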
2259
Steffen Klassertebb762f2011-11-23 02:12:51 +00002260static unsigned int ip6_mtu(const struct dst_entry *dst)
David S. Millerd33e4552010-12-14 13:01:14 -08002261{
Martin KaFai Lau4b32b5a2015-04-28 13:03:06 -07002262 const struct rt6_info *rt = (const struct rt6_info *)dst;
2263 unsigned int mtu = rt->rt6i_pmtu;
David S. Millerd33e4552010-12-14 13:01:14 -08002264 struct inet6_dev *idev;
Steffen Klassert618f9bc2011-11-23 02:13:31 +00002265
2266 if (mtu)
Eric Dumazet30f78d82014-04-10 21:23:36 -07002267 goto out;
Steffen Klassert618f9bc2011-11-23 02:13:31 +00002268
Martin KaFai Lau4b32b5a2015-04-28 13:03:06 -07002269 mtu = dst_metric_raw(dst, RTAX_MTU);
2270 if (mtu)
2271 goto out;
2272
Steffen Klassert618f9bc2011-11-23 02:13:31 +00002273 mtu = IPV6_MIN_MTU;
David S. Millerd33e4552010-12-14 13:01:14 -08002274
2275 rcu_read_lock();
2276 idev = __in6_dev_get(dst->dev);
2277 if (idev)
2278 mtu = idev->cnf.mtu6;
2279 rcu_read_unlock();
2280
Eric Dumazet30f78d82014-04-10 21:23:36 -07002281out:
Roopa Prabhu14972cb2016-08-24 20:10:43 -07002282 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2283
2284 return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
David S. Millerd33e4552010-12-14 13:01:14 -08002285}
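/* MTU resolution order in ip6_mtu() above: a per-route exception value
 * (rt6i_pmtu) wins, then an explicit RTAX_MTU metric, then the egress
 * device's cnf.mtu6; the result is capped at IP6_MAX_MTU and reduced by
 * any lwtunnel encapsulation headroom.
 */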
2286
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08002287struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
David S. Miller87a11572011-12-06 17:04:13 -05002288 struct flowi6 *fl6)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002289{
David S. Miller87a11572011-12-06 17:04:13 -05002290 struct dst_entry *dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002291 struct rt6_info *rt;
2292 struct inet6_dev *idev = in6_dev_get(dev);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002293 struct net *net = dev_net(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002294
David S. Miller38308472011-12-03 18:02:47 -05002295 if (unlikely(!idev))
Eric Dumazet122bdf62012-03-14 21:13:11 +00002296 return ERR_PTR(-ENODEV);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002297
Martin KaFai Lauad706862015-08-14 11:05:52 -07002298 rt = ip6_dst_alloc(net, dev, 0);
David S. Miller38308472011-12-03 18:02:47 -05002299 if (unlikely(!rt)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002300 in6_dev_put(idev);
David S. Miller87a11572011-12-06 17:04:13 -05002301 dst = ERR_PTR(-ENOMEM);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002302 goto out;
2303 }
2304
Yan, Zheng8e2ec632011-09-05 21:34:30 +00002305 rt->dst.flags |= DST_HOST;
2306 rt->dst.output = ip6_output;
Julian Anastasov550bab42013-10-20 15:43:04 +03002307 rt->rt6i_gateway = fl6->daddr;
David S. Miller87a11572011-12-06 17:04:13 -05002308 rt->rt6i_dst.addr = fl6->daddr;
Yan, Zheng8e2ec632011-09-05 21:34:30 +00002309 rt->rt6i_dst.plen = 128;
2310 rt->rt6i_idev = idev;
Li RongQing14edd872012-10-24 14:01:18 +08002311 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002312
Wei Wang587fea72017-06-17 10:42:36 -07002313 /* Add this dst into uncached_list so that rt6_ifdown() can
2314 * do proper release of the net_device
2315 */
2316 rt6_uncached_list_add(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002317
David S. Miller87a11572011-12-06 17:04:13 -05002318 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
2319
Linus Torvalds1da177e2005-04-16 15:20:36 -07002320out:
David S. Miller87a11572011-12-06 17:04:13 -05002321 return dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002322}
2323
Daniel Lezcano569d3642008-01-18 03:56:57 -08002324static int ip6_dst_gc(struct dst_ops *ops)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002325{
Alexey Dobriyan86393e52009-08-29 01:34:49 +00002326 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
Daniel Lezcano7019b782008-03-04 13:50:14 -08002327 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
2328 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
2329 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
2330 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
2331 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
Eric Dumazetfc66f952010-10-08 06:37:34 +00002332 int entries;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002333
Eric Dumazetfc66f952010-10-08 06:37:34 +00002334 entries = dst_entries_get_fast(ops);
Michal Kubeček49a18d82013-08-01 10:04:24 +02002335 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
Eric Dumazetfc66f952010-10-08 06:37:34 +00002336 entries <= rt_max_size)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002337 goto out;
2338
Benjamin Thery6891a342008-03-04 13:49:47 -08002339 net->ipv6.ip6_rt_gc_expire++;
Li RongQing14956642014-05-19 17:30:28 +08002340 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
Eric Dumazetfc66f952010-10-08 06:37:34 +00002341 entries = dst_entries_get_slow(ops);
2342 if (entries < ops->gc_thresh)
Daniel Lezcano7019b782008-03-04 13:50:14 -08002343 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002344out:
Daniel Lezcano7019b782008-03-04 13:50:14 -08002345 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
Eric Dumazetfc66f952010-10-08 06:37:34 +00002346 return entries > rt_max_size;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002347}
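/* Pacing sketch: a forced fib6 GC runs only when the minimum interval has
 * elapsed or the entry count exceeds ip6_rt_max_size.  ip6_rt_gc_expire is
 * bumped on each run (and reset to half of ip6_rt_gc_timeout once the
 * table drops below gc_thresh), then decays by a 1/2^elasticity fraction,
 * so collection pressure ramps up gradually under sustained load.
 */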
2348
Florian Westphale715b6d2015-01-05 23:57:44 +01002349static int ip6_convert_metrics(struct mx6_config *mxc,
2350 const struct fib6_config *cfg)
2351{
Daniel Borkmannc3a8d942015-08-31 15:58:47 +02002352 bool ecn_ca = false;
Florian Westphale715b6d2015-01-05 23:57:44 +01002353 struct nlattr *nla;
2354 int remaining;
2355 u32 *mp;
2356
Ian Morris63159f22015-03-29 14:00:04 +01002357 if (!cfg->fc_mx)
Florian Westphale715b6d2015-01-05 23:57:44 +01002358 return 0;
2359
2360 mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
2361 if (unlikely(!mp))
2362 return -ENOMEM;
2363
2364 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
2365 int type = nla_type(nla);
Daniel Borkmann1bb14802015-08-31 15:58:45 +02002366 u32 val;
Florian Westphale715b6d2015-01-05 23:57:44 +01002367
Daniel Borkmann1bb14802015-08-31 15:58:45 +02002368 if (!type)
2369 continue;
2370 if (unlikely(type > RTAX_MAX))
2371 goto err;
Daniel Borkmannea697632015-01-05 23:57:47 +01002372
Daniel Borkmann1bb14802015-08-31 15:58:45 +02002373 if (type == RTAX_CC_ALGO) {
2374 char tmp[TCP_CA_NAME_MAX];
2375
2376 nla_strlcpy(tmp, nla, sizeof(tmp));
Daniel Borkmannc3a8d942015-08-31 15:58:47 +02002377 val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
Daniel Borkmann1bb14802015-08-31 15:58:45 +02002378 if (val == TCP_CA_UNSPEC)
Florian Westphale715b6d2015-01-05 23:57:44 +01002379 goto err;
Daniel Borkmann1bb14802015-08-31 15:58:45 +02002380 } else {
2381 val = nla_get_u32(nla);
Florian Westphale715b6d2015-01-05 23:57:44 +01002382 }
Paolo Abeni626abd52016-05-13 18:33:41 +02002383 if (type == RTAX_HOPLIMIT && val > 255)
2384 val = 255;
Daniel Borkmannb8d3e412015-08-31 15:58:46 +02002385 if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
2386 goto err;
Daniel Borkmann1bb14802015-08-31 15:58:45 +02002387
2388 mp[type - 1] = val;
2389 __set_bit(type - 1, mxc->mx_valid);
Florian Westphale715b6d2015-01-05 23:57:44 +01002390 }
2391
Daniel Borkmannc3a8d942015-08-31 15:58:47 +02002392 if (ecn_ca) {
2393 __set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
2394 mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
2395 }
Florian Westphale715b6d2015-01-05 23:57:44 +01002396
Daniel Borkmannc3a8d942015-08-31 15:58:47 +02002397 mxc->mx = mp;
Florian Westphale715b6d2015-01-05 23:57:44 +01002398 return 0;
2399 err:
2400 kfree(mp);
2401 return -EINVAL;
2402}
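/* Netlink example (sketch): a route added with, say, RTAX_HOPLIMIT = 300
 * and RTAX_CC_ALGO = "cubic" ends up here with the hop limit clamped to
 * 255 and the congestion control name translated to a key via
 * tcp_ca_get_key_by_name(); if the algorithm needs ECN,
 * DST_FEATURE_ECN_CA is folded into RTAX_FEATURES.  The result (the mx
 * array plus the mx_valid bitmap) is handed back to the caller in mxc.
 */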
Linus Torvalds1da177e2005-04-16 15:20:36 -07002403
David Ahern8c145862016-04-24 21:26:04 -07002404static struct rt6_info *ip6_nh_lookup_table(struct net *net,
2405 struct fib6_config *cfg,
2406 const struct in6_addr *gw_addr)
2407{
2408 struct flowi6 fl6 = {
2409 .flowi6_oif = cfg->fc_ifindex,
2410 .daddr = *gw_addr,
2411 .saddr = cfg->fc_prefsrc,
2412 };
2413 struct fib6_table *table;
2414 struct rt6_info *rt;
David Ahernd5d32e42016-10-24 12:27:23 -07002415 int flags = RT6_LOOKUP_F_IFACE | RT6_LOOKUP_F_IGNORE_LINKSTATE;
David Ahern8c145862016-04-24 21:26:04 -07002416
2417 table = fib6_get_table(net, cfg->fc_table);
2418 if (!table)
2419 return NULL;
2420
2421 if (!ipv6_addr_any(&cfg->fc_prefsrc))
2422 flags |= RT6_LOOKUP_F_HAS_SADDR;
2423
2424 rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags);
2425
2426 /* if table lookup failed, fall back to full lookup */
2427 if (rt == net->ipv6.ip6_null_entry) {
2428 ip6_rt_put(rt);
2429 rt = NULL;
2430 }
2431
2432 return rt;
2433}
2434
David Ahern333c4302017-05-21 10:12:04 -06002435static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
2436 struct netlink_ext_ack *extack)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002437{
Daniel Lezcano55786892008-03-04 13:47:47 -08002438 struct net *net = cfg->fc_nlinfo.nl_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002439 struct rt6_info *rt = NULL;
2440 struct net_device *dev = NULL;
2441 struct inet6_dev *idev = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07002442 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002443 int addr_type;
Roopa Prabhu8c5b83f2015-10-10 08:26:36 -07002444 int err = -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002445
David Ahern557c44b2017-04-19 14:19:43 -07002446 /* RTF_PCPU is an internal flag; can not be set by userspace */
David Ahernd5d531c2017-05-21 10:12:05 -06002447 if (cfg->fc_flags & RTF_PCPU) {
2448 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
David Ahern557c44b2017-04-19 14:19:43 -07002449 goto out;
David Ahernd5d531c2017-05-21 10:12:05 -06002450 }
David Ahern557c44b2017-04-19 14:19:43 -07002451
David Ahernd5d531c2017-05-21 10:12:05 -06002452 if (cfg->fc_dst_len > 128) {
2453 NL_SET_ERR_MSG(extack, "Invalid prefix length");
Roopa Prabhu8c5b83f2015-10-10 08:26:36 -07002454 goto out;
David Ahernd5d531c2017-05-21 10:12:05 -06002455 }
2456 if (cfg->fc_src_len > 128) {
2457 NL_SET_ERR_MSG(extack, "Invalid source address length");
2458 goto out;
2459 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002460#ifndef CONFIG_IPV6_SUBTREES
David Ahernd5d531c2017-05-21 10:12:05 -06002461 if (cfg->fc_src_len) {
2462 NL_SET_ERR_MSG(extack,
2463 "Specifying source address requires IPV6_SUBTREES to be enabled");
Roopa Prabhu8c5b83f2015-10-10 08:26:36 -07002464 goto out;
David Ahernd5d531c2017-05-21 10:12:05 -06002465 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002466#endif
Thomas Graf86872cb2006-08-22 00:01:08 -07002467 if (cfg->fc_ifindex) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002468 err = -ENODEV;
Daniel Lezcano55786892008-03-04 13:47:47 -08002469 dev = dev_get_by_index(net, cfg->fc_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002470 if (!dev)
2471 goto out;
2472 idev = in6_dev_get(dev);
2473 if (!idev)
2474 goto out;
2475 }
2476
Thomas Graf86872cb2006-08-22 00:01:08 -07002477 if (cfg->fc_metric == 0)
2478 cfg->fc_metric = IP6_RT_PRIO_USER;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002479
Matti Vaittinend71314b2011-11-14 00:14:49 +00002480 err = -ENOBUFS;
David S. Miller38308472011-12-03 18:02:47 -05002481 if (cfg->fc_nlinfo.nlh &&
2482 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
Matti Vaittinend71314b2011-11-14 00:14:49 +00002483 table = fib6_get_table(net, cfg->fc_table);
David S. Miller38308472011-12-03 18:02:47 -05002484 if (!table) {
Joe Perchesf3213832012-05-15 14:11:53 +00002485 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
Matti Vaittinend71314b2011-11-14 00:14:49 +00002486 table = fib6_new_table(net, cfg->fc_table);
2487 }
2488 } else {
2489 table = fib6_new_table(net, cfg->fc_table);
2490 }
David S. Miller38308472011-12-03 18:02:47 -05002491
2492 if (!table)
Thomas Grafc71099a2006-08-04 23:20:06 -07002493 goto out;
Thomas Grafc71099a2006-08-04 23:20:06 -07002494
Martin KaFai Lauad706862015-08-14 11:05:52 -07002495 rt = ip6_dst_alloc(net, NULL,
2496 (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002497
David S. Miller38308472011-12-03 18:02:47 -05002498 if (!rt) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002499 err = -ENOMEM;
2500 goto out;
2501 }
2502
Gao feng1716a962012-04-06 00:13:10 +00002503 if (cfg->fc_flags & RTF_EXPIRES)
2504 rt6_set_expires(rt, jiffies +
2505 clock_t_to_jiffies(cfg->fc_expires));
2506 else
2507 rt6_clean_expires(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002508
Thomas Graf86872cb2006-08-22 00:01:08 -07002509 if (cfg->fc_protocol == RTPROT_UNSPEC)
2510 cfg->fc_protocol = RTPROT_BOOT;
2511 rt->rt6i_protocol = cfg->fc_protocol;
2512
2513 addr_type = ipv6_addr_type(&cfg->fc_dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002514
2515 if (addr_type & IPV6_ADDR_MULTICAST)
Changli Gaod8d1f302010-06-10 23:31:35 -07002516 rt->dst.input = ip6_mc_input;
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00002517 else if (cfg->fc_flags & RTF_LOCAL)
2518 rt->dst.input = ip6_input;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002519 else
Changli Gaod8d1f302010-06-10 23:31:35 -07002520 rt->dst.input = ip6_forward;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002521
Changli Gaod8d1f302010-06-10 23:31:35 -07002522 rt->dst.output = ip6_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002523
Roopa Prabhu19e42e42015-07-21 10:43:48 +02002524 if (cfg->fc_encap) {
2525 struct lwtunnel_state *lwtstate;
2526
David Ahern30357d72017-01-30 12:07:37 -08002527 err = lwtunnel_build_state(cfg->fc_encap_type,
Tom Herbert127eb7c2015-08-24 09:45:41 -07002528 cfg->fc_encap, AF_INET6, cfg,
David Ahern9ae28722017-05-27 16:19:28 -06002529 &lwtstate, extack);
Roopa Prabhu19e42e42015-07-21 10:43:48 +02002530 if (err)
2531 goto out;
Jiri Benc61adedf2015-08-20 13:56:25 +02002532 rt->dst.lwtstate = lwtstate_get(lwtstate);
2533 if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
2534 rt->dst.lwtstate->orig_output = rt->dst.output;
2535 rt->dst.output = lwtunnel_output;
Tom Herbert25368622015-08-17 13:42:24 -07002536 }
Jiri Benc61adedf2015-08-20 13:56:25 +02002537 if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
2538 rt->dst.lwtstate->orig_input = rt->dst.input;
2539 rt->dst.input = lwtunnel_input;
Tom Herbert25368622015-08-17 13:42:24 -07002540 }
Roopa Prabhu19e42e42015-07-21 10:43:48 +02002541 }
2542
Thomas Graf86872cb2006-08-22 00:01:08 -07002543 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
2544 rt->rt6i_dst.plen = cfg->fc_dst_len;
Martin KaFai Lauafc4eef2015-04-28 13:03:07 -07002545 if (rt->rt6i_dst.plen == 128)
Michal Kubečeke5fd3872014-03-27 13:04:08 +01002546 rt->dst.flags |= DST_HOST;
Michal Kubečeke5fd3872014-03-27 13:04:08 +01002547
Linus Torvalds1da177e2005-04-16 15:20:36 -07002548#ifdef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07002549 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
2550 rt->rt6i_src.plen = cfg->fc_src_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002551#endif
2552
Thomas Graf86872cb2006-08-22 00:01:08 -07002553 rt->rt6i_metric = cfg->fc_metric;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002554
2555 /* We cannot add true routes via loopback here,
 2556	   as they would result in kernel looping; promote them to reject routes
2557 */
Thomas Graf86872cb2006-08-22 00:01:08 -07002558 if ((cfg->fc_flags & RTF_REJECT) ||
David S. Miller38308472011-12-03 18:02:47 -05002559 (dev && (dev->flags & IFF_LOOPBACK) &&
2560 !(addr_type & IPV6_ADDR_LOOPBACK) &&
2561 !(cfg->fc_flags & RTF_LOCAL))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002562 /* hold loopback dev/idev if we haven't done so. */
Daniel Lezcano55786892008-03-04 13:47:47 -08002563 if (dev != net->loopback_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002564 if (dev) {
2565 dev_put(dev);
2566 in6_dev_put(idev);
2567 }
Daniel Lezcano55786892008-03-04 13:47:47 -08002568 dev = net->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002569 dev_hold(dev);
2570 idev = in6_dev_get(dev);
2571 if (!idev) {
2572 err = -ENODEV;
2573 goto out;
2574 }
2575 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002576 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
Nicolas Dichtelef2c7d72012-09-05 02:12:42 +00002577 switch (cfg->fc_type) {
2578 case RTN_BLACKHOLE:
2579 rt->dst.error = -EINVAL;
Eric W. Biedermanede20592015-10-07 16:48:47 -05002580 rt->dst.output = dst_discard_out;
Kamala R7150aed2013-12-02 19:55:21 +05302581 rt->dst.input = dst_discard;
Nicolas Dichtelef2c7d72012-09-05 02:12:42 +00002582 break;
2583 case RTN_PROHIBIT:
2584 rt->dst.error = -EACCES;
Kamala R7150aed2013-12-02 19:55:21 +05302585 rt->dst.output = ip6_pkt_prohibit_out;
2586 rt->dst.input = ip6_pkt_prohibit;
Nicolas Dichtelef2c7d72012-09-05 02:12:42 +00002587 break;
Nicolas Dichtelb4949ab2012-09-06 05:53:35 +00002588 case RTN_THROW:
Nikola Forró0315e382015-09-17 16:01:32 +02002589 case RTN_UNREACHABLE:
Nicolas Dichtelef2c7d72012-09-05 02:12:42 +00002590 default:
Kamala R7150aed2013-12-02 19:55:21 +05302591 rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
Nikola Forró0315e382015-09-17 16:01:32 +02002592 : (cfg->fc_type == RTN_UNREACHABLE)
2593 ? -EHOSTUNREACH : -ENETUNREACH;
Kamala R7150aed2013-12-02 19:55:21 +05302594 rt->dst.output = ip6_pkt_discard_out;
2595 rt->dst.input = ip6_pkt_discard;
Nicolas Dichtelef2c7d72012-09-05 02:12:42 +00002596 break;
2597 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002598 goto install_route;
2599 }
2600
Thomas Graf86872cb2006-08-22 00:01:08 -07002601 if (cfg->fc_flags & RTF_GATEWAY) {
Eric Dumazetb71d1d42011-04-22 04:53:02 +00002602 const struct in6_addr *gw_addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002603 int gwa_type;
2604
Thomas Graf86872cb2006-08-22 00:01:08 -07002605 gw_addr = &cfg->fc_gateway;
Florian Westphal330567b2015-08-07 10:54:28 +02002606 gwa_type = ipv6_addr_type(gw_addr);
Florian Westphal48ed7b22015-05-21 00:25:41 +02002607
 2608		/* if gw_addr is local we will fail to detect this in case
 2609		 * the address is still TENTATIVE (DAD in progress). rt6_lookup()
 2610		 * will return the already-added prefix route via the interface
 2611		 * that the prefix route was assigned to, which might be non-loopback.
 2612		 */
2613 err = -EINVAL;
Florian Westphal330567b2015-08-07 10:54:28 +02002614 if (ipv6_chk_addr_and_flags(net, gw_addr,
2615 gwa_type & IPV6_ADDR_LINKLOCAL ?
David Ahernd5d531c2017-05-21 10:12:05 -06002616 dev : NULL, 0, 0)) {
2617 NL_SET_ERR_MSG(extack, "Invalid gateway address");
Florian Westphal48ed7b22015-05-21 00:25:41 +02002618 goto out;
David Ahernd5d531c2017-05-21 10:12:05 -06002619 }
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00002620 rt->rt6i_gateway = *gw_addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002621
2622 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
David Ahern8c145862016-04-24 21:26:04 -07002623 struct rt6_info *grt = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002624
2625 /* IPv6 strictly inhibits using not link-local
2626 addresses as nexthop address.
2627 Otherwise, router will not able to send redirects.
2628 It is very good, but in some (rare!) circumstances
2629 (SIT, PtP, NBMA NOARP links) it is handy to allow
2630 some exceptions. --ANK
Erik Nordmark96d58222016-12-03 20:57:09 -08002631 We allow IPv4-mapped nexthops to support RFC4798-type
2632 addressing
Linus Torvalds1da177e2005-04-16 15:20:36 -07002633 */
Erik Nordmark96d58222016-12-03 20:57:09 -08002634 if (!(gwa_type & (IPV6_ADDR_UNICAST |
David Ahernd5d531c2017-05-21 10:12:05 -06002635 IPV6_ADDR_MAPPED))) {
2636 NL_SET_ERR_MSG(extack,
2637 "Invalid gateway address");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002638 goto out;
David Ahernd5d531c2017-05-21 10:12:05 -06002639 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002640
Vincent Bernata435a072016-09-18 17:46:07 +02002641 if (cfg->fc_table) {
David Ahern8c145862016-04-24 21:26:04 -07002642 grt = ip6_nh_lookup_table(net, cfg, gw_addr);
2643
Vincent Bernata435a072016-09-18 17:46:07 +02002644 if (grt) {
2645 if (grt->rt6i_flags & RTF_GATEWAY ||
2646 (dev && dev != grt->dst.dev)) {
2647 ip6_rt_put(grt);
2648 grt = NULL;
2649 }
2650 }
2651 }
2652
David Ahern8c145862016-04-24 21:26:04 -07002653 if (!grt)
2654 grt = rt6_lookup(net, gw_addr, NULL,
2655 cfg->fc_ifindex, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002656
2657 err = -EHOSTUNREACH;
David S. Miller38308472011-12-03 18:02:47 -05002658 if (!grt)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002659 goto out;
2660 if (dev) {
David S. Millerd1918542011-12-28 20:19:20 -05002661 if (dev != grt->dst.dev) {
Amerigo Wang94e187c2012-10-29 00:13:19 +00002662 ip6_rt_put(grt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002663 goto out;
2664 }
2665 } else {
David S. Millerd1918542011-12-28 20:19:20 -05002666 dev = grt->dst.dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002667 idev = grt->rt6i_idev;
2668 dev_hold(dev);
2669 in6_dev_hold(grt->rt6i_idev);
2670 }
David S. Miller38308472011-12-03 18:02:47 -05002671 if (!(grt->rt6i_flags & RTF_GATEWAY))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002672 err = 0;
Amerigo Wang94e187c2012-10-29 00:13:19 +00002673 ip6_rt_put(grt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002674
2675 if (err)
2676 goto out;
2677 }
2678 err = -EINVAL;
David Ahernd5d531c2017-05-21 10:12:05 -06002679 if (!dev) {
2680 NL_SET_ERR_MSG(extack, "Egress device not specified");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002681 goto out;
David Ahernd5d531c2017-05-21 10:12:05 -06002682 } else if (dev->flags & IFF_LOOPBACK) {
2683 NL_SET_ERR_MSG(extack,
2684 "Egress device can not be loopback device for this route");
2685 goto out;
2686 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002687 }
2688
2689 err = -ENODEV;
David S. Miller38308472011-12-03 18:02:47 -05002690 if (!dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002691 goto out;
2692
Daniel Walterc3968a82011-04-13 21:10:57 +00002693 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
2694 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
David Ahernd5d531c2017-05-21 10:12:05 -06002695 NL_SET_ERR_MSG(extack, "Invalid source address");
Daniel Walterc3968a82011-04-13 21:10:57 +00002696 err = -EINVAL;
2697 goto out;
2698 }
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00002699 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
Daniel Walterc3968a82011-04-13 21:10:57 +00002700 rt->rt6i_prefsrc.plen = 128;
2701 } else
2702 rt->rt6i_prefsrc.plen = 0;
2703
Thomas Graf86872cb2006-08-22 00:01:08 -07002704 rt->rt6i_flags = cfg->fc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002705
2706install_route:
Changli Gaod8d1f302010-06-10 23:31:35 -07002707 rt->dst.dev = dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002708 rt->rt6i_idev = idev;
Thomas Grafc71099a2006-08-04 23:20:06 -07002709 rt->rt6i_table = table;
Daniel Lezcano63152fc2008-03-03 23:31:11 -08002710
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002711 cfg->fc_nlinfo.nl_net = dev_net(dev);
Daniel Lezcano63152fc2008-03-03 23:31:11 -08002712
Roopa Prabhu8c5b83f2015-10-10 08:26:36 -07002713 return rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002714out:
2715 if (dev)
2716 dev_put(dev);
2717 if (idev)
2718 in6_dev_put(idev);
Wei Wang587fea72017-06-17 10:42:36 -07002719 if (rt)
2720 dst_release_immediate(&rt->dst);
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07002721
Roopa Prabhu8c5b83f2015-10-10 08:26:36 -07002722 return ERR_PTR(err);
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07002723}
2724
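/* Build a route from @cfg via ip6_route_info_create(), convert any
 * user-supplied metrics, and insert the result into the FIB tree.
 */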
David Ahern333c4302017-05-21 10:12:04 -06002725int ip6_route_add(struct fib6_config *cfg,
2726 struct netlink_ext_ack *extack)
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07002727{
2728 struct mx6_config mxc = { .mx = NULL, };
Roopa Prabhu8c5b83f2015-10-10 08:26:36 -07002729 struct rt6_info *rt;
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07002730 int err;
2731
David Ahern333c4302017-05-21 10:12:04 -06002732 rt = ip6_route_info_create(cfg, extack);
Roopa Prabhu8c5b83f2015-10-10 08:26:36 -07002733 if (IS_ERR(rt)) {
2734 err = PTR_ERR(rt);
2735 rt = NULL;
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07002736 goto out;
Roopa Prabhu8c5b83f2015-10-10 08:26:36 -07002737 }
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07002738
2739 err = ip6_convert_metrics(&mxc, cfg);
2740 if (err)
2741 goto out;
2742
David Ahern333c4302017-05-21 10:12:04 -06002743 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc, extack);
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07002744
2745 kfree(mxc.mx);
2746
2747 return err;
2748out:
Wei Wang587fea72017-06-17 10:42:36 -07002749 if (rt)
2750 dst_release_immediate(&rt->dst);
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07002751
Linus Torvalds1da177e2005-04-16 15:20:36 -07002752 return err;
2753}
2754
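/* Unlink @rt from its FIB table under the table write lock and drop the
 * caller's reference.  The null entry itself is never deleted.
 */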
Thomas Graf86872cb2006-08-22 00:01:08 -07002755static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002756{
2757 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -07002758 struct fib6_table *table;
David S. Millerd1918542011-12-28 20:19:20 -05002759 struct net *net = dev_net(rt->dst.dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002760
Wei Wanga4c2fd72017-06-17 10:42:42 -07002761 if (rt == net->ipv6.ip6_null_entry) {
Gao feng6825a262012-09-19 19:25:34 +00002762 err = -ENOENT;
2763 goto out;
2764 }
Patrick McHardy6c813a72006-08-06 22:22:47 -07002765
Thomas Grafc71099a2006-08-04 23:20:06 -07002766 table = rt->rt6i_table;
2767 write_lock_bh(&table->tb6_lock);
Thomas Graf86872cb2006-08-22 00:01:08 -07002768 err = fib6_del(rt, info);
Thomas Grafc71099a2006-08-04 23:20:06 -07002769 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002770
Gao feng6825a262012-09-19 19:25:34 +00002771out:
Amerigo Wang94e187c2012-10-29 00:13:19 +00002772 ip6_rt_put(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002773 return err;
2774}
2775
Thomas Grafe0a1ad732006-08-22 00:00:21 -07002776int ip6_del_rt(struct rt6_info *rt)
2777{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08002778 struct nl_info info = {
David S. Millerd1918542011-12-28 20:19:20 -05002779 .nl_net = dev_net(rt->dst.dev),
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08002780 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002781 return __ip6_del_rt(rt, &info);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07002782}
2783
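/* Delete a multipath route together with all of its siblings.  A single
 * RTM_DELROUTE notification covering every nexthop is sent when it can be
 * built; otherwise the per-route notifications from fib6_del() are used.
 */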
David Ahern0ae81332017-02-02 12:37:08 -08002784static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
2785{
2786 struct nl_info *info = &cfg->fc_nlinfo;
WANG Conge3330032017-02-27 16:07:43 -08002787 struct net *net = info->nl_net;
David Ahern16a16cd2017-02-02 12:37:11 -08002788 struct sk_buff *skb = NULL;
David Ahern0ae81332017-02-02 12:37:08 -08002789 struct fib6_table *table;
WANG Conge3330032017-02-27 16:07:43 -08002790 int err = -ENOENT;
David Ahern0ae81332017-02-02 12:37:08 -08002791
WANG Conge3330032017-02-27 16:07:43 -08002792 if (rt == net->ipv6.ip6_null_entry)
2793 goto out_put;
David Ahern0ae81332017-02-02 12:37:08 -08002794 table = rt->rt6i_table;
2795 write_lock_bh(&table->tb6_lock);
2796
2797 if (rt->rt6i_nsiblings && cfg->fc_delete_all_nh) {
2798 struct rt6_info *sibling, *next_sibling;
2799
David Ahern16a16cd2017-02-02 12:37:11 -08002800 /* prefer to send a single notification with all hops */
2801 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
2802 if (skb) {
2803 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2804
WANG Conge3330032017-02-27 16:07:43 -08002805 if (rt6_fill_node(net, skb, rt,
David Ahern16a16cd2017-02-02 12:37:11 -08002806 NULL, NULL, 0, RTM_DELROUTE,
2807 info->portid, seq, 0) < 0) {
2808 kfree_skb(skb);
2809 skb = NULL;
2810 } else
2811 info->skip_notify = 1;
2812 }
2813
David Ahern0ae81332017-02-02 12:37:08 -08002814 list_for_each_entry_safe(sibling, next_sibling,
2815 &rt->rt6i_siblings,
2816 rt6i_siblings) {
2817 err = fib6_del(sibling, info);
2818 if (err)
WANG Conge3330032017-02-27 16:07:43 -08002819 goto out_unlock;
David Ahern0ae81332017-02-02 12:37:08 -08002820 }
2821 }
2822
2823 err = fib6_del(rt, info);
WANG Conge3330032017-02-27 16:07:43 -08002824out_unlock:
David Ahern0ae81332017-02-02 12:37:08 -08002825 write_unlock_bh(&table->tb6_lock);
WANG Conge3330032017-02-27 16:07:43 -08002826out_put:
David Ahern0ae81332017-02-02 12:37:08 -08002827 ip6_rt_put(rt);
David Ahern16a16cd2017-02-02 12:37:11 -08002828
2829 if (skb) {
WANG Conge3330032017-02-27 16:07:43 -08002830 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
David Ahern16a16cd2017-02-02 12:37:11 -08002831 info->nlh, gfp_any());
2832 }
David Ahern0ae81332017-02-02 12:37:08 -08002833 return err;
2834}
2835
David Ahern333c4302017-05-21 10:12:04 -06002836static int ip6_route_del(struct fib6_config *cfg,
2837 struct netlink_ext_ack *extack)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002838{
Wei Wang2b760fc2017-10-06 12:06:03 -07002839 struct rt6_info *rt, *rt_cache;
Thomas Grafc71099a2006-08-04 23:20:06 -07002840 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002841 struct fib6_node *fn;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002842 int err = -ESRCH;
2843
Daniel Lezcano55786892008-03-04 13:47:47 -08002844 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
David Ahernd5d531c2017-05-21 10:12:05 -06002845 if (!table) {
2846 NL_SET_ERR_MSG(extack, "FIB table does not exist");
Thomas Grafc71099a2006-08-04 23:20:06 -07002847 return err;
David Ahernd5d531c2017-05-21 10:12:05 -06002848 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002849
Thomas Grafc71099a2006-08-04 23:20:06 -07002850 read_lock_bh(&table->tb6_lock);
2851
2852 fn = fib6_locate(&table->tb6_root,
Thomas Graf86872cb2006-08-22 00:01:08 -07002853 &cfg->fc_dst, cfg->fc_dst_len,
Wei Wang38fbeee2017-10-06 12:06:02 -07002854 &cfg->fc_src, cfg->fc_src_len,
Wei Wang2b760fc2017-10-06 12:06:03 -07002855 !(cfg->fc_flags & RTF_CACHE));
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002856
Linus Torvalds1da177e2005-04-16 15:20:36 -07002857 if (fn) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002858 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
Wei Wang2b760fc2017-10-06 12:06:03 -07002859 if (cfg->fc_flags & RTF_CACHE) {
2860 rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
2861 &cfg->fc_src);
2862 if (!rt_cache)
2863 continue;
2864 rt = rt_cache;
2865 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002866 if (cfg->fc_ifindex &&
David S. Millerd1918542011-12-28 20:19:20 -05002867 (!rt->dst.dev ||
2868 rt->dst.dev->ifindex != cfg->fc_ifindex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002869 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07002870 if (cfg->fc_flags & RTF_GATEWAY &&
2871 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002872 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07002873 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002874 continue;
Mantas Mc2ed1882016-12-16 10:30:59 +02002875 if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
2876 continue;
Wei Wangd3843fe2017-10-06 12:06:06 -07002877 if (!dst_hold_safe(&rt->dst))
2878 break;
Thomas Grafc71099a2006-08-04 23:20:06 -07002879 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002880
David Ahern0ae81332017-02-02 12:37:08 -08002881 /* if gateway was specified only delete the one hop */
2882 if (cfg->fc_flags & RTF_GATEWAY)
2883 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
2884
2885 return __ip6_del_rt_siblings(rt, cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002886 }
2887 }
Thomas Grafc71099a2006-08-04 23:20:06 -07002888 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002889
2890 return err;
2891}
2892
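/* Handle a received ICMPv6 Redirect for the path cached in @dst: validate the
 * message and its ND options, update the neighbour entry for the target,
 * install a cloned RTF_CACHE route via the new nexthop, and raise a
 * NETEVENT_REDIRECT notification.
 */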
David S. Miller6700c272012-07-17 03:29:28 -07002893static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07002894{
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07002895 struct netevent_redirect netevent;
David S. Millere8599ff2012-07-11 23:43:53 -07002896 struct rt6_info *rt, *nrt = NULL;
David S. Millere8599ff2012-07-11 23:43:53 -07002897 struct ndisc_options ndopts;
2898 struct inet6_dev *in6_dev;
2899 struct neighbour *neigh;
YOSHIFUJI Hideaki / 吉藤英明71bcdba2013-01-05 16:34:51 +00002900 struct rd_msg *msg;
David S. Miller6e157b62012-07-12 00:05:02 -07002901 int optlen, on_link;
2902 u8 *lladdr;
David S. Millere8599ff2012-07-11 23:43:53 -07002903
Simon Horman29a3cad2013-05-28 20:34:26 +00002904 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
YOSHIFUJI Hideaki / 吉藤英明71bcdba2013-01-05 16:34:51 +00002905 optlen -= sizeof(*msg);
David S. Millere8599ff2012-07-11 23:43:53 -07002906
2907 if (optlen < 0) {
David S. Miller6e157b62012-07-12 00:05:02 -07002908 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
David S. Millere8599ff2012-07-11 23:43:53 -07002909 return;
2910 }
2911
YOSHIFUJI Hideaki / 吉藤英明71bcdba2013-01-05 16:34:51 +00002912 msg = (struct rd_msg *)icmp6_hdr(skb);
David S. Millere8599ff2012-07-11 23:43:53 -07002913
YOSHIFUJI Hideaki / 吉藤英明71bcdba2013-01-05 16:34:51 +00002914 if (ipv6_addr_is_multicast(&msg->dest)) {
David S. Miller6e157b62012-07-12 00:05:02 -07002915 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
David S. Millere8599ff2012-07-11 23:43:53 -07002916 return;
2917 }
2918
David S. Miller6e157b62012-07-12 00:05:02 -07002919 on_link = 0;
YOSHIFUJI Hideaki / 吉藤英明71bcdba2013-01-05 16:34:51 +00002920 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
David S. Millere8599ff2012-07-11 23:43:53 -07002921 on_link = 1;
YOSHIFUJI Hideaki / 吉藤英明71bcdba2013-01-05 16:34:51 +00002922 } else if (ipv6_addr_type(&msg->target) !=
David S. Millere8599ff2012-07-11 23:43:53 -07002923 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
David S. Miller6e157b62012-07-12 00:05:02 -07002924 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
David S. Millere8599ff2012-07-11 23:43:53 -07002925 return;
2926 }
2927
2928 in6_dev = __in6_dev_get(skb->dev);
2929 if (!in6_dev)
2930 return;
2931 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
2932 return;
2933
2934 /* RFC2461 8.1:
2935 * The IP source address of the Redirect MUST be the same as the current
2936 * first-hop router for the specified ICMP Destination Address.
2937 */
2938
Alexander Aringf997c552016-06-15 21:20:23 +02002939 if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
David S. Millere8599ff2012-07-11 23:43:53 -07002940 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
2941 return;
2942 }
David S. Miller6e157b62012-07-12 00:05:02 -07002943
2944 lladdr = NULL;
David S. Millere8599ff2012-07-11 23:43:53 -07002945 if (ndopts.nd_opts_tgt_lladdr) {
2946 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
2947 skb->dev);
2948 if (!lladdr) {
2949 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
2950 return;
2951 }
2952 }
2953
David S. Miller6e157b62012-07-12 00:05:02 -07002954 rt = (struct rt6_info *) dst;
Matthias Schifferec13ad12015-11-02 01:24:38 +01002955 if (rt->rt6i_flags & RTF_REJECT) {
David S. Miller6e157b62012-07-12 00:05:02 -07002956 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
2957 return;
2958 }
2959
2960 /* Redirect received -> path was valid.
2961 * Look, redirects are sent only in response to data packets,
 2962	 * so this nexthop apparently is reachable. --ANK
2963 */
Julian Anastasov0dec8792017-02-06 23:14:16 +02002964 dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
David S. Miller6e157b62012-07-12 00:05:02 -07002965
YOSHIFUJI Hideaki / 吉藤英明71bcdba2013-01-05 16:34:51 +00002966 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
David S. Millere8599ff2012-07-11 23:43:53 -07002967 if (!neigh)
2968 return;
2969
Linus Torvalds1da177e2005-04-16 15:20:36 -07002970 /*
2971 * We have finally decided to accept it.
2972 */
2973
Alexander Aringf997c552016-06-15 21:20:23 +02002974 ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002975 NEIGH_UPDATE_F_WEAK_OVERRIDE|
2976 NEIGH_UPDATE_F_OVERRIDE|
2977 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
Alexander Aringf997c552016-06-15 21:20:23 +02002978 NEIGH_UPDATE_F_ISROUTER)),
2979 NDISC_REDIRECT, &ndopts);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002980
Martin KaFai Lau83a09ab2015-05-22 20:56:05 -07002981 nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
David S. Miller38308472011-12-03 18:02:47 -05002982 if (!nrt)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002983 goto out;
2984
2985 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
2986 if (on_link)
2987 nrt->rt6i_flags &= ~RTF_GATEWAY;
2988
Xin Longb91d5322017-08-03 14:13:46 +08002989 nrt->rt6i_protocol = RTPROT_REDIRECT;
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00002990 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002991
Wei Wang2b760fc2017-10-06 12:06:03 -07002992 /* No need to remove rt from the exception table if rt is
2993 * a cached route because rt6_insert_exception() will
 2994	 * take care of it
2995 */
2996 if (rt6_insert_exception(nrt, rt)) {
2997 dst_release_immediate(&nrt->dst);
2998 goto out;
2999 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07003000
Changli Gaod8d1f302010-06-10 23:31:35 -07003001 netevent.old = &rt->dst;
3002 netevent.new = &nrt->dst;
YOSHIFUJI Hideaki / 吉藤英明71bcdba2013-01-05 16:34:51 +00003003 netevent.daddr = &msg->dest;
YOSHIFUJI Hideaki / 吉藤英明60592832013-01-14 09:28:27 +00003004 netevent.neigh = neigh;
Tom Tucker8d717402006-07-30 20:43:36 -07003005 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
3006
Linus Torvalds1da177e2005-04-16 15:20:36 -07003007out:
David S. Millere8599ff2012-07-11 23:43:53 -07003008 neigh_release(neigh);
David S. Miller6e157b62012-07-12 00:05:02 -07003009}
3010
Linus Torvalds1da177e2005-04-16 15:20:36 -07003011/*
Linus Torvalds1da177e2005-04-16 15:20:36 -07003012 * Misc support functions
3013 */
3014
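/* Tie @rt to its parent @from: take a reference on @from's dst, share its
 * metrics, and let the parent control expiry (RTF_EXPIRES is cleared here).
 */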
Martin KaFai Lau4b32b5a2015-04-28 13:03:06 -07003015static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
3016{
3017 BUG_ON(from->dst.from);
3018
3019 rt->rt6i_flags &= ~RTF_EXPIRES;
3020 dst_hold(&from->dst);
3021 rt->dst.from = &from->dst;
3022 dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
3023}
3024
Martin KaFai Lau83a09ab2015-05-22 20:56:05 -07003025static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003026{
Martin KaFai Lau83a09ab2015-05-22 20:56:05 -07003027 rt->dst.input = ort->dst.input;
3028 rt->dst.output = ort->dst.output;
3029 rt->rt6i_dst = ort->rt6i_dst;
3030 rt->dst.error = ort->dst.error;
3031 rt->rt6i_idev = ort->rt6i_idev;
3032 if (rt->rt6i_idev)
3033 in6_dev_hold(rt->rt6i_idev);
3034 rt->dst.lastuse = jiffies;
3035 rt->rt6i_gateway = ort->rt6i_gateway;
3036 rt->rt6i_flags = ort->rt6i_flags;
3037 rt6_set_from(rt, ort);
3038 rt->rt6i_metric = ort->rt6i_metric;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003039#ifdef CONFIG_IPV6_SUBTREES
Martin KaFai Lau83a09ab2015-05-22 20:56:05 -07003040 rt->rt6i_src = ort->rt6i_src;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003041#endif
Martin KaFai Lau83a09ab2015-05-22 20:56:05 -07003042 rt->rt6i_prefsrc = ort->rt6i_prefsrc;
3043 rt->rt6i_table = ort->rt6i_table;
Jiri Benc61adedf2015-08-20 13:56:25 +02003044 rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003045}
3046
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08003047#ifdef CONFIG_IPV6_ROUTE_INFO
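/* Look up a route that was installed from a Route Information option
 * (RFC 4191) for the given prefix, gateway and device.
 */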
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08003048static struct rt6_info *rt6_get_route_info(struct net *net,
Eric Dumazetb71d1d42011-04-22 04:53:02 +00003049 const struct in6_addr *prefix, int prefixlen,
David Ahern830218c2016-10-24 10:52:35 -07003050 const struct in6_addr *gwaddr,
3051 struct net_device *dev)
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08003052{
David Ahern830218c2016-10-24 10:52:35 -07003053 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
3054 int ifindex = dev->ifindex;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08003055 struct fib6_node *fn;
3056 struct rt6_info *rt = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07003057 struct fib6_table *table;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08003058
David Ahern830218c2016-10-24 10:52:35 -07003059 table = fib6_get_table(net, tb_id);
David S. Miller38308472011-12-03 18:02:47 -05003060 if (!table)
Thomas Grafc71099a2006-08-04 23:20:06 -07003061 return NULL;
3062
Li RongQing5744dd92012-09-11 21:59:01 +00003063 read_lock_bh(&table->tb6_lock);
Wei Wang38fbeee2017-10-06 12:06:02 -07003064 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08003065 if (!fn)
3066 goto out;
3067
Changli Gaod8d1f302010-06-10 23:31:35 -07003068 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
David S. Millerd1918542011-12-28 20:19:20 -05003069 if (rt->dst.dev->ifindex != ifindex)
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08003070 continue;
3071 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
3072 continue;
3073 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
3074 continue;
Wei Wangd3843fe2017-10-06 12:06:06 -07003075 ip6_hold_safe(NULL, &rt, false);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08003076 break;
3077 }
3078out:
Li RongQing5744dd92012-09-11 21:59:01 +00003079 read_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08003080 return rt;
3081}
3082
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08003083static struct rt6_info *rt6_add_route_info(struct net *net,
Eric Dumazetb71d1d42011-04-22 04:53:02 +00003084 const struct in6_addr *prefix, int prefixlen,
David Ahern830218c2016-10-24 10:52:35 -07003085 const struct in6_addr *gwaddr,
3086 struct net_device *dev,
Eric Dumazet95c96172012-04-15 05:58:06 +00003087 unsigned int pref)
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08003088{
Thomas Graf86872cb2006-08-22 00:01:08 -07003089 struct fib6_config cfg = {
Rami Rosen238fc7e2008-02-09 23:43:11 -08003090 .fc_metric = IP6_RT_PRIO_USER,
David Ahern830218c2016-10-24 10:52:35 -07003091 .fc_ifindex = dev->ifindex,
Thomas Graf86872cb2006-08-22 00:01:08 -07003092 .fc_dst_len = prefixlen,
3093 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
3094 RTF_UP | RTF_PREF(pref),
Xin Longb91d5322017-08-03 14:13:46 +08003095 .fc_protocol = RTPROT_RA,
Eric W. Biederman15e47302012-09-07 20:12:54 +00003096 .fc_nlinfo.portid = 0,
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08003097 .fc_nlinfo.nlh = NULL,
3098 .fc_nlinfo.nl_net = net,
Thomas Graf86872cb2006-08-22 00:01:08 -07003099 };
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08003100
David Ahern830218c2016-10-24 10:52:35 -07003101 cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00003102 cfg.fc_dst = *prefix;
3103 cfg.fc_gateway = *gwaddr;
Thomas Graf86872cb2006-08-22 00:01:08 -07003104
YOSHIFUJI Hideakie317da92006-03-20 17:06:42 -08003105 /* We should treat it as a default route if prefix length is 0. */
3106 if (!prefixlen)
Thomas Graf86872cb2006-08-22 00:01:08 -07003107 cfg.fc_flags |= RTF_DEFAULT;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08003108
David Ahern333c4302017-05-21 10:12:04 -06003109 ip6_route_add(&cfg, NULL);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08003110
David Ahern830218c2016-10-24 10:52:35 -07003111 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08003112}
3113#endif
3114
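/* Find the default-router route (RTF_ADDRCONF | RTF_DEFAULT) learned from
 * router @addr on device @dev, if one exists.
 */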
Eric Dumazetb71d1d42011-04-22 04:53:02 +00003115struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09003116{
David Ahern830218c2016-10-24 10:52:35 -07003117 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003118 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07003119 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003120
David Ahern830218c2016-10-24 10:52:35 -07003121 table = fib6_get_table(dev_net(dev), tb_id);
David S. Miller38308472011-12-03 18:02:47 -05003122 if (!table)
Thomas Grafc71099a2006-08-04 23:20:06 -07003123 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003124
Li RongQing5744dd92012-09-11 21:59:01 +00003125 read_lock_bh(&table->tb6_lock);
Ian Morris67ba4152014-08-24 21:53:10 +01003126 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
David S. Millerd1918542011-12-28 20:19:20 -05003127 if (dev == rt->dst.dev &&
YOSHIFUJI Hideaki045927f2006-03-20 17:00:48 -08003128 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07003129 ipv6_addr_equal(&rt->rt6i_gateway, addr))
3130 break;
3131 }
3132 if (rt)
Wei Wangd3843fe2017-10-06 12:06:06 -07003133 ip6_hold_safe(NULL, &rt, false);
Li RongQing5744dd92012-09-11 21:59:01 +00003134 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003135 return rt;
3136}
3137
Eric Dumazetb71d1d42011-04-22 04:53:02 +00003138struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -08003139 struct net_device *dev,
3140 unsigned int pref)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003141{
Thomas Graf86872cb2006-08-22 00:01:08 -07003142 struct fib6_config cfg = {
David Ahernca254492015-10-12 11:47:10 -07003143 .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
Rami Rosen238fc7e2008-02-09 23:43:11 -08003144 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07003145 .fc_ifindex = dev->ifindex,
3146 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
3147 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
Xin Longb91d5322017-08-03 14:13:46 +08003148 .fc_protocol = RTPROT_RA,
Eric W. Biederman15e47302012-09-07 20:12:54 +00003149 .fc_nlinfo.portid = 0,
Daniel Lezcano55786892008-03-04 13:47:47 -08003150 .fc_nlinfo.nlh = NULL,
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09003151 .fc_nlinfo.nl_net = dev_net(dev),
Thomas Graf86872cb2006-08-22 00:01:08 -07003152 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07003153
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00003154 cfg.fc_gateway = *gwaddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003155
David Ahern333c4302017-05-21 10:12:04 -06003156 if (!ip6_route_add(&cfg, NULL)) {
David Ahern830218c2016-10-24 10:52:35 -07003157 struct fib6_table *table;
3158
3159 table = fib6_get_table(dev_net(dev), cfg.fc_table);
3160 if (table)
3161 table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
3162 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07003163
Linus Torvalds1da177e2005-04-16 15:20:36 -07003164 return rt6_get_dflt_router(gwaddr, dev);
3165}
3166
David Ahern830218c2016-10-24 10:52:35 -07003167static void __rt6_purge_dflt_routers(struct fib6_table *table)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003168{
3169 struct rt6_info *rt;
3170
3171restart:
Thomas Grafc71099a2006-08-04 23:20:06 -07003172 read_lock_bh(&table->tb6_lock);
Changli Gaod8d1f302010-06-10 23:31:35 -07003173 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
Lorenzo Colitti3e8b0ac2013-03-03 20:46:46 +00003174 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
3175 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
Wei Wangd3843fe2017-10-06 12:06:06 -07003176 if (dst_hold_safe(&rt->dst)) {
3177 read_unlock_bh(&table->tb6_lock);
3178 ip6_del_rt(rt);
3179 } else {
3180 read_unlock_bh(&table->tb6_lock);
3181 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07003182 goto restart;
3183 }
3184 }
Thomas Grafc71099a2006-08-04 23:20:06 -07003185 read_unlock_bh(&table->tb6_lock);
David Ahern830218c2016-10-24 10:52:35 -07003186
3187 table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
3188}
3189
3190void rt6_purge_dflt_routers(struct net *net)
3191{
3192 struct fib6_table *table;
3193 struct hlist_head *head;
3194 unsigned int h;
3195
3196 rcu_read_lock();
3197
3198 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
3199 head = &net->ipv6.fib_table_hash[h];
3200 hlist_for_each_entry_rcu(table, head, tb6_hlist) {
3201 if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
3202 __rt6_purge_dflt_routers(table);
3203 }
3204 }
3205
3206 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07003207}
3208
Daniel Lezcano55786892008-03-04 13:47:47 -08003209static void rtmsg_to_fib6_config(struct net *net,
3210 struct in6_rtmsg *rtmsg,
Thomas Graf86872cb2006-08-22 00:01:08 -07003211 struct fib6_config *cfg)
3212{
3213 memset(cfg, 0, sizeof(*cfg));
3214
David Ahernca254492015-10-12 11:47:10 -07003215 cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
3216 : RT6_TABLE_MAIN;
Thomas Graf86872cb2006-08-22 00:01:08 -07003217 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
3218 cfg->fc_metric = rtmsg->rtmsg_metric;
3219 cfg->fc_expires = rtmsg->rtmsg_info;
3220 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
3221 cfg->fc_src_len = rtmsg->rtmsg_src_len;
3222 cfg->fc_flags = rtmsg->rtmsg_flags;
3223
Daniel Lezcano55786892008-03-04 13:47:47 -08003224 cfg->fc_nlinfo.nl_net = net;
Benjamin Theryf1243c22008-02-26 18:10:03 -08003225
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00003226 cfg->fc_dst = rtmsg->rtmsg_dst;
3227 cfg->fc_src = rtmsg->rtmsg_src;
3228 cfg->fc_gateway = rtmsg->rtmsg_gateway;
Thomas Graf86872cb2006-08-22 00:01:08 -07003229}
3230
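/* SIOCADDRT/SIOCDELRT handler: convert the legacy struct in6_rtmsg into a
 * fib6_config and add or delete the route under the RTNL lock.
 */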
Daniel Lezcano55786892008-03-04 13:47:47 -08003231int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003232{
Thomas Graf86872cb2006-08-22 00:01:08 -07003233 struct fib6_config cfg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003234 struct in6_rtmsg rtmsg;
3235 int err;
3236
Ian Morris67ba4152014-08-24 21:53:10 +01003237 switch (cmd) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003238 case SIOCADDRT: /* Add a route */
3239 case SIOCDELRT: /* Delete a route */
Eric W. Biedermanaf31f412012-11-16 03:03:06 +00003240 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
Linus Torvalds1da177e2005-04-16 15:20:36 -07003241 return -EPERM;
3242 err = copy_from_user(&rtmsg, arg,
3243 sizeof(struct in6_rtmsg));
3244 if (err)
3245 return -EFAULT;
Thomas Graf86872cb2006-08-22 00:01:08 -07003246
Daniel Lezcano55786892008-03-04 13:47:47 -08003247 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
Thomas Graf86872cb2006-08-22 00:01:08 -07003248
Linus Torvalds1da177e2005-04-16 15:20:36 -07003249 rtnl_lock();
3250 switch (cmd) {
3251 case SIOCADDRT:
David Ahern333c4302017-05-21 10:12:04 -06003252 err = ip6_route_add(&cfg, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003253 break;
3254 case SIOCDELRT:
David Ahern333c4302017-05-21 10:12:04 -06003255 err = ip6_route_del(&cfg, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003256 break;
3257 default:
3258 err = -EINVAL;
3259 }
3260 rtnl_unlock();
3261
3262 return err;
Stephen Hemminger3ff50b72007-04-20 17:09:22 -07003263 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07003264
3265 return -EINVAL;
3266}
3267
3268/*
3269 * Drop the packet on the floor
3270 */
3271
Brian Haleyd5fdd6b2009-06-23 04:31:07 -07003272static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003273{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07003274 int type;
Eric Dumazetadf30902009-06-02 05:19:30 +00003275 struct dst_entry *dst = skb_dst(skb);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07003276 switch (ipstats_mib_noroutes) {
3277 case IPSTATS_MIB_INNOROUTES:
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -07003278 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
Ulrich Weber45bb0062010-02-25 23:28:58 +00003279 if (type == IPV6_ADDR_ANY) {
Denis V. Lunev3bd653c2008-10-08 10:54:51 -07003280 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
3281 IPSTATS_MIB_INADDRERRORS);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07003282 break;
3283 }
3284 /* FALLTHROUGH */
3285 case IPSTATS_MIB_OUTNOROUTES:
Denis V. Lunev3bd653c2008-10-08 10:54:51 -07003286 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
3287 ipstats_mib_noroutes);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07003288 break;
3289 }
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +00003290 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003291 kfree_skb(skb);
3292 return 0;
3293}
3294
Thomas Graf9ce8ade2006-10-18 20:46:54 -07003295static int ip6_pkt_discard(struct sk_buff *skb)
3296{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07003297 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07003298}
3299
Eric W. Biedermanede20592015-10-07 16:48:47 -05003300static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003301{
Eric Dumazetadf30902009-06-02 05:19:30 +00003302 skb->dev = skb_dst(skb)->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07003303 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003304}
3305
Thomas Graf9ce8ade2006-10-18 20:46:54 -07003306static int ip6_pkt_prohibit(struct sk_buff *skb)
3307{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07003308 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07003309}
3310
Eric W. Biedermanede20592015-10-07 16:48:47 -05003311static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
Thomas Graf9ce8ade2006-10-18 20:46:54 -07003312{
Eric Dumazetadf30902009-06-02 05:19:30 +00003313 skb->dev = skb_dst(skb)->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07003314 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07003315}
3316
Linus Torvalds1da177e2005-04-16 15:20:36 -07003317/*
3318 * Allocate a dst for local (unicast / anycast) address.
3319 */
3320
3321struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
3322 const struct in6_addr *addr,
David S. Miller8f031512011-12-06 16:48:14 -05003323 bool anycast)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003324{
David Ahernca254492015-10-12 11:47:10 -07003325 u32 tb_id;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09003326 struct net *net = dev_net(idev->dev);
David Ahern4832c302017-08-17 12:17:20 -07003327 struct net_device *dev = idev->dev;
David Ahern5f02ce242016-09-10 12:09:54 -07003328 struct rt6_info *rt;
3329
David Ahern5f02ce242016-09-10 12:09:54 -07003330 rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
Hannes Frederic Sowaa3300ef2013-12-07 03:33:45 +01003331 if (!rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003332 return ERR_PTR(-ENOMEM);
3333
Linus Torvalds1da177e2005-04-16 15:20:36 -07003334 in6_dev_hold(idev);
3335
David S. Miller11d53b42011-06-24 15:23:34 -07003336 rt->dst.flags |= DST_HOST;
Changli Gaod8d1f302010-06-10 23:31:35 -07003337 rt->dst.input = ip6_input;
3338 rt->dst.output = ip6_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003339 rt->rt6i_idev = idev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003340
David Ahern94b5e0f2017-02-02 08:52:21 -08003341 rt->rt6i_protocol = RTPROT_KERNEL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003342 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +09003343 if (anycast)
3344 rt->rt6i_flags |= RTF_ANYCAST;
3345 else
Linus Torvalds1da177e2005-04-16 15:20:36 -07003346 rt->rt6i_flags |= RTF_LOCAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003347
Julian Anastasov550bab42013-10-20 15:43:04 +03003348 rt->rt6i_gateway = *addr;
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00003349 rt->rt6i_dst.addr = *addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003350 rt->rt6i_dst.plen = 128;
David Ahernca254492015-10-12 11:47:10 -07003351 tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
3352 rt->rt6i_table = fib6_get_table(net, tb_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003353
Linus Torvalds1da177e2005-04-16 15:20:36 -07003354 return rt;
3355}
3356
Daniel Walterc3968a82011-04-13 21:10:57 +00003357/* remove deleted ip from prefsrc entries */
3358struct arg_dev_net_ip {
3359 struct net_device *dev;
3360 struct net *net;
3361 struct in6_addr *addr;
3362};
3363
3364static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
3365{
3366 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
3367 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
3368 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
3369
David S. Millerd1918542011-12-28 20:19:20 -05003370 if (((void *)rt->dst.dev == dev || !dev) &&
Daniel Walterc3968a82011-04-13 21:10:57 +00003371 rt != net->ipv6.ip6_null_entry &&
3372 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
Wei Wang60006a42017-10-06 12:05:58 -07003373 spin_lock_bh(&rt6_exception_lock);
Daniel Walterc3968a82011-04-13 21:10:57 +00003374 /* remove prefsrc entry */
3375 rt->rt6i_prefsrc.plen = 0;
Wei Wang60006a42017-10-06 12:05:58 -07003376 /* need to update cache as well */
3377 rt6_exceptions_remove_prefsrc(rt);
3378 spin_unlock_bh(&rt6_exception_lock);
Daniel Walterc3968a82011-04-13 21:10:57 +00003379 }
3380 return 0;
3381}
3382
3383void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
3384{
3385 struct net *net = dev_net(ifp->idev->dev);
3386 struct arg_dev_net_ip adni = {
3387 .dev = ifp->idev->dev,
3388 .net = net,
3389 .addr = &ifp->addr,
3390 };
Li RongQing0c3584d2013-12-27 16:32:38 +08003391 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
Daniel Walterc3968a82011-04-13 21:10:57 +00003392}
3393
Duan Jiongbe7a0102014-05-15 15:56:14 +08003394#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
Duan Jiongbe7a0102014-05-15 15:56:14 +08003395
 3396/* Remove routers and update dst entries when a gateway turns into a host. */
3397static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
3398{
3399 struct in6_addr *gateway = (struct in6_addr *)arg;
3400
Wei Wang2b760fc2017-10-06 12:06:03 -07003401 if (((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
3402 ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
Duan Jiongbe7a0102014-05-15 15:56:14 +08003403 return -1;
3404 }
Wei Wangb16cb452017-10-06 12:06:00 -07003405
3406 /* Further clean up cached routes in exception table.
 3407	 * This is needed because a cached route may have a different
 3408	 * gateway than its 'parent' in the case of an IP redirect.
3409 */
3410 rt6_exceptions_clean_tohost(rt, gateway);
3411
Duan Jiongbe7a0102014-05-15 15:56:14 +08003412 return 0;
3413}
3414
3415void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
3416{
3417 fib6_clean_all(net, fib6_clean_tohost, gateway);
3418}
3419
Daniel Lezcano8ed67782008-03-04 13:48:30 -08003420struct arg_dev_net {
3421 struct net_device *dev;
3422 struct net *net;
3423};
3424
David Aherna1a22c12017-01-18 07:40:36 -08003425/* called with write lock held for table with rt */
Linus Torvalds1da177e2005-04-16 15:20:36 -07003426static int fib6_ifdown(struct rt6_info *rt, void *arg)
3427{
stephen hemmingerbc3ef662010-12-16 17:42:40 +00003428 const struct arg_dev_net *adn = arg;
3429 const struct net_device *dev = adn->dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08003430
David S. Millerd1918542011-12-28 20:19:20 -05003431 if ((rt->dst.dev == dev || !dev) &&
David Aherna1a22c12017-01-18 07:40:36 -08003432 rt != adn->net->ipv6.ip6_null_entry &&
3433 (rt->rt6i_nsiblings == 0 ||
David Ahern8397ed32017-06-07 12:26:23 -06003434 (dev && netdev_unregistering(dev)) ||
David Aherna1a22c12017-01-18 07:40:36 -08003435 !rt->rt6i_idev->cnf.ignore_routes_with_linkdown))
Linus Torvalds1da177e2005-04-16 15:20:36 -07003436 return -1;
David S. Millerc159d302011-12-26 15:24:36 -05003437
Linus Torvalds1da177e2005-04-16 15:20:36 -07003438 return 0;
3439}
3440
Daniel Lezcanof3db4852008-03-03 23:27:06 -08003441void rt6_ifdown(struct net *net, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003442{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08003443 struct arg_dev_net adn = {
3444 .dev = dev,
3445 .net = net,
3446 };
3447
Li RongQing0c3584d2013-12-27 16:32:38 +08003448 fib6_clean_all(net, fib6_ifdown, &adn);
Eric W. Biedermane332bc62015-10-12 11:02:08 -05003449 if (dev)
3450 rt6_uncached_list_flush_dev(net, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003451}
3452
Eric Dumazet95c96172012-04-15 05:58:06 +00003453struct rt6_mtu_change_arg {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003454 struct net_device *dev;
Eric Dumazet95c96172012-04-15 05:58:06 +00003455 unsigned int mtu;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003456};
3457
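/* Per-route callback for rt6_mtu_change(): update RTAX_MTU on routes using
 * the device whose MTU changed, along with any cached routes in the
 * exception table.
 */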
3458static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
3459{
3460 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
3461 struct inet6_dev *idev;
3462
 3463	/* In IPv6, PMTU discovery is not optional,
 3464	   so the RTAX_MTU lock cannot disable it.
 3465	   We still use this lock to block changes
 3466	   caused by addrconf/ndisc.
 3467	*/
3468
3469 idev = __in6_dev_get(arg->dev);
David S. Miller38308472011-12-03 18:02:47 -05003470 if (!idev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003471 return 0;
3472
 3473	/* For an administrative MTU increase, there is no way to discover
 3474	   an IPv6 PMTU increase, so the PMTU should be updated here.
 3475	   Since RFC 1981 doesn't cover administrative MTU increases,
 3476	   updating on a PMTU increase is a MUST. (i.e. jumbo frame)
 3477	 */
 3478	/*
 3479	   If the new MTU is less than the route PMTU, this new MTU will be the
 3480	   lowest MTU in the path; update the route PMTU to reflect the
 3481	   decrease. If the new MTU is greater than the route PMTU, and the
 3482	   old MTU was the lowest MTU in the path, update the route PMTU
 3483	   to reflect the increase. In this case, if the other nodes along the
 3484	   path also have this lowest MTU, a TOO BIG message will lead to
Alexander Alemayhu67c408c2017-01-07 23:53:00 +01003485	   PMTU discovery.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003486	 */
David S. Millerd1918542011-12-28 20:19:20 -05003487 if (rt->dst.dev == arg->dev &&
Maciej Żenczykowskifb56be82016-11-04 14:51:54 -07003488 dst_metric_raw(&rt->dst, RTAX_MTU) &&
Martin KaFai Lau4b32b5a2015-04-28 13:03:06 -07003489 !dst_metric_locked(&rt->dst, RTAX_MTU)) {
Wei Wangf5bbe7e2017-10-06 12:05:59 -07003490 spin_lock_bh(&rt6_exception_lock);
Wei Wang2b760fc2017-10-06 12:06:03 -07003491 if (dst_mtu(&rt->dst) >= arg->mtu ||
3492 (dst_mtu(&rt->dst) < arg->mtu &&
3493 dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
Martin KaFai Lau4b32b5a2015-04-28 13:03:06 -07003494 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
3495 }
Wei Wangf5bbe7e2017-10-06 12:05:59 -07003496 rt6_exceptions_update_pmtu(rt, arg->mtu);
3497 spin_unlock_bh(&rt6_exception_lock);
Simon Arlott566cfd82007-07-26 00:09:55 -07003498 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07003499 return 0;
3500}
3501
Eric Dumazet95c96172012-04-15 05:58:06 +00003502void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003503{
Thomas Grafc71099a2006-08-04 23:20:06 -07003504 struct rt6_mtu_change_arg arg = {
3505 .dev = dev,
3506 .mtu = mtu,
3507 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07003508
Li RongQing0c3584d2013-12-27 16:32:38 +08003509 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003510}
3511
Patrick McHardyef7c79e2007-06-05 12:38:30 -07003512static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
Thomas Graf5176f912006-08-26 20:13:18 -07003513 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
Thomas Graf86872cb2006-08-22 00:01:08 -07003514 [RTA_OIF] = { .type = NLA_U32 },
Thomas Grafab364a62006-08-22 00:01:47 -07003515 [RTA_IIF] = { .type = NLA_U32 },
Thomas Graf86872cb2006-08-22 00:01:08 -07003516 [RTA_PRIORITY] = { .type = NLA_U32 },
3517 [RTA_METRICS] = { .type = NLA_NESTED },
Nicolas Dichtel51ebd3182012-10-22 03:42:09 +00003518 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
Lubomir Rintelc78ba6d2015-03-11 15:39:21 +01003519 [RTA_PREF] = { .type = NLA_U8 },
Roopa Prabhu19e42e42015-07-21 10:43:48 +02003520 [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
3521 [RTA_ENCAP] = { .type = NLA_NESTED },
Xin Long32bc2012015-12-16 17:50:11 +08003522 [RTA_EXPIRES] = { .type = NLA_U32 },
Lorenzo Colitti622ec2c2016-11-04 02:23:42 +09003523 [RTA_UID] = { .type = NLA_U32 },
Liping Zhang3b45a412017-02-27 20:59:39 +08003524 [RTA_MARK] = { .type = NLA_U32 },
Thomas Graf86872cb2006-08-22 00:01:08 -07003525};
3526
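/* Translate a routing netlink message (struct rtmsg plus attributes,
 * validated against rtm_ipv6_policy above) into a fib6_config.
 */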
3527static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
David Ahern333c4302017-05-21 10:12:04 -06003528 struct fib6_config *cfg,
3529 struct netlink_ext_ack *extack)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003530{
Thomas Graf86872cb2006-08-22 00:01:08 -07003531 struct rtmsg *rtm;
3532 struct nlattr *tb[RTA_MAX+1];
Lubomir Rintelc78ba6d2015-03-11 15:39:21 +01003533 unsigned int pref;
Thomas Graf86872cb2006-08-22 00:01:08 -07003534 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003535
Johannes Bergfceb6432017-04-12 14:34:07 +02003536 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
3537 NULL);
Thomas Graf86872cb2006-08-22 00:01:08 -07003538 if (err < 0)
3539 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003540
Thomas Graf86872cb2006-08-22 00:01:08 -07003541 err = -EINVAL;
3542 rtm = nlmsg_data(nlh);
3543 memset(cfg, 0, sizeof(*cfg));
3544
3545 cfg->fc_table = rtm->rtm_table;
3546 cfg->fc_dst_len = rtm->rtm_dst_len;
3547 cfg->fc_src_len = rtm->rtm_src_len;
3548 cfg->fc_flags = RTF_UP;
3549 cfg->fc_protocol = rtm->rtm_protocol;
Nicolas Dichtelef2c7d72012-09-05 02:12:42 +00003550 cfg->fc_type = rtm->rtm_type;
Thomas Graf86872cb2006-08-22 00:01:08 -07003551
Nicolas Dichtelef2c7d72012-09-05 02:12:42 +00003552 if (rtm->rtm_type == RTN_UNREACHABLE ||
3553 rtm->rtm_type == RTN_BLACKHOLE ||
Nicolas Dichtelb4949ab2012-09-06 05:53:35 +00003554 rtm->rtm_type == RTN_PROHIBIT ||
3555 rtm->rtm_type == RTN_THROW)
Thomas Graf86872cb2006-08-22 00:01:08 -07003556 cfg->fc_flags |= RTF_REJECT;
3557
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00003558 if (rtm->rtm_type == RTN_LOCAL)
3559 cfg->fc_flags |= RTF_LOCAL;
3560
Martin KaFai Lau1f56a01f2015-04-28 13:03:03 -07003561 if (rtm->rtm_flags & RTM_F_CLONED)
3562 cfg->fc_flags |= RTF_CACHE;
3563
Eric W. Biederman15e47302012-09-07 20:12:54 +00003564 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
Thomas Graf86872cb2006-08-22 00:01:08 -07003565 cfg->fc_nlinfo.nlh = nlh;
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09003566 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
Thomas Graf86872cb2006-08-22 00:01:08 -07003567
3568 if (tb[RTA_GATEWAY]) {
Jiri Benc67b61f62015-03-29 16:59:26 +02003569 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
Thomas Graf86872cb2006-08-22 00:01:08 -07003570 cfg->fc_flags |= RTF_GATEWAY;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003571 }
Thomas Graf86872cb2006-08-22 00:01:08 -07003572
3573 if (tb[RTA_DST]) {
3574 int plen = (rtm->rtm_dst_len + 7) >> 3;
3575
3576 if (nla_len(tb[RTA_DST]) < plen)
3577 goto errout;
3578
3579 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003580 }
Thomas Graf86872cb2006-08-22 00:01:08 -07003581
3582 if (tb[RTA_SRC]) {
3583 int plen = (rtm->rtm_src_len + 7) >> 3;
3584
3585 if (nla_len(tb[RTA_SRC]) < plen)
3586 goto errout;
3587
3588 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003589 }
Thomas Graf86872cb2006-08-22 00:01:08 -07003590
Daniel Walterc3968a82011-04-13 21:10:57 +00003591 if (tb[RTA_PREFSRC])
Jiri Benc67b61f62015-03-29 16:59:26 +02003592 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
Daniel Walterc3968a82011-04-13 21:10:57 +00003593
Thomas Graf86872cb2006-08-22 00:01:08 -07003594 if (tb[RTA_OIF])
3595 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
3596
3597 if (tb[RTA_PRIORITY])
3598 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
3599
3600 if (tb[RTA_METRICS]) {
3601 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
3602 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003603 }
Thomas Graf86872cb2006-08-22 00:01:08 -07003604
3605 if (tb[RTA_TABLE])
3606 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
3607
Nicolas Dichtel51ebd3182012-10-22 03:42:09 +00003608 if (tb[RTA_MULTIPATH]) {
3609 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
3610 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
David Ahern9ed59592017-01-17 14:57:36 -08003611
3612 err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
David Ahernc255bd62017-05-27 16:19:27 -06003613 cfg->fc_mp_len, extack);
David Ahern9ed59592017-01-17 14:57:36 -08003614 if (err < 0)
3615 goto errout;
Nicolas Dichtel51ebd3182012-10-22 03:42:09 +00003616 }
3617
Lubomir Rintelc78ba6d2015-03-11 15:39:21 +01003618 if (tb[RTA_PREF]) {
3619 pref = nla_get_u8(tb[RTA_PREF]);
3620 if (pref != ICMPV6_ROUTER_PREF_LOW &&
3621 pref != ICMPV6_ROUTER_PREF_HIGH)
3622 pref = ICMPV6_ROUTER_PREF_MEDIUM;
3623 cfg->fc_flags |= RTF_PREF(pref);
3624 }
3625
Roopa Prabhu19e42e42015-07-21 10:43:48 +02003626 if (tb[RTA_ENCAP])
3627 cfg->fc_encap = tb[RTA_ENCAP];
3628
David Ahern9ed59592017-01-17 14:57:36 -08003629 if (tb[RTA_ENCAP_TYPE]) {
Roopa Prabhu19e42e42015-07-21 10:43:48 +02003630 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
3631
David Ahernc255bd62017-05-27 16:19:27 -06003632 err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
David Ahern9ed59592017-01-17 14:57:36 -08003633 if (err < 0)
3634 goto errout;
3635 }
3636
Xin Long32bc2012015-12-16 17:50:11 +08003637 if (tb[RTA_EXPIRES]) {
3638 unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
3639
3640 if (addrconf_finite_timeout(timeout)) {
3641 cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
3642 cfg->fc_flags |= RTF_EXPIRES;
3643 }
3644 }
3645
Thomas Graf86872cb2006-08-22 00:01:08 -07003646 err = 0;
3647errout:
3648 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003649}
3650
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07003651struct rt6_nh {
3652 struct rt6_info *rt6_info;
3653 struct fib6_config r_cfg;
3654 struct mx6_config mxc;
3655 struct list_head next;
3656};
3657
3658static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
3659{
3660 struct rt6_nh *nh;
3661
3662 list_for_each_entry(nh, rt6_nh_list, next) {
David Ahern7d4d5062017-02-02 12:37:12 -08003663 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n",
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07003664 &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
3665 nh->r_cfg.fc_ifindex);
3666 }
3667}
3668
3669static int ip6_route_info_append(struct list_head *rt6_nh_list,
3670 struct rt6_info *rt, struct fib6_config *r_cfg)
3671{
3672 struct rt6_nh *nh;
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07003673 int err = -EEXIST;
3674
3675 list_for_each_entry(nh, rt6_nh_list, next) {
3676 /* check if rt6_info already exists */
David Ahernf06b7542017-07-05 14:41:46 -06003677 if (rt6_duplicate_nexthop(nh->rt6_info, rt))
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07003678 return err;
3679 }
3680
3681 nh = kzalloc(sizeof(*nh), GFP_KERNEL);
3682 if (!nh)
3683 return -ENOMEM;
3684 nh->rt6_info = rt;
3685 err = ip6_convert_metrics(&nh->mxc, r_cfg);
3686 if (err) {
3687 kfree(nh);
3688 return err;
3689 }
3690 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
3691 list_add_tail(&nh->next, rt6_nh_list);
3692
3693 return 0;
3694}
3695
David Ahern3b1137f2017-02-02 12:37:10 -08003696static void ip6_route_mpath_notify(struct rt6_info *rt,
3697 struct rt6_info *rt_last,
3698 struct nl_info *info,
3699 __u16 nlflags)
3700{
3701 /* if this is an APPEND route, then rt points to the first route
3702 * inserted and rt_last points to last route inserted. Userspace
3703 * wants a consistent dump of the route which starts at the first
3704 * nexthop. Since sibling routes are always added at the end of
3705 * the list, find the first sibling of the last route appended
3706 */
3707 if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->rt6i_nsiblings) {
3708 rt = list_first_entry(&rt_last->rt6i_siblings,
3709 struct rt6_info,
3710 rt6i_siblings);
3711 }
3712
3713 if (rt)
3714 inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
3715}
3716
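/* Add an RTA_MULTIPATH route: create one rt6_info per nexthop, insert them
 * as siblings, and notify userspace once with the complete route.
 */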
David Ahern333c4302017-05-21 10:12:04 -06003717static int ip6_route_multipath_add(struct fib6_config *cfg,
3718 struct netlink_ext_ack *extack)
Nicolas Dichtel51ebd3182012-10-22 03:42:09 +00003719{
David Ahern3b1137f2017-02-02 12:37:10 -08003720 struct rt6_info *rt_notif = NULL, *rt_last = NULL;
3721 struct nl_info *info = &cfg->fc_nlinfo;
Nicolas Dichtel51ebd3182012-10-22 03:42:09 +00003722 struct fib6_config r_cfg;
3723 struct rtnexthop *rtnh;
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07003724 struct rt6_info *rt;
3725 struct rt6_nh *err_nh;
3726 struct rt6_nh *nh, *nh_safe;
David Ahern3b1137f2017-02-02 12:37:10 -08003727 __u16 nlflags;
Nicolas Dichtel51ebd3182012-10-22 03:42:09 +00003728 int remaining;
3729 int attrlen;
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07003730 int err = 1;
3731 int nhn = 0;
3732 int replace = (cfg->fc_nlinfo.nlh &&
3733 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
3734 LIST_HEAD(rt6_nh_list);
Nicolas Dichtel51ebd3182012-10-22 03:42:09 +00003735
David Ahern3b1137f2017-02-02 12:37:10 -08003736 nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
3737 if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
3738 nlflags |= NLM_F_APPEND;
3739
Michal Kubeček35f1b4e2015-05-18 20:53:55 +02003740 remaining = cfg->fc_mp_len;
Nicolas Dichtel51ebd3182012-10-22 03:42:09 +00003741 rtnh = (struct rtnexthop *)cfg->fc_mp;
Nicolas Dichtel51ebd3182012-10-22 03:42:09 +00003742
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07003743 /* Parse a Multipath Entry and build a list (rt6_nh_list) of
3744 * rt6_info structs per nexthop
3745 */
Nicolas Dichtel51ebd3182012-10-22 03:42:09 +00003746 while (rtnh_ok(rtnh, remaining)) {
3747 memcpy(&r_cfg, cfg, sizeof(*cfg));
3748 if (rtnh->rtnh_ifindex)
3749 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
3750
3751 attrlen = rtnh_attrlen(rtnh);
3752 if (attrlen > 0) {
3753 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3754
3755 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3756 if (nla) {
Jiri Benc67b61f62015-03-29 16:59:26 +02003757 r_cfg.fc_gateway = nla_get_in6_addr(nla);
Nicolas Dichtel51ebd3182012-10-22 03:42:09 +00003758 r_cfg.fc_flags |= RTF_GATEWAY;
3759 }
Roopa Prabhu19e42e42015-07-21 10:43:48 +02003760 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
3761 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
3762 if (nla)
3763 r_cfg.fc_encap_type = nla_get_u16(nla);
Nicolas Dichtel51ebd3182012-10-22 03:42:09 +00003764 }
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07003765
David Ahern333c4302017-05-21 10:12:04 -06003766 rt = ip6_route_info_create(&r_cfg, extack);
Roopa Prabhu8c5b83f2015-10-10 08:26:36 -07003767 if (IS_ERR(rt)) {
3768 err = PTR_ERR(rt);
3769 rt = NULL;
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07003770 goto cleanup;
Roopa Prabhu8c5b83f2015-10-10 08:26:36 -07003771 }
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07003772
3773 err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
Nicolas Dichtel51ebd3182012-10-22 03:42:09 +00003774 if (err) {
Wei Wang587fea72017-06-17 10:42:36 -07003775 dst_release_immediate(&rt->dst);
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07003776 goto cleanup;
Nicolas Dichtel51ebd3182012-10-22 03:42:09 +00003777 }
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07003778
3779 rtnh = rtnh_next(rtnh, &remaining);
3780 }
3781
David Ahern3b1137f2017-02-02 12:37:10 -08003782	/* For add and replace, send one notification with all nexthops.
3783 * Skip the notification in fib6_add_rt2node and send one with
3784 * the full route when done
3785 */
3786 info->skip_notify = 1;
3787
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07003788 err_nh = NULL;
3789 list_for_each_entry(nh, &rt6_nh_list, next) {
David Ahern3b1137f2017-02-02 12:37:10 -08003790 rt_last = nh->rt6_info;
David Ahern333c4302017-05-21 10:12:04 -06003791 err = __ip6_ins_rt(nh->rt6_info, info, &nh->mxc, extack);
David Ahern3b1137f2017-02-02 12:37:10 -08003792 /* save reference to first route for notification */
3793 if (!rt_notif && !err)
3794 rt_notif = nh->rt6_info;
3795
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07003796		/* nh->rt6_info is used or freed at this point, reset to NULL */
3797 nh->rt6_info = NULL;
3798 if (err) {
3799 if (replace && nhn)
3800 ip6_print_replace_route_err(&rt6_nh_list);
3801 err_nh = nh;
3802 goto add_errout;
3803 }
3804
Nicolas Dichtel1a724182012-11-01 22:58:22 +00003805		/* Because each route is added like a single route, we remove
Michal Kubeček27596472015-05-18 20:54:00 +02003806		 * these flags after the first nexthop: if there is a collision,
 3807		 * we have already failed to add the first nexthop:
 3808		 * fib6_add_rt2node() has rejected it; when replacing, the old
 3809		 * nexthops have already been replaced by the first new one, and
 3810		 * the rest should be added to it.
Nicolas Dichtel1a724182012-11-01 22:58:22 +00003811 */
Michal Kubeček27596472015-05-18 20:54:00 +02003812 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
3813 NLM_F_REPLACE);
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07003814 nhn++;
3815 }
3816
David Ahern3b1137f2017-02-02 12:37:10 -08003817 /* success ... tell user about new route */
3818 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07003819 goto cleanup;
3820
3821add_errout:
David Ahern3b1137f2017-02-02 12:37:10 -08003822 /* send notification for routes that were added so that
3823 * the delete notifications sent by ip6_route_del are
3824 * coherent
3825 */
3826 if (rt_notif)
3827 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
3828
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07003829 /* Delete routes that were already added */
3830 list_for_each_entry(nh, &rt6_nh_list, next) {
3831 if (err_nh == nh)
3832 break;
David Ahern333c4302017-05-21 10:12:04 -06003833 ip6_route_del(&nh->r_cfg, extack);
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07003834 }
3835
3836cleanup:
3837 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
Wei Wang587fea72017-06-17 10:42:36 -07003838 if (nh->rt6_info)
3839 dst_release_immediate(&nh->rt6_info->dst);
Wu Fengguang52fe51f2015-09-10 06:57:12 +08003840 kfree(nh->mxc.mx);
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07003841 list_del(&nh->next);
3842 kfree(nh);
3843 }
3844
3845 return err;
3846}
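/* Illustrative userspace trigger for the add path above (iproute2 syntax;
 * the prefix, gateways and devices are invented for the example):
 *
 *   ip -6 route add 2001:db8::/64 \
 *           nexthop via fe80::1 dev eth0 \
 *           nexthop via fe80::2 dev eth1
 *
 * iproute2 packs this into a single RTM_NEWROUTE message whose RTA_MULTIPATH
 * attribute holds one struct rtnexthop per "nexthop" keyword, each followed
 * by its own RTA_GATEWAY (and optionally RTA_ENCAP/RTA_ENCAP_TYPE), which is
 * what the parsing loop above walks.
 */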
3847
David Ahern333c4302017-05-21 10:12:04 -06003848static int ip6_route_multipath_del(struct fib6_config *cfg,
3849 struct netlink_ext_ack *extack)
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07003850{
3851 struct fib6_config r_cfg;
3852 struct rtnexthop *rtnh;
3853 int remaining;
3854 int attrlen;
3855 int err = 1, last_err = 0;
3856
3857 remaining = cfg->fc_mp_len;
3858 rtnh = (struct rtnexthop *)cfg->fc_mp;
3859
3860 /* Parse a Multipath Entry */
3861 while (rtnh_ok(rtnh, remaining)) {
3862 memcpy(&r_cfg, cfg, sizeof(*cfg));
3863 if (rtnh->rtnh_ifindex)
3864 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
3865
3866 attrlen = rtnh_attrlen(rtnh);
3867 if (attrlen > 0) {
3868 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3869
3870 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3871 if (nla) {
3872 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
3873 r_cfg.fc_flags |= RTF_GATEWAY;
3874 }
3875 }
David Ahern333c4302017-05-21 10:12:04 -06003876 err = ip6_route_del(&r_cfg, extack);
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07003877 if (err)
3878 last_err = err;
3879
Nicolas Dichtel51ebd3182012-10-22 03:42:09 +00003880 rtnh = rtnh_next(rtnh, &remaining);
3881 }
3882
3883 return last_err;
3884}
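/* Deletion mirrors the add path: the same rtnexthop entries inside
 * RTA_MULTIPATH are walked, each nexthop is removed via ip6_route_del() as if
 * it were a standalone route, and the last error seen (if any) is returned.
 */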
3885
David Ahernc21ef3e2017-04-16 09:48:24 -07003886static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
3887 struct netlink_ext_ack *extack)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003888{
Thomas Graf86872cb2006-08-22 00:01:08 -07003889 struct fib6_config cfg;
3890 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003891
David Ahern333c4302017-05-21 10:12:04 -06003892 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
Thomas Graf86872cb2006-08-22 00:01:08 -07003893 if (err < 0)
3894 return err;
3895
Nicolas Dichtel51ebd3182012-10-22 03:42:09 +00003896 if (cfg.fc_mp)
David Ahern333c4302017-05-21 10:12:04 -06003897 return ip6_route_multipath_del(&cfg, extack);
David Ahern0ae81332017-02-02 12:37:08 -08003898 else {
3899 cfg.fc_delete_all_nh = 1;
David Ahern333c4302017-05-21 10:12:04 -06003900 return ip6_route_del(&cfg, extack);
David Ahern0ae81332017-02-02 12:37:08 -08003901 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07003902}
3903
David Ahernc21ef3e2017-04-16 09:48:24 -07003904static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
3905 struct netlink_ext_ack *extack)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003906{
Thomas Graf86872cb2006-08-22 00:01:08 -07003907 struct fib6_config cfg;
3908 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003909
David Ahern333c4302017-05-21 10:12:04 -06003910 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
Thomas Graf86872cb2006-08-22 00:01:08 -07003911 if (err < 0)
3912 return err;
3913
Nicolas Dichtel51ebd3182012-10-22 03:42:09 +00003914 if (cfg.fc_mp)
David Ahern333c4302017-05-21 10:12:04 -06003915 return ip6_route_multipath_add(&cfg, extack);
Nicolas Dichtel51ebd3182012-10-22 03:42:09 +00003916 else
David Ahern333c4302017-05-21 10:12:04 -06003917 return ip6_route_add(&cfg, extack);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003918}
3919
David Ahernbeb1afac52017-02-02 12:37:09 -08003920static size_t rt6_nlmsg_size(struct rt6_info *rt)
Thomas Graf339bf982006-11-10 14:10:15 -08003921{
David Ahernbeb1afac52017-02-02 12:37:09 -08003922 int nexthop_len = 0;
3923
3924 if (rt->rt6i_nsiblings) {
3925 nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
3926 + NLA_ALIGN(sizeof(struct rtnexthop))
3927 + nla_total_size(16) /* RTA_GATEWAY */
David Ahernbeb1afac52017-02-02 12:37:09 -08003928 + lwtunnel_get_encap_size(rt->dst.lwtstate);
3929
3930 nexthop_len *= rt->rt6i_nsiblings;
3931 }
3932
Thomas Graf339bf982006-11-10 14:10:15 -08003933 return NLMSG_ALIGN(sizeof(struct rtmsg))
3934 + nla_total_size(16) /* RTA_SRC */
3935 + nla_total_size(16) /* RTA_DST */
3936 + nla_total_size(16) /* RTA_GATEWAY */
3937 + nla_total_size(16) /* RTA_PREFSRC */
3938 + nla_total_size(4) /* RTA_TABLE */
3939 + nla_total_size(4) /* RTA_IIF */
3940 + nla_total_size(4) /* RTA_OIF */
3941 + nla_total_size(4) /* RTA_PRIORITY */
Noriaki TAKAMIYA6a2b9ce2007-01-23 22:09:41 -08003942 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
Daniel Borkmannea697632015-01-05 23:57:47 +01003943 + nla_total_size(sizeof(struct rta_cacheinfo))
Lubomir Rintelc78ba6d2015-03-11 15:39:21 +01003944 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
Roopa Prabhu19e42e42015-07-21 10:43:48 +02003945 + nla_total_size(1) /* RTA_PREF */
David Ahernbeb1afac52017-02-02 12:37:09 -08003946 + lwtunnel_get_encap_size(rt->dst.lwtstate)
3947 + nexthop_len;
3948}
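/* Sizing note: the fixed part covers a single-nexthop route, and for ECMP
 * routes one rtnexthop + RTA_GATEWAY + encap is budgeted per sibling under
 * RTA_MULTIPATH. inet6_rt_notify() uses this estimate to size the skb before
 * rt6_fill_node() runs, so an overestimate is harmless while an underestimate
 * would make the fill fail with -EMSGSIZE.
 */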
3949
3950static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
David Ahern5be083c2017-03-06 15:57:31 -08003951 unsigned int *flags, bool skip_oif)
David Ahernbeb1afac52017-02-02 12:37:09 -08003952{
3953 if (!netif_running(rt->dst.dev) || !netif_carrier_ok(rt->dst.dev)) {
3954 *flags |= RTNH_F_LINKDOWN;
3955 if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
3956 *flags |= RTNH_F_DEAD;
3957 }
3958
3959 if (rt->rt6i_flags & RTF_GATEWAY) {
3960 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
3961 goto nla_put_failure;
3962 }
3963
Ido Schimmelfe400792017-08-15 09:09:49 +02003964 if (rt->rt6i_nh_flags & RTNH_F_OFFLOAD)
Ido Schimmel61e4d012017-08-03 13:28:20 +02003965 *flags |= RTNH_F_OFFLOAD;
3966
David Ahern5be083c2017-03-06 15:57:31 -08003967	/* not needed for multipath encoding because it has a rtnexthop struct */
3968 if (!skip_oif && rt->dst.dev &&
David Ahernbeb1afac52017-02-02 12:37:09 -08003969 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
3970 goto nla_put_failure;
3971
3972 if (rt->dst.lwtstate &&
3973 lwtunnel_fill_encap(skb, rt->dst.lwtstate) < 0)
3974 goto nla_put_failure;
3975
3976 return 0;
3977
3978nla_put_failure:
3979 return -EMSGSIZE;
3980}
3981
David Ahern5be083c2017-03-06 15:57:31 -08003982/* add multipath next hop */
David Ahernbeb1afac52017-02-02 12:37:09 -08003983static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
3984{
3985 struct rtnexthop *rtnh;
3986 unsigned int flags = 0;
3987
3988 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
3989 if (!rtnh)
3990 goto nla_put_failure;
3991
3992 rtnh->rtnh_hops = 0;
3993 rtnh->rtnh_ifindex = rt->dst.dev ? rt->dst.dev->ifindex : 0;
3994
David Ahern5be083c2017-03-06 15:57:31 -08003995 if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
David Ahernbeb1afac52017-02-02 12:37:09 -08003996 goto nla_put_failure;
3997
3998 rtnh->rtnh_flags = flags;
3999
4000 /* length of rtnetlink header + attributes */
4001 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
4002
4003 return 0;
4004
4005nla_put_failure:
4006 return -EMSGSIZE;
Thomas Graf339bf982006-11-10 14:10:15 -08004007}
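/* Rough wire layout produced for one multipath hop:
 *
 *   struct rtnexthop { rtnh_len, rtnh_flags, rtnh_hops, rtnh_ifindex }
 *   [ RTA_GATEWAY ]           only for RTF_GATEWAY routes
 *   [ lwtunnel encap attrs ]  only when dst.lwtstate is set
 *
 * rtnh_len is patched at the end to cover the header plus the nested
 * attributes; RTA_OIF is skipped because rtnh_ifindex already names the
 * device.
 */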
4008
Brian Haley191cd582008-08-14 15:33:21 -07004009static int rt6_fill_node(struct net *net,
4010 struct sk_buff *skb, struct rt6_info *rt,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07004011 struct in6_addr *dst, struct in6_addr *src,
Eric W. Biederman15e47302012-09-07 20:12:54 +00004012 int iif, int type, u32 portid, u32 seq,
David Ahernf8cfe2c2017-01-17 15:51:08 -08004013 unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004014{
Martin KaFai Lau4b32b5a2015-04-28 13:03:06 -07004015 u32 metrics[RTAX_MAX];
Linus Torvalds1da177e2005-04-16 15:20:36 -07004016 struct rtmsg *rtm;
Thomas Graf2d7202b2006-08-22 00:01:27 -07004017 struct nlmsghdr *nlh;
Thomas Grafe3703b32006-11-27 09:27:07 -08004018 long expires;
Patrick McHardy9e762a42006-08-10 23:09:48 -07004019 u32 table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004020
Eric W. Biederman15e47302012-09-07 20:12:54 +00004021 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
David S. Miller38308472011-12-03 18:02:47 -05004022 if (!nlh)
Patrick McHardy26932562007-01-31 23:16:40 -08004023 return -EMSGSIZE;
Thomas Graf2d7202b2006-08-22 00:01:27 -07004024
4025 rtm = nlmsg_data(nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004026 rtm->rtm_family = AF_INET6;
4027 rtm->rtm_dst_len = rt->rt6i_dst.plen;
4028 rtm->rtm_src_len = rt->rt6i_src.plen;
4029 rtm->rtm_tos = 0;
Thomas Grafc71099a2006-08-04 23:20:06 -07004030 if (rt->rt6i_table)
Patrick McHardy9e762a42006-08-10 23:09:48 -07004031 table = rt->rt6i_table->tb6_id;
Thomas Grafc71099a2006-08-04 23:20:06 -07004032 else
Patrick McHardy9e762a42006-08-10 23:09:48 -07004033 table = RT6_TABLE_UNSPEC;
4034 rtm->rtm_table = table;
David S. Millerc78679e2012-04-01 20:27:33 -04004035 if (nla_put_u32(skb, RTA_TABLE, table))
4036 goto nla_put_failure;
Nicolas Dichtelef2c7d72012-09-05 02:12:42 +00004037 if (rt->rt6i_flags & RTF_REJECT) {
4038 switch (rt->dst.error) {
4039 case -EINVAL:
4040 rtm->rtm_type = RTN_BLACKHOLE;
4041 break;
4042 case -EACCES:
4043 rtm->rtm_type = RTN_PROHIBIT;
4044 break;
Nicolas Dichtelb4949ab2012-09-06 05:53:35 +00004045 case -EAGAIN:
4046 rtm->rtm_type = RTN_THROW;
4047 break;
Nicolas Dichtelef2c7d72012-09-05 02:12:42 +00004048 default:
4049 rtm->rtm_type = RTN_UNREACHABLE;
4050 break;
4051 }
4052 }
David S. Miller38308472011-12-03 18:02:47 -05004053 else if (rt->rt6i_flags & RTF_LOCAL)
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00004054 rtm->rtm_type = RTN_LOCAL;
David Ahern4ee39732017-03-15 18:14:33 -07004055 else if (rt->rt6i_flags & RTF_ANYCAST)
4056 rtm->rtm_type = RTN_ANYCAST;
David S. Millerd1918542011-12-28 20:19:20 -05004057 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
Linus Torvalds1da177e2005-04-16 15:20:36 -07004058 rtm->rtm_type = RTN_LOCAL;
4059 else
4060 rtm->rtm_type = RTN_UNICAST;
4061 rtm->rtm_flags = 0;
4062 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
4063 rtm->rtm_protocol = rt->rt6i_protocol;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004064
David S. Miller38308472011-12-03 18:02:47 -05004065 if (rt->rt6i_flags & RTF_CACHE)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004066 rtm->rtm_flags |= RTM_F_CLONED;
4067
4068 if (dst) {
Jiri Benc930345e2015-03-29 16:59:25 +02004069 if (nla_put_in6_addr(skb, RTA_DST, dst))
David S. Millerc78679e2012-04-01 20:27:33 -04004070 goto nla_put_failure;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09004071 rtm->rtm_dst_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004072 } else if (rtm->rtm_dst_len)
Jiri Benc930345e2015-03-29 16:59:25 +02004073 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
David S. Millerc78679e2012-04-01 20:27:33 -04004074 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004075#ifdef CONFIG_IPV6_SUBTREES
4076 if (src) {
Jiri Benc930345e2015-03-29 16:59:25 +02004077 if (nla_put_in6_addr(skb, RTA_SRC, src))
David S. Millerc78679e2012-04-01 20:27:33 -04004078 goto nla_put_failure;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09004079 rtm->rtm_src_len = 128;
David S. Millerc78679e2012-04-01 20:27:33 -04004080 } else if (rtm->rtm_src_len &&
Jiri Benc930345e2015-03-29 16:59:25 +02004081 nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
David S. Millerc78679e2012-04-01 20:27:33 -04004082 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004083#endif
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09004084 if (iif) {
4085#ifdef CONFIG_IPV6_MROUTE
4086 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
David Ahernfd61c6b2017-01-17 15:51:07 -08004087 int err = ip6mr_get_route(net, skb, rtm, portid);
Nikolay Aleksandrov2cf75072016-09-25 23:08:31 +02004088
David Ahernfd61c6b2017-01-17 15:51:07 -08004089 if (err == 0)
4090 return 0;
4091 if (err < 0)
4092 goto nla_put_failure;
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09004093 } else
4094#endif
David S. Millerc78679e2012-04-01 20:27:33 -04004095 if (nla_put_u32(skb, RTA_IIF, iif))
4096 goto nla_put_failure;
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09004097 } else if (dst) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07004098 struct in6_addr saddr_buf;
David S. Millerc78679e2012-04-01 20:27:33 -04004099 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
Jiri Benc930345e2015-03-29 16:59:25 +02004100 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
David S. Millerc78679e2012-04-01 20:27:33 -04004101 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004102 }
Thomas Graf2d7202b2006-08-22 00:01:27 -07004103
Daniel Walterc3968a82011-04-13 21:10:57 +00004104 if (rt->rt6i_prefsrc.plen) {
4105 struct in6_addr saddr_buf;
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00004106 saddr_buf = rt->rt6i_prefsrc.addr;
Jiri Benc930345e2015-03-29 16:59:25 +02004107 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
David S. Millerc78679e2012-04-01 20:27:33 -04004108 goto nla_put_failure;
Daniel Walterc3968a82011-04-13 21:10:57 +00004109 }
4110
Martin KaFai Lau4b32b5a2015-04-28 13:03:06 -07004111 memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
4112 if (rt->rt6i_pmtu)
4113 metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
4114 if (rtnetlink_put_metrics(skb, metrics) < 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07004115 goto nla_put_failure;
4116
David S. Millerc78679e2012-04-01 20:27:33 -04004117 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
4118 goto nla_put_failure;
Li Wei82539472012-07-29 16:01:30 +00004119
David Ahernbeb1afac52017-02-02 12:37:09 -08004120 /* For multipath routes, walk the siblings list and add
4121 * each as a nexthop within RTA_MULTIPATH.
4122 */
4123 if (rt->rt6i_nsiblings) {
4124 struct rt6_info *sibling, *next_sibling;
4125 struct nlattr *mp;
4126
4127 mp = nla_nest_start(skb, RTA_MULTIPATH);
4128 if (!mp)
4129 goto nla_put_failure;
4130
4131 if (rt6_add_nexthop(skb, rt) < 0)
4132 goto nla_put_failure;
4133
4134 list_for_each_entry_safe(sibling, next_sibling,
4135 &rt->rt6i_siblings, rt6i_siblings) {
4136 if (rt6_add_nexthop(skb, sibling) < 0)
4137 goto nla_put_failure;
4138 }
4139
4140 nla_nest_end(skb, mp);
4141 } else {
David Ahern5be083c2017-03-06 15:57:31 -08004142 if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
David Ahernbeb1afac52017-02-02 12:37:09 -08004143 goto nla_put_failure;
4144 }
4145
Li Wei82539472012-07-29 16:01:30 +00004146 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
YOSHIFUJI Hideaki69cdf8f2008-05-19 16:55:13 -07004147
David S. Miller87a50692012-07-10 05:06:14 -07004148 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
Thomas Grafe3703b32006-11-27 09:27:07 -08004149 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004150
Lubomir Rintelc78ba6d2015-03-11 15:39:21 +01004151 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
4152 goto nla_put_failure;
4153
Roopa Prabhu19e42e42015-07-21 10:43:48 +02004154
Johannes Berg053c0952015-01-16 22:09:00 +01004155 nlmsg_end(skb, nlh);
4156 return 0;
Thomas Graf2d7202b2006-08-22 00:01:27 -07004157
4158nla_put_failure:
Patrick McHardy26932562007-01-31 23:16:40 -08004159 nlmsg_cancel(skb, nlh);
4160 return -EMSGSIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004161}
4162
Patrick McHardy1b43af52006-08-10 23:11:17 -07004163int rt6_dump_route(struct rt6_info *rt, void *p_arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004164{
4165 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
David Ahern1f17e2f2017-01-26 13:54:08 -08004166 struct net *net = arg->net;
4167
4168 if (rt == net->ipv6.ip6_null_entry)
4169 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004170
Thomas Graf2d7202b2006-08-22 00:01:27 -07004171 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
4172 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
David Ahernf8cfe2c2017-01-17 15:51:08 -08004173
4174 /* user wants prefix routes only */
4175 if (rtm->rtm_flags & RTM_F_PREFIX &&
4176 !(rt->rt6i_flags & RTF_PREFIX_RT)) {
4177 /* success since this is not a prefix route */
4178 return 1;
4179 }
4180 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07004181
David Ahern1f17e2f2017-01-26 13:54:08 -08004182 return rt6_fill_node(net,
Brian Haley191cd582008-08-14 15:33:21 -07004183 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
Eric W. Biederman15e47302012-09-07 20:12:54 +00004184 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
David Ahernf8cfe2c2017-01-17 15:51:08 -08004185 NLM_F_MULTI);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004186}
4187
David Ahernc21ef3e2017-04-16 09:48:24 -07004188static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4189 struct netlink_ext_ack *extack)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004190{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09004191 struct net *net = sock_net(in_skb->sk);
Thomas Grafab364a62006-08-22 00:01:47 -07004192 struct nlattr *tb[RTA_MAX+1];
Roopa Prabhu18c3a612017-05-25 10:42:40 -07004193 int err, iif = 0, oif = 0;
4194 struct dst_entry *dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004195 struct rt6_info *rt;
Thomas Grafab364a62006-08-22 00:01:47 -07004196 struct sk_buff *skb;
4197 struct rtmsg *rtm;
David S. Miller4c9483b2011-03-12 16:22:43 -05004198 struct flowi6 fl6;
Roopa Prabhu18c3a612017-05-25 10:42:40 -07004199 bool fibmatch;
Thomas Grafab364a62006-08-22 00:01:47 -07004200
Johannes Bergfceb6432017-04-12 14:34:07 +02004201 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
David Ahernc21ef3e2017-04-16 09:48:24 -07004202 extack);
Thomas Grafab364a62006-08-22 00:01:47 -07004203 if (err < 0)
4204 goto errout;
4205
4206 err = -EINVAL;
David S. Miller4c9483b2011-03-12 16:22:43 -05004207 memset(&fl6, 0, sizeof(fl6));
Hannes Frederic Sowa38b70972016-06-11 20:08:19 +02004208 rtm = nlmsg_data(nlh);
4209 fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
Roopa Prabhu18c3a612017-05-25 10:42:40 -07004210 fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
Thomas Grafab364a62006-08-22 00:01:47 -07004211
4212 if (tb[RTA_SRC]) {
4213 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
4214 goto errout;
4215
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00004216 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
Thomas Grafab364a62006-08-22 00:01:47 -07004217 }
4218
4219 if (tb[RTA_DST]) {
4220 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
4221 goto errout;
4222
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00004223 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
Thomas Grafab364a62006-08-22 00:01:47 -07004224 }
4225
4226 if (tb[RTA_IIF])
4227 iif = nla_get_u32(tb[RTA_IIF]);
4228
4229 if (tb[RTA_OIF])
Shmulik Ladkani72331bc2012-04-01 04:03:45 +00004230 oif = nla_get_u32(tb[RTA_OIF]);
Thomas Grafab364a62006-08-22 00:01:47 -07004231
Lorenzo Colitti2e47b292014-05-15 16:38:41 -07004232 if (tb[RTA_MARK])
4233 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
4234
Lorenzo Colitti622ec2c2016-11-04 02:23:42 +09004235 if (tb[RTA_UID])
4236 fl6.flowi6_uid = make_kuid(current_user_ns(),
4237 nla_get_u32(tb[RTA_UID]));
4238 else
4239 fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
4240
Thomas Grafab364a62006-08-22 00:01:47 -07004241 if (iif) {
4242 struct net_device *dev;
Shmulik Ladkani72331bc2012-04-01 04:03:45 +00004243 int flags = 0;
4244
Florian Westphal121622d2017-08-15 16:34:42 +02004245 rcu_read_lock();
4246
4247 dev = dev_get_by_index_rcu(net, iif);
Thomas Grafab364a62006-08-22 00:01:47 -07004248 if (!dev) {
Florian Westphal121622d2017-08-15 16:34:42 +02004249 rcu_read_unlock();
Thomas Grafab364a62006-08-22 00:01:47 -07004250 err = -ENODEV;
4251 goto errout;
4252 }
Shmulik Ladkani72331bc2012-04-01 04:03:45 +00004253
4254 fl6.flowi6_iif = iif;
4255
4256 if (!ipv6_addr_any(&fl6.saddr))
4257 flags |= RT6_LOOKUP_F_HAS_SADDR;
4258
Roopa Prabhu18c3a612017-05-25 10:42:40 -07004259 if (!fibmatch)
4260 dst = ip6_route_input_lookup(net, dev, &fl6, flags);
Arnd Bergmann401481e2017-08-18 13:34:22 +02004261 else
4262 dst = ip6_route_lookup(net, &fl6, 0);
Florian Westphal121622d2017-08-15 16:34:42 +02004263
4264 rcu_read_unlock();
Shmulik Ladkani72331bc2012-04-01 04:03:45 +00004265 } else {
4266 fl6.flowi6_oif = oif;
4267
Roopa Prabhu18c3a612017-05-25 10:42:40 -07004268 if (!fibmatch)
4269 dst = ip6_route_output(net, NULL, &fl6);
Arnd Bergmann401481e2017-08-18 13:34:22 +02004270 else
4271 dst = ip6_route_lookup(net, &fl6, 0);
Roopa Prabhu18c3a612017-05-25 10:42:40 -07004272 }
4273
Roopa Prabhu18c3a612017-05-25 10:42:40 -07004274
4275 rt = container_of(dst, struct rt6_info, dst);
4276 if (rt->dst.error) {
4277 err = rt->dst.error;
4278 ip6_rt_put(rt);
4279 goto errout;
Thomas Grafab364a62006-08-22 00:01:47 -07004280 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07004281
WANG Cong9d6acb32017-03-01 20:48:39 -08004282 if (rt == net->ipv6.ip6_null_entry) {
4283 err = rt->dst.error;
4284 ip6_rt_put(rt);
4285 goto errout;
4286 }
4287
Linus Torvalds1da177e2005-04-16 15:20:36 -07004288 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
David S. Miller38308472011-12-03 18:02:47 -05004289 if (!skb) {
Amerigo Wang94e187c2012-10-29 00:13:19 +00004290 ip6_rt_put(rt);
Thomas Grafab364a62006-08-22 00:01:47 -07004291 err = -ENOBUFS;
4292 goto errout;
4293 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07004294
Changli Gaod8d1f302010-06-10 23:31:35 -07004295 skb_dst_set(skb, &rt->dst);
Roopa Prabhu18c3a612017-05-25 10:42:40 -07004296 if (fibmatch)
4297 err = rt6_fill_node(net, skb, rt, NULL, NULL, iif,
4298 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
4299 nlh->nlmsg_seq, 0);
4300 else
4301 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
4302 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
4303 nlh->nlmsg_seq, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004304 if (err < 0) {
Thomas Grafab364a62006-08-22 00:01:47 -07004305 kfree_skb(skb);
4306 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004307 }
4308
Eric W. Biederman15e47302012-09-07 20:12:54 +00004309 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
Thomas Grafab364a62006-08-22 00:01:47 -07004310errout:
Linus Torvalds1da177e2005-04-16 15:20:36 -07004311 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004312}
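/* Illustrative userspace triggers (iproute2 syntax, address invented):
 *
 *   ip -6 route get 2001:db8::1           # regular dst lookup
 *   ip -6 route get fibmatch 2001:db8::1  # sets RTM_F_FIB_MATCH and returns
 *                                         # the matching FIB entry instead
 *
 * Both arrive here as RTM_GETROUTE; the fibmatch flag selects the
 * ip6_route_lookup() branch above.
 */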
4313
Roopa Prabhu37a1d362015-09-13 10:18:33 -07004314void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
4315 unsigned int nlm_flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004316{
4317 struct sk_buff *skb;
Daniel Lezcano55786892008-03-04 13:47:47 -08004318 struct net *net = info->nl_net;
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08004319 u32 seq;
4320 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004321
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08004322 err = -ENOBUFS;
David S. Miller38308472011-12-03 18:02:47 -05004323 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
Thomas Graf86872cb2006-08-22 00:01:08 -07004324
Roopa Prabhu19e42e42015-07-21 10:43:48 +02004325 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
David S. Miller38308472011-12-03 18:02:47 -05004326 if (!skb)
Thomas Graf21713eb2006-08-15 00:35:24 -07004327 goto errout;
4328
Brian Haley191cd582008-08-14 15:33:21 -07004329 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
David Ahernf8cfe2c2017-01-17 15:51:08 -08004330 event, info->portid, seq, nlm_flags);
Patrick McHardy26932562007-01-31 23:16:40 -08004331 if (err < 0) {
4332 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
4333 WARN_ON(err == -EMSGSIZE);
4334 kfree_skb(skb);
4335 goto errout;
4336 }
Eric W. Biederman15e47302012-09-07 20:12:54 +00004337 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
Pablo Neira Ayuso1ce85fe2009-02-24 23:18:28 -08004338 info->nlh, gfp_any());
4339 return;
Thomas Graf21713eb2006-08-15 00:35:24 -07004340errout:
4341 if (err < 0)
Daniel Lezcano55786892008-03-04 13:47:47 -08004342 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004343}
4344
Daniel Lezcano8ed67782008-03-04 13:48:30 -08004345static int ip6_route_dev_notify(struct notifier_block *this,
Jiri Pirko351638e2013-05-28 01:30:21 +00004346 unsigned long event, void *ptr)
Daniel Lezcano8ed67782008-03-04 13:48:30 -08004347{
Jiri Pirko351638e2013-05-28 01:30:21 +00004348 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09004349 struct net *net = dev_net(dev);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08004350
WANG Cong242d3a42017-05-08 10:12:13 -07004351 if (!(dev->flags & IFF_LOOPBACK))
4352 return NOTIFY_OK;
4353
4354 if (event == NETDEV_REGISTER) {
Changli Gaod8d1f302010-06-10 23:31:35 -07004355 net->ipv6.ip6_null_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08004356 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
4357#ifdef CONFIG_IPV6_MULTIPLE_TABLES
Changli Gaod8d1f302010-06-10 23:31:35 -07004358 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08004359 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
Changli Gaod8d1f302010-06-10 23:31:35 -07004360 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08004361 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
4362#endif
WANG Cong76da0702017-06-20 11:42:27 -07004363 } else if (event == NETDEV_UNREGISTER &&
4364 dev->reg_state != NETREG_UNREGISTERED) {
 4365		/* NETDEV_UNREGISTER could be fired multiple times by
4366 * netdev_wait_allrefs(). Make sure we only call this once.
4367 */
Eric Dumazet12d94a82017-08-15 04:09:51 -07004368 in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
WANG Cong242d3a42017-05-08 10:12:13 -07004369#ifdef CONFIG_IPV6_MULTIPLE_TABLES
Eric Dumazet12d94a82017-08-15 04:09:51 -07004370 in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
4371 in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
WANG Cong242d3a42017-05-08 10:12:13 -07004372#endif
Daniel Lezcano8ed67782008-03-04 13:48:30 -08004373 }
4374
4375 return NOTIFY_OK;
4376}
4377
Linus Torvalds1da177e2005-04-16 15:20:36 -07004378/*
4379 * /proc
4380 */
4381
4382#ifdef CONFIG_PROC_FS
4383
Alexey Dobriyan33120b32007-11-06 05:27:11 -08004384static const struct file_operations ipv6_route_proc_fops = {
4385 .owner = THIS_MODULE,
4386 .open = ipv6_route_open,
4387 .read = seq_read,
4388 .llseek = seq_lseek,
Hannes Frederic Sowa8d2ca1d2013-09-21 16:55:59 +02004389 .release = seq_release_net,
Alexey Dobriyan33120b32007-11-06 05:27:11 -08004390};
4391
Linus Torvalds1da177e2005-04-16 15:20:36 -07004392static int rt6_stats_seq_show(struct seq_file *seq, void *v)
4393{
Daniel Lezcano69ddb802008-03-04 13:46:23 -08004394 struct net *net = (struct net *)seq->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004395 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
Daniel Lezcano69ddb802008-03-04 13:46:23 -08004396 net->ipv6.rt6_stats->fib_nodes,
4397 net->ipv6.rt6_stats->fib_route_nodes,
4398 net->ipv6.rt6_stats->fib_rt_alloc,
4399 net->ipv6.rt6_stats->fib_rt_entries,
4400 net->ipv6.rt6_stats->fib_rt_cache,
Eric Dumazetfc66f952010-10-08 06:37:34 +00004401 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
Daniel Lezcano69ddb802008-03-04 13:46:23 -08004402 net->ipv6.rt6_stats->fib_discarded_routes);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004403
4404 return 0;
4405}
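/* Example /proc/net/rt6_stats line in this format (seven %04x fields; the
 * values below are invented):
 *
 *   002a 0054 0000 0068 0005 0012 004c
 *
 * Fields, in order: fib nodes, route nodes, rt_alloc, rt entries, rt cache,
 * dst entries (slow count), discarded routes.
 */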
4406
4407static int rt6_stats_seq_open(struct inode *inode, struct file *file)
4408{
Pavel Emelyanovde05c552008-07-18 04:07:21 -07004409 return single_open_net(inode, file, rt6_stats_seq_show);
Daniel Lezcano69ddb802008-03-04 13:46:23 -08004410}
4411
Arjan van de Ven9a321442007-02-12 00:55:35 -08004412static const struct file_operations rt6_stats_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07004413 .owner = THIS_MODULE,
4414 .open = rt6_stats_seq_open,
4415 .read = seq_read,
4416 .llseek = seq_lseek,
Pavel Emelyanovb6fcbdb2008-07-18 04:07:44 -07004417 .release = single_release_net,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004418};
4419#endif /* CONFIG_PROC_FS */
4420
4421#ifdef CONFIG_SYSCTL
4422
Linus Torvalds1da177e2005-04-16 15:20:36 -07004423static
Joe Perchesfe2c6332013-06-11 23:04:25 -07004424int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004425 void __user *buffer, size_t *lenp, loff_t *ppos)
4426{
Lucian Adrian Grijincuc486da32011-02-24 19:48:03 +00004427 struct net *net;
4428 int delay;
4429 if (!write)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004430 return -EINVAL;
Lucian Adrian Grijincuc486da32011-02-24 19:48:03 +00004431
4432 net = (struct net *)ctl->extra1;
4433 delay = net->ipv6.sysctl.flush_delay;
4434 proc_dointvec(ctl, write, buffer, lenp, ppos);
Michal Kubeček2ac3ac82013-08-01 10:04:14 +02004435 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
Lucian Adrian Grijincuc486da32011-02-24 19:48:03 +00004436 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004437}
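/* This handler backs the write-only net.ipv6.route.flush sysctl. A sketch of
 * the usual invocation (the value written is arbitrary):
 *
 *   sysctl -w net.ipv6.route.flush=1
 *   # equivalently: echo 1 > /proc/sys/net/ipv6/route/flush
 *
 * Any successful write ends in fib6_run_gc() for this netns, with the
 * delay/force arguments derived from flush_delay as computed above.
 */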
4438
Joe Perchesfe2c6332013-06-11 23:04:25 -07004439struct ctl_table ipv6_route_table_template[] = {
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09004440 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07004441 .procname = "flush",
Daniel Lezcano49905092008-01-10 03:01:01 -08004442 .data = &init_net.ipv6.sysctl.flush_delay,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004443 .maxlen = sizeof(int),
Dave Jones89c8b3a12005-04-28 12:11:49 -07004444 .mode = 0200,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08004445 .proc_handler = ipv6_sysctl_rtcache_flush
Linus Torvalds1da177e2005-04-16 15:20:36 -07004446 },
4447 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07004448 .procname = "gc_thresh",
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08004449 .data = &ip6_dst_ops_template.gc_thresh,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004450 .maxlen = sizeof(int),
4451 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08004452 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004453 },
4454 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07004455 .procname = "max_size",
Daniel Lezcano49905092008-01-10 03:01:01 -08004456 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004457 .maxlen = sizeof(int),
4458 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08004459 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004460 },
4461 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07004462 .procname = "gc_min_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08004463 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004464 .maxlen = sizeof(int),
4465 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08004466 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004467 },
4468 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07004469 .procname = "gc_timeout",
Daniel Lezcano49905092008-01-10 03:01:01 -08004470 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004471 .maxlen = sizeof(int),
4472 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08004473 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004474 },
4475 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07004476 .procname = "gc_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08004477 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004478 .maxlen = sizeof(int),
4479 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08004480 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004481 },
4482 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07004483 .procname = "gc_elasticity",
Daniel Lezcano49905092008-01-10 03:01:01 -08004484 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004485 .maxlen = sizeof(int),
4486 .mode = 0644,
Min Zhangf3d3f612010-08-14 22:42:51 -07004487 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004488 },
4489 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07004490 .procname = "mtu_expires",
Daniel Lezcano49905092008-01-10 03:01:01 -08004491 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004492 .maxlen = sizeof(int),
4493 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08004494 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004495 },
4496 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07004497 .procname = "min_adv_mss",
Daniel Lezcano49905092008-01-10 03:01:01 -08004498 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004499 .maxlen = sizeof(int),
4500 .mode = 0644,
Min Zhangf3d3f612010-08-14 22:42:51 -07004501 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004502 },
4503 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07004504 .procname = "gc_min_interval_ms",
Daniel Lezcano49905092008-01-10 03:01:01 -08004505 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004506 .maxlen = sizeof(int),
4507 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08004508 .proc_handler = proc_dointvec_ms_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004509 },
Eric W. Biedermanf8572d82009-11-05 13:32:03 -08004510 { }
Linus Torvalds1da177e2005-04-16 15:20:36 -07004511};
4512
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00004513struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
Daniel Lezcano760f2d02008-01-10 02:53:43 -08004514{
4515 struct ctl_table *table;
4516
4517 table = kmemdup(ipv6_route_table_template,
4518 sizeof(ipv6_route_table_template),
4519 GFP_KERNEL);
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09004520
4521 if (table) {
4522 table[0].data = &net->ipv6.sysctl.flush_delay;
Lucian Adrian Grijincuc486da32011-02-24 19:48:03 +00004523 table[0].extra1 = net;
Alexey Dobriyan86393e52009-08-29 01:34:49 +00004524 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09004525 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
4526 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
4527 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
4528 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
4529 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
4530 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
4531 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
Alexey Dobriyan9c69fab2009-12-18 20:11:03 -08004532 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
Eric W. Biederman464dc802012-11-16 03:02:59 +00004533
4534 /* Don't export sysctls to unprivileged users */
4535 if (net->user_ns != &init_user_ns)
4536 table[0].procname = NULL;
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09004537 }
4538
Daniel Lezcano760f2d02008-01-10 02:53:43 -08004539 return table;
4540}
Linus Torvalds1da177e2005-04-16 15:20:36 -07004541#endif
4542
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00004543static int __net_init ip6_route_net_init(struct net *net)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08004544{
Pavel Emelyanov633d424b2008-04-21 14:25:23 -07004545 int ret = -ENOMEM;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08004546
Alexey Dobriyan86393e52009-08-29 01:34:49 +00004547 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
4548 sizeof(net->ipv6.ip6_dst_ops));
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08004549
Eric Dumazetfc66f952010-10-08 06:37:34 +00004550 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
4551 goto out_ip6_dst_ops;
4552
Daniel Lezcano8ed67782008-03-04 13:48:30 -08004553 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
4554 sizeof(*net->ipv6.ip6_null_entry),
4555 GFP_KERNEL);
4556 if (!net->ipv6.ip6_null_entry)
Eric Dumazetfc66f952010-10-08 06:37:34 +00004557 goto out_ip6_dst_entries;
Changli Gaod8d1f302010-06-10 23:31:35 -07004558 net->ipv6.ip6_null_entry->dst.path =
Daniel Lezcano8ed67782008-03-04 13:48:30 -08004559 (struct dst_entry *)net->ipv6.ip6_null_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07004560 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
David S. Miller62fa8a82011-01-26 20:51:05 -08004561 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
4562 ip6_template_metrics, true);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08004563
4564#ifdef CONFIG_IPV6_MULTIPLE_TABLES
Vincent Bernatfeca7d82017-08-08 20:23:49 +02004565 net->ipv6.fib6_has_custom_rules = false;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08004566 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
4567 sizeof(*net->ipv6.ip6_prohibit_entry),
4568 GFP_KERNEL);
Peter Zijlstra68fffc62008-10-07 14:12:10 -07004569 if (!net->ipv6.ip6_prohibit_entry)
4570 goto out_ip6_null_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07004571 net->ipv6.ip6_prohibit_entry->dst.path =
Daniel Lezcano8ed67782008-03-04 13:48:30 -08004572 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07004573 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
David S. Miller62fa8a82011-01-26 20:51:05 -08004574 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
4575 ip6_template_metrics, true);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08004576
4577 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
4578 sizeof(*net->ipv6.ip6_blk_hole_entry),
4579 GFP_KERNEL);
Peter Zijlstra68fffc62008-10-07 14:12:10 -07004580 if (!net->ipv6.ip6_blk_hole_entry)
4581 goto out_ip6_prohibit_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07004582 net->ipv6.ip6_blk_hole_entry->dst.path =
Daniel Lezcano8ed67782008-03-04 13:48:30 -08004583 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07004584 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
David S. Miller62fa8a82011-01-26 20:51:05 -08004585 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
4586 ip6_template_metrics, true);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08004587#endif
4588
Peter Zijlstrab339a47c2008-10-07 14:15:00 -07004589 net->ipv6.sysctl.flush_delay = 0;
4590 net->ipv6.sysctl.ip6_rt_max_size = 4096;
4591 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
4592 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
4593 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
4594 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
4595 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
4596 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
4597
Benjamin Thery6891a342008-03-04 13:49:47 -08004598 net->ipv6.ip6_rt_gc_expire = 30*HZ;
4599
Daniel Lezcano8ed67782008-03-04 13:48:30 -08004600 ret = 0;
4601out:
4602 return ret;
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08004603
Peter Zijlstra68fffc62008-10-07 14:12:10 -07004604#ifdef CONFIG_IPV6_MULTIPLE_TABLES
4605out_ip6_prohibit_entry:
4606 kfree(net->ipv6.ip6_prohibit_entry);
4607out_ip6_null_entry:
4608 kfree(net->ipv6.ip6_null_entry);
4609#endif
Eric Dumazetfc66f952010-10-08 06:37:34 +00004610out_ip6_dst_entries:
4611 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08004612out_ip6_dst_ops:
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08004613 goto out;
Daniel Lezcanocdb18762008-03-04 13:45:33 -08004614}
4615
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00004616static void __net_exit ip6_route_net_exit(struct net *net)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08004617{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08004618 kfree(net->ipv6.ip6_null_entry);
4619#ifdef CONFIG_IPV6_MULTIPLE_TABLES
4620 kfree(net->ipv6.ip6_prohibit_entry);
4621 kfree(net->ipv6.ip6_blk_hole_entry);
4622#endif
Xiaotian Feng41bb78b2010-11-02 16:11:05 +00004623 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
Daniel Lezcanocdb18762008-03-04 13:45:33 -08004624}
4625
Thomas Grafd1896342012-06-18 12:08:33 +00004626static int __net_init ip6_route_net_init_late(struct net *net)
4627{
4628#ifdef CONFIG_PROC_FS
Gao fengd4beaa62013-02-18 01:34:54 +00004629 proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
4630 proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
Thomas Grafd1896342012-06-18 12:08:33 +00004631#endif
4632 return 0;
4633}
4634
4635static void __net_exit ip6_route_net_exit_late(struct net *net)
4636{
4637#ifdef CONFIG_PROC_FS
Gao fengece31ff2013-02-18 01:34:56 +00004638 remove_proc_entry("ipv6_route", net->proc_net);
4639 remove_proc_entry("rt6_stats", net->proc_net);
Thomas Grafd1896342012-06-18 12:08:33 +00004640#endif
4641}
4642
Daniel Lezcanocdb18762008-03-04 13:45:33 -08004643static struct pernet_operations ip6_route_net_ops = {
4644 .init = ip6_route_net_init,
4645 .exit = ip6_route_net_exit,
4646};
4647
David S. Millerc3426b42012-06-09 16:27:05 -07004648static int __net_init ipv6_inetpeer_init(struct net *net)
4649{
4650 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
4651
4652 if (!bp)
4653 return -ENOMEM;
4654 inet_peer_base_init(bp);
4655 net->ipv6.peers = bp;
4656 return 0;
4657}
4658
4659static void __net_exit ipv6_inetpeer_exit(struct net *net)
4660{
4661 struct inet_peer_base *bp = net->ipv6.peers;
4662
4663 net->ipv6.peers = NULL;
David S. Miller56a6b242012-06-09 16:32:41 -07004664 inetpeer_invalidate_tree(bp);
David S. Millerc3426b42012-06-09 16:27:05 -07004665 kfree(bp);
4666}
4667
David S. Miller2b823f72012-06-09 19:00:16 -07004668static struct pernet_operations ipv6_inetpeer_ops = {
David S. Millerc3426b42012-06-09 16:27:05 -07004669 .init = ipv6_inetpeer_init,
4670 .exit = ipv6_inetpeer_exit,
4671};
4672
Thomas Grafd1896342012-06-18 12:08:33 +00004673static struct pernet_operations ip6_route_net_late_ops = {
4674 .init = ip6_route_net_init_late,
4675 .exit = ip6_route_net_exit_late,
4676};
4677
Daniel Lezcano8ed67782008-03-04 13:48:30 -08004678static struct notifier_block ip6_route_dev_notifier = {
4679 .notifier_call = ip6_route_dev_notify,
WANG Cong242d3a42017-05-08 10:12:13 -07004680 .priority = ADDRCONF_NOTIFY_PRIORITY - 10,
Daniel Lezcano8ed67782008-03-04 13:48:30 -08004681};
4682
WANG Cong2f460932017-05-03 22:07:31 -07004683void __init ip6_route_init_special_entries(void)
4684{
 4685	/* Registration of the loopback device is done before this portion of
 4686	 * code, so the loopback reference in rt6_info will not have been
 4687	 * taken; do it manually for init_net */
4688 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
4689 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
4690 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
4691 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
4692 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
4693 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
4694 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
4695 #endif
4696}
4697
Daniel Lezcano433d49c2007-12-07 00:43:48 -08004698int __init ip6_route_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004699{
Daniel Lezcano433d49c2007-12-07 00:43:48 -08004700 int ret;
Martin KaFai Lau8d0b94a2015-05-22 20:56:04 -07004701 int cpu;
Daniel Lezcano433d49c2007-12-07 00:43:48 -08004702
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08004703 ret = -ENOMEM;
4704 ip6_dst_ops_template.kmem_cachep =
4705 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
4706 SLAB_HWCACHE_ALIGN, NULL);
4707 if (!ip6_dst_ops_template.kmem_cachep)
Fernando Carrijoc19a28e2009-01-07 18:09:08 -08004708 goto out;
David S. Miller14e50e52007-05-24 18:17:54 -07004709
Eric Dumazetfc66f952010-10-08 06:37:34 +00004710 ret = dst_entries_init(&ip6_dst_blackhole_ops);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08004711 if (ret)
Daniel Lezcanobdb32892008-03-04 13:48:10 -08004712 goto out_kmem_cache;
Daniel Lezcanobdb32892008-03-04 13:48:10 -08004713
David S. Millerc3426b42012-06-09 16:27:05 -07004714 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
4715 if (ret)
David S. Millere8803b62012-06-16 01:12:19 -07004716 goto out_dst_entries;
Thomas Graf2a0c4512012-06-14 23:00:17 +00004717
David S. Miller7e52b332012-06-15 15:51:55 -07004718 ret = register_pernet_subsys(&ip6_route_net_ops);
4719 if (ret)
4720 goto out_register_inetpeer;
David S. Millerc3426b42012-06-09 16:27:05 -07004721
Arnaud Ebalard5dc121e2008-10-01 02:37:56 -07004722 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
4723
David S. Millere8803b62012-06-16 01:12:19 -07004724 ret = fib6_init();
Daniel Lezcano433d49c2007-12-07 00:43:48 -08004725 if (ret)
Daniel Lezcano8ed67782008-03-04 13:48:30 -08004726 goto out_register_subsys;
Daniel Lezcano433d49c2007-12-07 00:43:48 -08004727
Daniel Lezcano433d49c2007-12-07 00:43:48 -08004728 ret = xfrm6_init();
4729 if (ret)
David S. Millere8803b62012-06-16 01:12:19 -07004730 goto out_fib6_init;
Daniel Lezcanoc35b7e72007-12-08 00:14:11 -08004731
Daniel Lezcano433d49c2007-12-07 00:43:48 -08004732 ret = fib6_rules_init();
4733 if (ret)
4734 goto xfrm6_init;
Daniel Lezcano7e5449c2007-12-08 00:14:54 -08004735
Thomas Grafd1896342012-06-18 12:08:33 +00004736 ret = register_pernet_subsys(&ip6_route_net_late_ops);
4737 if (ret)
4738 goto fib6_rules_init;
4739
Daniel Lezcano433d49c2007-12-07 00:43:48 -08004740 ret = -ENOBUFS;
Florian Westphalb97bac62017-08-09 20:41:48 +02004741 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, 0) ||
4742 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, 0) ||
Florian Westphale3a22b72017-08-15 16:34:43 +02004743 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL,
4744 RTNL_FLAG_DOIT_UNLOCKED))
Thomas Grafd1896342012-06-18 12:08:33 +00004745 goto out_register_late_subsys;
Daniel Lezcano433d49c2007-12-07 00:43:48 -08004746
Daniel Lezcano8ed67782008-03-04 13:48:30 -08004747 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
Daniel Lezcanocdb18762008-03-04 13:45:33 -08004748 if (ret)
Thomas Grafd1896342012-06-18 12:08:33 +00004749 goto out_register_late_subsys;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08004750
Martin KaFai Lau8d0b94a2015-05-22 20:56:04 -07004751 for_each_possible_cpu(cpu) {
4752 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
4753
4754 INIT_LIST_HEAD(&ul->head);
4755 spin_lock_init(&ul->lock);
4756 }
4757
Daniel Lezcano433d49c2007-12-07 00:43:48 -08004758out:
4759 return ret;
4760
Thomas Grafd1896342012-06-18 12:08:33 +00004761out_register_late_subsys:
4762 unregister_pernet_subsys(&ip6_route_net_late_ops);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08004763fib6_rules_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08004764 fib6_rules_cleanup();
4765xfrm6_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08004766 xfrm6_fini();
Thomas Graf2a0c4512012-06-14 23:00:17 +00004767out_fib6_init:
4768 fib6_gc_cleanup();
Daniel Lezcano8ed67782008-03-04 13:48:30 -08004769out_register_subsys:
4770 unregister_pernet_subsys(&ip6_route_net_ops);
David S. Miller7e52b332012-06-15 15:51:55 -07004771out_register_inetpeer:
4772 unregister_pernet_subsys(&ipv6_inetpeer_ops);
Eric Dumazetfc66f952010-10-08 06:37:34 +00004773out_dst_entries:
4774 dst_entries_destroy(&ip6_dst_blackhole_ops);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08004775out_kmem_cache:
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08004776 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08004777 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004778}
4779
4780void ip6_route_cleanup(void)
4781{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08004782 unregister_netdevice_notifier(&ip6_route_dev_notifier);
Thomas Grafd1896342012-06-18 12:08:33 +00004783 unregister_pernet_subsys(&ip6_route_net_late_ops);
Thomas Graf101367c2006-08-04 03:39:02 -07004784 fib6_rules_cleanup();
Linus Torvalds1da177e2005-04-16 15:20:36 -07004785 xfrm6_fini();
Linus Torvalds1da177e2005-04-16 15:20:36 -07004786 fib6_gc_cleanup();
David S. Millerc3426b42012-06-09 16:27:05 -07004787 unregister_pernet_subsys(&ipv6_inetpeer_ops);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08004788 unregister_pernet_subsys(&ip6_route_net_ops);
Xiaotian Feng41bb78b2010-11-02 16:11:05 +00004789 dst_entries_destroy(&ip6_dst_blackhole_ops);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08004790 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004791}