blob: 6cb7cff22db9ca9a08d00fb2ccab732c145b4e9c [file] [log] [blame]
Thomas Gleixner2874c5f2019-05-27 08:55:01 +02001// SPDX-License-Identifier: GPL-2.0-or-later
Linus Torvalds1da177e2005-04-16 15:20:36 -07002/*
3 * INET An implementation of the TCP/IP protocol suite for the LINUX
4 * operating system. INET is implemented using the BSD Socket
5 * interface as the means of communication with the user level.
6 *
7 * ROUTE - implementation of the IP router.
8 *
Jesper Juhl02c30a82005-05-05 16:16:16 -07009 * Authors: Ross Biro
Linus Torvalds1da177e2005-04-16 15:20:36 -070010 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
11 * Alan Cox, <gw4pts@gw4pts.ampr.org>
12 * Linus Torvalds, <Linus.Torvalds@helsinki.fi>
13 * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
14 *
15 * Fixes:
16 * Alan Cox : Verify area fixes.
17 * Alan Cox : cli() protects routing changes
18 * Rui Oliveira : ICMP routing table updates
19 * (rco@di.uminho.pt) Routing table insertion and update
20 * Linus Torvalds : Rewrote bits to be sensible
21 * Alan Cox : Added BSD route gw semantics
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090022 * Alan Cox : Super /proc >4K
Linus Torvalds1da177e2005-04-16 15:20:36 -070023 * Alan Cox : MTU in route table
24 * Alan Cox : MSS actually. Also added the window
25 * clamper.
26 * Sam Lantinga : Fixed route matching in rt_del()
27 * Alan Cox : Routing cache support.
28 * Alan Cox : Removed compatibility cruft.
29 * Alan Cox : RTF_REJECT support.
30 * Alan Cox : TCP irtt support.
31 * Jonathan Naylor : Added Metric support.
32 * Miquel van Smoorenburg : BSD API fixes.
33 * Miquel van Smoorenburg : Metrics.
34 * Alan Cox : Use __u32 properly
35 * Alan Cox : Aligned routing errors more closely with BSD
36 * our system is still very different.
37 * Alan Cox : Faster /proc handling
38 * Alexey Kuznetsov : Massive rework to support tree based routing,
39 * routing caches and better behaviour.
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090040 *
Linus Torvalds1da177e2005-04-16 15:20:36 -070041 * Olaf Erb : irtt wasn't being copied right.
42 * Bjorn Ekwall : Kerneld route support.
43 * Alan Cox : Multicast fixed (I hope)
44 * Pavel Krauz : Limited broadcast fixed
45 * Mike McLagan : Routing by source
46 * Alexey Kuznetsov : End of old history. Split to fib.c and
47 * route.c and rewritten from scratch.
48 * Andi Kleen : Load-limit warning messages.
49 * Vitaly E. Lavrov : Transparent proxy revived after year coma.
50 * Vitaly E. Lavrov : Race condition in ip_route_input_slow.
51 * Tobias Ringstrom : Uninitialized res.type in ip_route_output_slow.
52 * Vladimir V. Ivanov : IP rule info (flowid) is really useful.
53 * Marc Boucher : routing by fwmark
54 * Robert Olsson : Added rt_cache statistics
55 * Arnaldo C. Melo : Convert proc stuff to seq_file
Eric Dumazetbb1d23b2005-07-05 15:00:32 -070056 * Eric Dumazet : hashed spinlocks and rt_check_expire() fixes.
Ilia Sotnikovcef26852006-03-25 01:38:55 -080057 * Ilia Sotnikov : Ignore TOS on PMTUD and Redirect
58 * Ilia Sotnikov : Removed TOS from hash calculations
Linus Torvalds1da177e2005-04-16 15:20:36 -070059 */
60
Joe Perchesafd465032012-03-12 07:03:32 +000061#define pr_fmt(fmt) "IPv4: " fmt
62
Linus Torvalds1da177e2005-04-16 15:20:36 -070063#include <linux/module.h>
Linus Torvalds7c0f6ba2016-12-24 11:46:01 -080064#include <linux/uaccess.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070065#include <linux/bitops.h>
66#include <linux/types.h>
67#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070068#include <linux/mm.h>
69#include <linux/string.h>
70#include <linux/socket.h>
71#include <linux/sockios.h>
72#include <linux/errno.h>
73#include <linux/in.h>
74#include <linux/inet.h>
75#include <linux/netdevice.h>
76#include <linux/proc_fs.h>
77#include <linux/init.h>
78#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070079#include <linux/inetdevice.h>
80#include <linux/igmp.h>
81#include <linux/pkt_sched.h>
82#include <linux/mroute.h>
83#include <linux/netfilter_ipv4.h>
84#include <linux/random.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070085#include <linux/rcupdate.h>
86#include <linux/times.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090087#include <linux/slab.h>
Eric Dumazet73f156a2014-06-02 05:26:03 -070088#include <linux/jhash.h>
Herbert Xu352e5122007-11-13 21:34:06 -080089#include <net/dst.h>
Thomas Graf1b7179d2015-07-21 10:43:59 +020090#include <net/dst_metadata.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020091#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070092#include <net/protocol.h>
93#include <net/ip.h>
94#include <net/route.h>
95#include <net/inetpeer.h>
96#include <net/sock.h>
97#include <net/ip_fib.h>
98#include <net/arp.h>
99#include <net/tcp.h>
100#include <net/icmp.h>
101#include <net/xfrm.h>
Roopa Prabhu571e7222015-07-21 10:43:47 +0200102#include <net/lwtunnel.h>
Tom Tucker8d717402006-07-30 20:43:36 -0700103#include <net/netevent.h>
Thomas Graf63f34442007-03-22 11:55:17 -0700104#include <net/rtnetlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700105#ifdef CONFIG_SYSCTL
106#include <linux/sysctl.h>
107#endif
David S. Miller6e5714e2011-08-03 20:50:44 -0700108#include <net/secure_seq.h>
Thomas Graf1b7179d2015-07-21 10:43:59 +0200109#include <net/ip_tunnels.h>
David Ahern385add92015-09-29 20:07:13 -0700110#include <net/l3mdev.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700111
Roopa Prabhub6179812017-05-25 10:42:39 -0700112#include "fib_lookup.h"
113
/* Fold RTO_ONLINK into the TOS bits extracted from a flow key. */
#define RT_FL_TOS(oldflp4) \
	((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))

/* Default lifetime of a cached-route exception (see fnhe handling). */
#define RT_GC_TIMEOUT (300*HZ)

/* Route-layer tunables; presumably exported via sysctl — the sysctl
 * table is not in this chunk, so confirm before relying on that.
 */
static int ip_rt_max_size;
static int ip_rt_redirect_number __read_mostly = 9;
static int ip_rt_redirect_load __read_mostly = HZ / 50;
/* Silence window: redirect_load shifted by (redirect_number + 1). */
static int ip_rt_redirect_silence __read_mostly = ((HZ / 50) << (9 + 1));
static int ip_rt_error_cost __read_mostly = HZ;
static int ip_rt_error_burst __read_mostly = 5 * HZ;
static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ;
/* 512 + 20 + 20: presumably 512 bytes payload plus IP and TCP headers. */
static u32 ip_rt_min_pmtu __read_mostly = 512 + 20 + 20;
static int ip_rt_min_advmss __read_mostly = 256;

static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT;
Sabrina Dubrocac7272c22018-02-26 16:13:43 +0100130
Linus Torvalds1da177e2005-04-16 15:20:36 -0700131/*
132 * Interface to generic destination cache.
133 */
134
135static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
David S. Miller0dbaee32010-12-13 12:52:14 -0800136static unsigned int ipv4_default_advmss(const struct dst_entry *dst);
Steffen Klassertebb762f2011-11-23 02:12:51 +0000137static unsigned int ipv4_mtu(const struct dst_entry *dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700138static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
139static void ipv4_link_failure(struct sk_buff *skb);
David S. Miller6700c272012-07-17 03:29:28 -0700140static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
141 struct sk_buff *skb, u32 mtu);
142static void ip_do_redirect(struct dst_entry *dst, struct sock *sk,
143 struct sk_buff *skb);
David S. Millercaacf052012-07-31 15:06:50 -0700144static void ipv4_dst_destroy(struct dst_entry *dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700145
/* IPv4 never copy-on-writes dst metrics through this hook; reaching it
 * indicates a bug, hence the WARN_ON.
 */
static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
{
	WARN_ON(1);
	return NULL;
}
151
David S. Millerf894cbf2012-07-02 21:52:24 -0700152static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
153 struct sk_buff *skb,
154 const void *daddr);
Julian Anastasov63fca652017-02-06 23:14:15 +0200155static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr);
David S. Millerd3aaeb32011-07-18 00:40:17 -0700156
/* Glue between the generic destination cache and the IPv4 routing code:
 * each hook below is implemented in this file (or is a core helper like
 * __ip_local_out).
 */
static struct dst_ops ipv4_dst_ops = {
	.family			= AF_INET,
	.check			= ipv4_dst_check,
	.default_advmss		= ipv4_default_advmss,
	.mtu			= ipv4_mtu,
	.cow_metrics		= ipv4_cow_metrics,	/* always WARNs: IPv4 never COWs metrics */
	.destroy		= ipv4_dst_destroy,
	.negative_advice	= ipv4_negative_advice,
	.link_failure		= ipv4_link_failure,
	.update_pmtu		= ip_rt_update_pmtu,
	.redirect		= ip_do_redirect,
	.local_out		= __ip_local_out,
	.neigh_lookup		= ipv4_neigh_lookup,
	.confirm_neigh		= ipv4_confirm_neigh,
};
172
/* The ECN slot of each pair maps to the same priority as its base class. */
#define ECN_OR_COST(class) TC_PRIO_##class

/* TOS-to-skb-priority map.  16 entries — presumably indexed by the TOS
 * nibble ((tos & IPTOS_TOS_MASK) >> 1); the indexing helper is not in
 * this chunk, so confirm against rt_tos2priority().
 */
const __u8 ip_tos2prio[16] = {
	TC_PRIO_BESTEFFORT,
	ECN_OR_COST(BESTEFFORT),
	TC_PRIO_BESTEFFORT,
	ECN_OR_COST(BESTEFFORT),
	TC_PRIO_BULK,
	ECN_OR_COST(BULK),
	TC_PRIO_BULK,
	ECN_OR_COST(BULK),
	TC_PRIO_INTERACTIVE,
	ECN_OR_COST(INTERACTIVE),
	TC_PRIO_INTERACTIVE,
	ECN_OR_COST(INTERACTIVE),
	TC_PRIO_INTERACTIVE_BULK,
	ECN_OR_COST(INTERACTIVE_BULK),
	TC_PRIO_INTERACTIVE_BULK,
	ECN_OR_COST(INTERACTIVE_BULK)
};
EXPORT_SYMBOL(ip_tos2prio);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700194
Eric Dumazet2f970d82006-01-17 02:54:36 -0800195static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
Christoph Lameter3ed66e92014-04-07 15:39:40 -0700196#define RT_CACHE_STAT_INC(field) raw_cpu_inc(rt_cache_stat.field)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700197
Linus Torvalds1da177e2005-04-16 15:20:36 -0700198#ifdef CONFIG_PROC_FS
Linus Torvalds1da177e2005-04-16 15:20:36 -0700199static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
200{
Eric Dumazet29e75252008-01-31 17:05:09 -0800201 if (*pos)
David S. Miller89aef892012-07-17 11:00:09 -0700202 return NULL;
Eric Dumazet29e75252008-01-31 17:05:09 -0800203 return SEQ_START_TOKEN;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700204}
205
206static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos)
207{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700208 ++*pos;
David S. Miller89aef892012-07-17 11:00:09 -0700209 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700210}
211
/* seq_file stop op: start acquired nothing, so nothing to release. */
static void rt_cache_seq_stop(struct seq_file *seq, void *v)
{
}
215
216static int rt_cache_seq_show(struct seq_file *seq, void *v)
217{
218 if (v == SEQ_START_TOKEN)
219 seq_printf(seq, "%-127s\n",
220 "Iface\tDestination\tGateway \tFlags\t\tRefCnt\tUse\t"
221 "Metric\tSource\t\tMTU\tWindow\tIRTT\tTOS\tHHRef\t"
222 "HHUptod\tSpecDst");
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900223 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700224}
225
/* seq_file plumbing for /proc/net/rt_cache. */
static const struct seq_operations rt_cache_seq_ops = {
	.start  = rt_cache_seq_start,
	.next   = rt_cache_seq_next,
	.stop   = rt_cache_seq_stop,
	.show   = rt_cache_seq_show,
};

static int rt_cache_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &rt_cache_seq_ops);
}

static const struct file_operations rt_cache_seq_fops = {
	.open	 = rt_cache_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release,
};
244
245
246static void *rt_cpu_seq_start(struct seq_file *seq, loff_t *pos)
247{
248 int cpu;
249
250 if (*pos == 0)
251 return SEQ_START_TOKEN;
252
Rusty Russell0f23174a2008-12-29 12:23:42 +0000253 for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700254 if (!cpu_possible(cpu))
255 continue;
256 *pos = cpu+1;
Eric Dumazet2f970d82006-01-17 02:54:36 -0800257 return &per_cpu(rt_cache_stat, cpu);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700258 }
259 return NULL;
260}
261
262static void *rt_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
263{
264 int cpu;
265
Rusty Russell0f23174a2008-12-29 12:23:42 +0000266 for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700267 if (!cpu_possible(cpu))
268 continue;
269 *pos = cpu+1;
Eric Dumazet2f970d82006-01-17 02:54:36 -0800270 return &per_cpu(rt_cache_stat, cpu);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700271 }
272 return NULL;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900273
Linus Torvalds1da177e2005-04-16 15:20:36 -0700274}
275
/* seq_file stop op: per-cpu data needs no release. */
static void rt_cpu_seq_stop(struct seq_file *seq, void *v)
{
}
280
/* One line of /proc/net/stat/rt_cache: the header for the start token,
 * otherwise one CPU's counters in hex.  Several historical fields are
 * printed as literal 0 (see the inline comments) so the column layout
 * stays stable for existing parsers.
 */
static int rt_cpu_seq_show(struct seq_file *seq, void *v)
{
	struct rt_cache_stat *st = v;

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "entries in_hit in_slow_tot in_slow_mc in_no_route in_brd in_martian_dst in_martian_src out_hit out_slow_tot out_slow_mc gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n");
		return 0;
	}

	/* "entries" is the global dst count, not a per-cpu statistic. */
	seq_printf(seq,"%08x %08x %08x %08x %08x %08x %08x %08x "
		   " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n",
		   dst_entries_get_slow(&ipv4_dst_ops),
		   0, /* st->in_hit */
		   st->in_slow_tot,
		   st->in_slow_mc,
		   st->in_no_route,
		   st->in_brd,
		   st->in_martian_dst,
		   st->in_martian_src,

		   0, /* st->out_hit */
		   st->out_slow_tot,
		   st->out_slow_mc,

		   0, /* st->gc_total */
		   0, /* st->gc_ignored */
		   0, /* st->gc_goal_miss */
		   0, /* st->gc_dst_overflow */
		   0, /* st->in_hlist_search */
		   0  /* st->out_hlist_search */
		);
	return 0;
}
314
/* seq_file plumbing for /proc/net/stat/rt_cache. */
static const struct seq_operations rt_cpu_seq_ops = {
	.start  = rt_cpu_seq_start,
	.next   = rt_cpu_seq_next,
	.stop   = rt_cpu_seq_stop,
	.show   = rt_cpu_seq_show,
};

static int rt_cpu_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &rt_cpu_seq_ops);
}

static const struct file_operations rt_cpu_seq_fops = {
	.open	 = rt_cpu_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release,
};
334
Patrick McHardyc7066f72011-01-14 13:36:42 +0100335#ifdef CONFIG_IP_ROUTE_CLASSID
Alexey Dobriyana661c412009-11-25 15:40:35 -0800336static int rt_acct_proc_show(struct seq_file *m, void *v)
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800337{
Alexey Dobriyana661c412009-11-25 15:40:35 -0800338 struct ip_rt_acct *dst, *src;
339 unsigned int i, j;
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800340
Alexey Dobriyana661c412009-11-25 15:40:35 -0800341 dst = kcalloc(256, sizeof(struct ip_rt_acct), GFP_KERNEL);
342 if (!dst)
343 return -ENOMEM;
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800344
Alexey Dobriyana661c412009-11-25 15:40:35 -0800345 for_each_possible_cpu(i) {
346 src = (struct ip_rt_acct *)per_cpu_ptr(ip_rt_acct, i);
347 for (j = 0; j < 256; j++) {
348 dst[j].o_bytes += src[j].o_bytes;
349 dst[j].o_packets += src[j].o_packets;
350 dst[j].i_bytes += src[j].i_bytes;
351 dst[j].i_packets += src[j].i_packets;
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800352 }
353 }
Alexey Dobriyana661c412009-11-25 15:40:35 -0800354
355 seq_write(m, dst, 256 * sizeof(struct ip_rt_acct));
356 kfree(dst);
357 return 0;
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800358}
359#endif
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800360
/* Create this netns's proc entries: /proc/net/rt_cache (legacy dump),
 * /proc/net/stat/rt_cache (per-cpu stats) and, under
 * CONFIG_IP_ROUTE_CLASSID, /proc/net/rt_acct.  On failure the entries
 * created so far are unwound via the goto ladder.
 */
static int __net_init ip_rt_do_proc_init(struct net *net)
{
	struct proc_dir_entry *pde;

	pde = proc_create("rt_cache", 0444, net->proc_net,
			  &rt_cache_seq_fops);
	if (!pde)
		goto err1;

	/* Same file name, different directory (net->proc_net_stat). */
	pde = proc_create("rt_cache", 0444,
			  net->proc_net_stat, &rt_cpu_seq_fops);
	if (!pde)
		goto err2;

#ifdef CONFIG_IP_ROUTE_CLASSID
	pde = proc_create_single("rt_acct", 0, net->proc_net,
			rt_acct_proc_show);
	if (!pde)
		goto err3;
#endif
	return 0;

#ifdef CONFIG_IP_ROUTE_CLASSID
err3:
	remove_proc_entry("rt_cache", net->proc_net_stat);
#endif
err2:
	remove_proc_entry("rt_cache", net->proc_net);
err1:
	return -ENOMEM;
}
Denis V. Lunev73b38712008-02-28 20:51:18 -0800392
/* Tear down the proc entries created by ip_rt_do_proc_init(). */
static void __net_exit ip_rt_do_proc_exit(struct net *net)
{
	remove_proc_entry("rt_cache", net->proc_net_stat);
	remove_proc_entry("rt_cache", net->proc_net);
#ifdef CONFIG_IP_ROUTE_CLASSID
	remove_proc_entry("rt_acct", net->proc_net);
#endif
}
401
/* Register the proc files above for every network namespace. */
static struct pernet_operations ip_rt_proc_ops __net_initdata = {
	.init = ip_rt_do_proc_init,
	.exit = ip_rt_do_proc_exit,
};

static int __init ip_rt_proc_init(void)
{
	return register_pernet_subsys(&ip_rt_proc_ops);
}

#else
/* !CONFIG_PROC_FS: nothing to register. */
static inline int ip_rt_proc_init(void)
{
	return 0;
}
#endif /* CONFIG_PROC_FS */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900418
Eric Dumazet4331deb2012-07-25 05:11:23 +0000419static inline bool rt_is_expired(const struct rtable *rth)
Denis V. Luneve84f84f2008-07-05 19:04:32 -0700420{
fan.duca4c3fc2013-07-30 08:33:53 +0800421 return rth->rt_genid != rt_genid_ipv4(dev_net(rth->dst.dev));
Denis V. Luneve84f84f2008-07-05 19:04:32 -0700422}
423
/* Invalidate all cached routes in @net lazily: bumping the generation id
 * makes every existing entry fail the rt_is_expired() check.
 */
void rt_cache_flush(struct net *net)
{
	rt_genid_bump_ipv4(net);
}
428
/* Resolve the neighbour entry a dst transmits through: prefer the cached
 * IPv4/IPv6 gateway on the route; for on-link routes fall back to the
 * packet's (or caller-supplied) destination address.  Returns a referenced
 * neighbour or NULL.
 */
static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
					   struct sk_buff *skb,
					   const void *daddr)
{
	const struct rtable *rt = container_of(dst, struct rtable, dst);
	struct net_device *dev = dst->dev;
	struct neighbour *n;

	rcu_read_lock_bh();

	if (likely(rt->rt_gw_family == AF_INET)) {
		n = ip_neigh_gw4(dev, rt->rt_gw4);
	} else if (rt->rt_gw_family == AF_INET6) {
		n = ip_neigh_gw6(dev, &rt->rt_gw6);
	} else {
		__be32 pkey;

		pkey = skb ? ip_hdr(skb)->daddr : *((__be32 *) daddr);
		n = ip_neigh_gw4(dev, pkey);
	}

	/* Only hand back the entry if it is not already being torn down. */
	if (n && !refcount_inc_not_zero(&n->refcnt))
		n = NULL;

	rcu_read_unlock_bh();

	return n;
}
457
/* Confirm reachability of the neighbour behind @dst.  Uses the cached
 * gateway when present (delegating to the IPv6 stub for v6 gateways);
 * otherwise confirms @daddr itself, except for multicast/broadcast/local
 * routes where the neighbour is not the actual destination.
 */
static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr)
{
	const struct rtable *rt = container_of(dst, struct rtable, dst);
	struct net_device *dev = dst->dev;
	const __be32 *pkey = daddr;

	if (rt->rt_gw_family == AF_INET) {
		pkey = (const __be32 *)&rt->rt_gw4;
	} else if (rt->rt_gw_family == AF_INET6) {
		return __ipv6_confirm_neigh_stub(dev, &rt->rt_gw6);
	} else if (!daddr ||
		 (rt->rt_flags &
		  (RTCF_MULTICAST | RTCF_BROADCAST | RTCF_LOCAL))) {
		return;
	}
	__ipv4_confirm_neigh(dev, *(__force u32 *)pkey);
}
475
#define IP_IDENTS_SZ 2048u

/* Per-bucket IP-ID generators plus the jiffies timestamp of each bucket's
 * last use (see ip_idents_reserve()); presumably allocated during init —
 * the allocation is not in this chunk.
 */
static atomic_t *ip_idents __read_mostly;
static u32 *ip_tstamps __read_mostly;
Eric Dumazet04ca6972014-07-26 08:58:10 +0200480
/* In order to protect privacy, we add a perturbation to identifiers
 * if one generator is seldom used. This makes hard for an attacker
 * to infer how many packets were sent between two points in time.
 *
 * Reserves @segs consecutive IP IDs from the bucket selected by @hash
 * and returns the first one.  Lock-free: timestamp and counter updates
 * both use cmpxchg retry loops.
 */
u32 ip_idents_reserve(u32 hash, int segs)
{
	u32 *p_tstamp = ip_tstamps + hash % IP_IDENTS_SZ;
	atomic_t *p_id = ip_idents + hash % IP_IDENTS_SZ;
	u32 old = READ_ONCE(*p_tstamp);
	u32 now = (u32)jiffies;
	u32 new, delta = 0;

	/* Bucket idle since last jiffy: add a random skip.  Only the CPU
	 * that wins the cmpxchg applies the perturbation.
	 */
	if (old != now && cmpxchg(p_tstamp, old, now) == old)
		delta = prandom_u32_max(now - old);

	/* Do not use atomic_add_return() as it makes UBSAN unhappy */
	do {
		old = (u32)atomic_read(p_id);
		new = old + delta + segs;
	} while (atomic_cmpxchg(p_id, old, new) != old);

	return new - segs;
}
EXPORT_SYMBOL(ip_idents_reserve);
Eric Dumazet73f156a2014-06-02 05:26:03 -0700505
/* Fill in iph->id for a datagram spanning @segs segments: siphash the
 * (daddr, saddr, protocol) triple with a per-netns key and reserve IDs
 * from the corresponding generator bucket.
 */
void __ip_select_ident(struct net *net, struct iphdr *iph, int segs)
{
	u32 hash, id;

	/* Note the following code is not safe, but this is okay.
	 * (Racy lazy key init: concurrent writers may each write random
	 * bytes, but all readers end up using whatever key settles.)
	 */
	if (unlikely(siphash_key_is_zero(&net->ipv4.ip_id_key)))
		get_random_bytes(&net->ipv4.ip_id_key,
				 sizeof(net->ipv4.ip_id_key));

	hash = siphash_3u32((__force u32)iph->daddr,
			    (__force u32)iph->saddr,
			    iph->protocol,
			    &net->ipv4.ip_id_key);
	id = ip_idents_reserve(hash, segs);
	iph->id = htons(id);
}
EXPORT_SYMBOL(__ip_select_ident);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700523
/* Common flow-key builder.  When a socket is supplied, its bound device,
 * mark, TOS and protocol override the packet-derived values passed in.
 */
static void __build_flow_key(const struct net *net, struct flowi4 *fl4,
			     const struct sock *sk,
			     const struct iphdr *iph,
			     int oif, u8 tos,
			     u8 prot, u32 mark, int flow_flags)
{
	if (sk) {
		const struct inet_sock *inet = inet_sk(sk);

		oif = sk->sk_bound_dev_if;
		mark = sk->sk_mark;
		tos = RT_CONN_FLAGS(sk);
		/* Raw sockets with IP_HDRINCL route as IPPROTO_RAW. */
		prot = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol;
	}
	flowi4_init_output(fl4, oif, mark, tos,
			   RT_SCOPE_UNIVERSE, prot,
			   flow_flags,
			   iph->daddr, iph->saddr, 0, 0,
			   sock_net_uid(net, sk));
}
544
Eric Dumazet5abf7f72012-07-17 22:42:13 +0200545static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb,
546 const struct sock *sk)
David S. Miller4895c772012-07-17 04:19:00 -0700547{
Lorenzo Colittid109e612016-11-30 02:56:47 +0900548 const struct net *net = dev_net(skb->dev);
David S. Miller4895c772012-07-17 04:19:00 -0700549 const struct iphdr *iph = ip_hdr(skb);
550 int oif = skb->dev->ifindex;
551 u8 tos = RT_TOS(iph->tos);
552 u8 prot = iph->protocol;
553 u32 mark = skb->mark;
554
Lorenzo Colittid109e612016-11-30 02:56:47 +0900555 __build_flow_key(net, fl4, sk, iph, oif, tos, prot, mark, 0);
David S. Miller4895c772012-07-17 04:19:00 -0700556}
557
/* Derive an output flow key purely from a connected socket (no packet in
 * hand).  With a strict-source-route option the first hop (opt.faddr)
 * replaces the final destination; the options are read under RCU.
 */
static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
{
	const struct inet_sock *inet = inet_sk(sk);
	const struct ip_options_rcu *inet_opt;
	__be32 daddr = inet->inet_daddr;

	rcu_read_lock();
	inet_opt = rcu_dereference(inet->inet_opt);
	if (inet_opt && inet_opt->opt.srr)
		daddr = inet_opt->opt.faddr;
	flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
			   inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
			   inet_sk_flowi_flags(sk),
			   daddr, inet->inet_saddr, 0, 0, sk->sk_uid);
	rcu_read_unlock();
}
575
/* Build a flow key from whichever context is available: the skb when one
 * exists, otherwise the socket alone.
 */
static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk,
				 const struct sk_buff *skb)
{
	if (!skb)
		build_sk_flow_key(fl4, sk);
	else
		build_skb_flow_key(fl4, skb, sk);
}
584
David S. Millerc5038a82012-07-31 15:02:02 -0700585static DEFINE_SPINLOCK(fnhe_lock);
David S. Miller4895c772012-07-17 04:19:00 -0700586
/* Drop the cached input and output routes hanging off an exception entry.
 * Callers in this file invoke it under fnhe_lock (via fnhe_oldest()), so
 * plain RCU_INIT_POINTER suffices for clearing; concurrent readers are
 * protected by RCU.  Both the device reference (dst_dev_put) and the
 * refcount (dst_release) are dropped.
 */
static void fnhe_flush_routes(struct fib_nh_exception *fnhe)
{
	struct rtable *rt;

	rt = rcu_dereference(fnhe->fnhe_rth_input);
	if (rt) {
		RCU_INIT_POINTER(fnhe->fnhe_rth_input, NULL);
		dst_dev_put(&rt->dst);
		dst_release(&rt->dst);
	}
	rt = rcu_dereference(fnhe->fnhe_rth_output);
	if (rt) {
		RCU_INIT_POINTER(fnhe->fnhe_rth_output, NULL);
		dst_dev_put(&rt->dst);
		dst_release(&rt->dst);
	}
}
604
Julian Anastasovaee06da2012-07-18 10:15:35 +0000605static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
David S. Miller4895c772012-07-17 04:19:00 -0700606{
607 struct fib_nh_exception *fnhe, *oldest;
608
609 oldest = rcu_dereference(hash->chain);
610 for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe;
611 fnhe = rcu_dereference(fnhe->fnhe_next)) {
612 if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp))
613 oldest = fnhe;
614 }
Timo Teräs2ffae992013-06-27 10:27:05 +0300615 fnhe_flush_routes(oldest);
David S. Miller4895c772012-07-17 04:19:00 -0700616 return oldest;
617}
618
/* Hash a destination into an exception bucket index.  The jhash key is
 * random and initialized once per boot, making bucket placement
 * unpredictable to remote parties.
 */
static inline u32 fnhe_hashfun(__be32 daddr)
{
	static u32 fnhe_hashrnd __read_mostly;
	u32 hval;

	net_get_random_once(&fnhe_hashrnd, sizeof(fnhe_hashrnd));
	hval = jhash_1word((__force u32) daddr, fnhe_hashrnd);
	return hash_32(hval, FNHE_HASH_SHIFT);
}
628
Timo Teräs387aa652013-05-27 20:46:31 +0000629static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe)
630{
631 rt->rt_pmtu = fnhe->fnhe_pmtu;
Sabrina Dubrocad52e5a72018-03-14 10:21:14 +0100632 rt->rt_mtu_locked = fnhe->fnhe_mtu_locked;
Timo Teräs387aa652013-05-27 20:46:31 +0000633 rt->dst.expires = fnhe->fnhe_expires;
634
635 if (fnhe->fnhe_gw) {
636 rt->rt_flags |= RTCF_REDIRECTED;
David Ahern1550c172019-04-05 16:30:27 -0700637 rt->rt_gw_family = AF_INET;
638 rt->rt_gw4 = fnhe->fnhe_gw;
Timo Teräs387aa652013-05-27 20:46:31 +0000639 }
640}
641
/* Record or refresh a next-hop exception for @daddr on nexthop @nhc:
 * a learned redirect gateway (@gw, 0 if none) and/or a path MTU
 * (@pmtu, 0 if none; @lock sets fnhe_mtu_locked) valid until @expires.
 * The per-nexthop exception hash is published with RCU primitives;
 * all writers serialize on fnhe_lock.
 */
static void update_or_create_fnhe(struct fib_nh_common *nhc, __be32 daddr,
				  __be32 gw, u32 pmtu, bool lock,
				  unsigned long expires)
{
	struct fnhe_hash_bucket *hash;
	struct fib_nh_exception *fnhe;
	struct rtable *rt;
	u32 genid, hval;
	unsigned int i;
	int depth;

	genid = fnhe_genid(dev_net(nhc->nhc_dev));
	hval = fnhe_hashfun(daddr);

	spin_lock_bh(&fnhe_lock);

	/* Lazily allocate the bucket array on first exception. */
	hash = rcu_dereference(nhc->nhc_exceptions);
	if (!hash) {
		hash = kcalloc(FNHE_HASH_SIZE, sizeof(*hash), GFP_ATOMIC);
		if (!hash)
			goto out_unlock;
		rcu_assign_pointer(nhc->nhc_exceptions, hash);
	}

	hash += hval;

	/* Linear scan of the chain, tracking its length for reclaim. */
	depth = 0;
	for (fnhe = rcu_dereference(hash->chain); fnhe;
	     fnhe = rcu_dereference(fnhe->fnhe_next)) {
		if (fnhe->fnhe_daddr == daddr)
			break;
		depth++;
	}

	if (fnhe) {
		/* Existing entry: update only the fields the caller
		 * actually supplied (gw/pmtu of 0 mean "no change").
		 */
		if (fnhe->fnhe_genid != genid)
			fnhe->fnhe_genid = genid;
		if (gw)
			fnhe->fnhe_gw = gw;
		if (pmtu) {
			fnhe->fnhe_pmtu = pmtu;
			fnhe->fnhe_mtu_locked = lock;
		}
		/* max(1UL, ...) keeps expires non-zero even if the
		 * computed deadline happens to be 0.
		 */
		fnhe->fnhe_expires = max(1UL, expires);
		/* Update all cached dsts too */
		rt = rcu_dereference(fnhe->fnhe_rth_input);
		if (rt)
			fill_route_from_fnhe(rt, fnhe);
		rt = rcu_dereference(fnhe->fnhe_rth_output);
		if (rt)
			fill_route_from_fnhe(rt, fnhe);
	} else {
		/* New entry: recycle the oldest one if the chain is
		 * already too deep, otherwise allocate and publish.
		 */
		if (depth > FNHE_RECLAIM_DEPTH)
			fnhe = fnhe_oldest(hash);
		else {
			fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC);
			if (!fnhe)
				goto out_unlock;

			fnhe->fnhe_next = hash->chain;
			rcu_assign_pointer(hash->chain, fnhe);
		}
		fnhe->fnhe_genid = genid;
		fnhe->fnhe_daddr = daddr;
		fnhe->fnhe_gw = gw;
		fnhe->fnhe_pmtu = pmtu;
		fnhe->fnhe_mtu_locked = lock;
		fnhe->fnhe_expires = max(1UL, expires);

		/* Exception created; mark the cached routes for the nexthop
		 * stale, so anyone caching it rechecks if this exception
		 * applies to them.
		 */
		rt = rcu_dereference(nhc->nhc_rth_input);
		if (rt)
			rt->dst.obsolete = DST_OBSOLETE_KILL;

		for_each_possible_cpu(i) {
			struct rtable __rcu **prt;
			prt = per_cpu_ptr(nhc->nhc_pcpu_rth_output, i);
			rt = rcu_dereference(*prt);
			if (rt)
				rt->dst.obsolete = DST_OBSOLETE_KILL;
		}
	}

	fnhe->fnhe_stamp = jiffies;

out_unlock:
	spin_unlock_bh(&fnhe_lock);
}
733
/* Handle an ICMP redirect received in @skb for route @rt (flow @fl4).
 * Validates the advised gateway, resolves it as an ARP neighbour and,
 * once that neighbour is in a NUD_VALID state, stores the new gateway
 * in a next-hop exception.  With @kill_route the cached route is also
 * marked DST_OBSOLETE_KILL so it will fail ipv4_dst_check().
 * Uses __in_dev_get_rcu(), so the caller must provide RCU protection.
 */
static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4,
			     bool kill_route)
{
	__be32 new_gw = icmp_hdr(skb)->un.gateway;
	__be32 old_gw = ip_hdr(skb)->saddr;
	struct net_device *dev = skb->dev;
	struct in_device *in_dev;
	struct fib_result res;
	struct neighbour *n;
	struct net *net;

	/* Only the four defined redirect codes are acted upon. */
	switch (icmp_hdr(skb)->code & 7) {
	case ICMP_REDIR_NET:
	case ICMP_REDIR_NETTOS:
	case ICMP_REDIR_HOST:
	case ICMP_REDIR_HOSTTOS:
		break;

	default:
		return;
	}

	/* The redirect must come from the gateway we are currently using. */
	if (rt->rt_gw_family != AF_INET || rt->rt_gw4 != old_gw)
		return;

	in_dev = __in_dev_get_rcu(dev);
	if (!in_dev)
		return;

	net = dev_net(dev);
	/* Reject martian or no-op advised gateways, and redirects when the
	 * interface is configured to ignore them.
	 */
	if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) ||
	    ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw) ||
	    ipv4_is_zeronet(new_gw))
		goto reject_redirect;

	if (!IN_DEV_SHARED_MEDIA(in_dev)) {
		/* Non-shared media: new gateway must be on-link, and with
		 * secure redirects it must already be a known default gw.
		 */
		if (!inet_addr_onlink(in_dev, new_gw, old_gw))
			goto reject_redirect;
		if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev))
			goto reject_redirect;
	} else {
		/* Shared media: any unicast address is acceptable. */
		if (inet_addr_type(net, new_gw) != RTN_UNICAST)
			goto reject_redirect;
	}

	n = __ipv4_neigh_lookup(rt->dst.dev, new_gw);
	if (!n)
		n = neigh_create(&arp_tbl, &new_gw, rt->dst.dev);
	if (!IS_ERR(n)) {
		if (!(n->nud_state & NUD_VALID)) {
			/* Not resolved yet: kick resolution, apply nothing. */
			neigh_event_send(n, NULL);
		} else {
			if (fib_lookup(net, fl4, &res, 0) == 0) {
				struct fib_nh_common *nhc = FIB_RES_NHC(res);

				update_or_create_fnhe(nhc, fl4->daddr, new_gw,
						0, false,
						jiffies + ip_rt_gc_timeout);
			}
			if (kill_route)
				rt->dst.obsolete = DST_OBSOLETE_KILL;
			call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n);
		}
		neigh_release(n);
	}
	return;

reject_redirect:
#ifdef CONFIG_IP_ROUTE_VERBOSE
	if (IN_DEV_LOG_MARTIANS(in_dev)) {
		const struct iphdr *iph = (const struct iphdr *) skb->data;
		__be32 daddr = iph->daddr;
		__be32 saddr = iph->saddr;

		net_info_ratelimited("Redirect from %pI4 on %s about %pI4 ignored\n"
				     "  Advised path = %pI4 -> %pI4\n",
				     &old_gw, dev->name, &new_gw,
				     &saddr, &daddr);
	}
#endif
	;
}
816
David S. Miller4895c772012-07-17 04:19:00 -0700817static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
818{
819 struct rtable *rt;
820 struct flowi4 fl4;
Michal Kubecekf96ef982013-05-28 08:26:49 +0200821 const struct iphdr *iph = (const struct iphdr *) skb->data;
Lorenzo Colitti7d995692016-12-23 00:33:57 +0900822 struct net *net = dev_net(skb->dev);
Michal Kubecekf96ef982013-05-28 08:26:49 +0200823 int oif = skb->dev->ifindex;
824 u8 tos = RT_TOS(iph->tos);
825 u8 prot = iph->protocol;
826 u32 mark = skb->mark;
David S. Miller4895c772012-07-17 04:19:00 -0700827
828 rt = (struct rtable *) dst;
829
Lorenzo Colitti7d995692016-12-23 00:33:57 +0900830 __build_flow_key(net, &fl4, sk, iph, oif, tos, prot, mark, 0);
David S. Millerceb33202012-07-17 11:31:28 -0700831 __ip_do_redirect(rt, skb, &fl4, true);
David S. Miller4895c772012-07-17 04:19:00 -0700832}
833
Linus Torvalds1da177e2005-04-16 15:20:36 -0700834static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
835{
Eric Dumazetee6b9672008-03-05 18:30:47 -0800836 struct rtable *rt = (struct rtable *)dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700837 struct dst_entry *ret = dst;
838
839 if (rt) {
Timo Teräsd11a4dc2010-03-18 23:20:20 +0000840 if (dst->obsolete > 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700841 ip_rt_put(rt);
842 ret = NULL;
David S. Miller59436342012-07-10 06:58:42 -0700843 } else if ((rt->rt_flags & RTCF_REDIRECTED) ||
844 rt->dst.expires) {
David S. Miller89aef892012-07-17 11:00:09 -0700845 ip_rt_put(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700846 ret = NULL;
847 }
848 }
849 return ret;
850}
851
852/*
853 * Algorithm:
854 * 1. The first ip_rt_redirect_number redirects are sent
855 * with exponential backoff, then we stop sending them at all,
856 * assuming that the host ignores our redirects.
857 * 2. If we did not see packets requiring redirects
858 * during ip_rt_redirect_silence, we assume that the host
859 * forgot redirected route and start to send redirects again.
860 *
861 * This algorithm is much cheaper and more intelligent than dumb load limiting
862 * in icmp.c.
863 *
864 * NOTE. Do not forget to inhibit load limiting for redirects (redundant)
865 * and "frag. need" (breaks PMTU discovery) in icmp.c.
866 */
867
868void ip_rt_send_redirect(struct sk_buff *skb)
869{
Eric Dumazet511c3f92009-06-02 05:14:27 +0000870 struct rtable *rt = skb_rtable(skb);
Eric Dumazet30038fc2009-08-28 23:52:01 -0700871 struct in_device *in_dev;
David S. Miller92d86822011-02-04 15:55:25 -0800872 struct inet_peer *peer;
David S. Miller1d861aa2012-07-10 03:58:16 -0700873 struct net *net;
Eric Dumazet30038fc2009-08-28 23:52:01 -0700874 int log_martians;
David Ahern192132b2015-08-27 16:07:03 -0700875 int vif;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700876
Eric Dumazet30038fc2009-08-28 23:52:01 -0700877 rcu_read_lock();
Changli Gaod8d1f302010-06-10 23:31:35 -0700878 in_dev = __in_dev_get_rcu(rt->dst.dev);
Eric Dumazet30038fc2009-08-28 23:52:01 -0700879 if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) {
880 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700881 return;
Eric Dumazet30038fc2009-08-28 23:52:01 -0700882 }
883 log_martians = IN_DEV_LOG_MARTIANS(in_dev);
David Ahern385add92015-09-29 20:07:13 -0700884 vif = l3mdev_master_ifindex_rcu(rt->dst.dev);
Eric Dumazet30038fc2009-08-28 23:52:01 -0700885 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700886
David S. Miller1d861aa2012-07-10 03:58:16 -0700887 net = dev_net(rt->dst.dev);
David Ahern192132b2015-08-27 16:07:03 -0700888 peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, vif, 1);
David S. Miller92d86822011-02-04 15:55:25 -0800889 if (!peer) {
Julian Anastasove81da0e2012-10-08 11:41:15 +0000890 icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST,
891 rt_nexthop(rt, ip_hdr(skb)->daddr));
David S. Miller92d86822011-02-04 15:55:25 -0800892 return;
893 }
894
Linus Torvalds1da177e2005-04-16 15:20:36 -0700895 /* No redirected packets during ip_rt_redirect_silence;
896 * reset the algorithm.
897 */
Lorenzo Bianconic09551c62019-02-06 19:18:04 +0100898 if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence)) {
David S. Miller92d86822011-02-04 15:55:25 -0800899 peer->rate_tokens = 0;
Lorenzo Bianconic09551c62019-02-06 19:18:04 +0100900 peer->n_redirects = 0;
901 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700902
903 /* Too many ignored redirects; do not send anything
Changli Gaod8d1f302010-06-10 23:31:35 -0700904 * set dst.rate_last to the last seen redirected packet.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700905 */
Lorenzo Bianconic09551c62019-02-06 19:18:04 +0100906 if (peer->n_redirects >= ip_rt_redirect_number) {
David S. Miller92d86822011-02-04 15:55:25 -0800907 peer->rate_last = jiffies;
David S. Miller1d861aa2012-07-10 03:58:16 -0700908 goto out_put_peer;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700909 }
910
911 /* Check for load limit; set rate_last to the latest sent
912 * redirect.
913 */
David S. Miller92d86822011-02-04 15:55:25 -0800914 if (peer->rate_tokens == 0 ||
Li Yewang14fb8a72006-12-18 00:26:35 -0800915 time_after(jiffies,
David S. Miller92d86822011-02-04 15:55:25 -0800916 (peer->rate_last +
917 (ip_rt_redirect_load << peer->rate_tokens)))) {
Julian Anastasove81da0e2012-10-08 11:41:15 +0000918 __be32 gw = rt_nexthop(rt, ip_hdr(skb)->daddr);
919
920 icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw);
David S. Miller92d86822011-02-04 15:55:25 -0800921 peer->rate_last = jiffies;
922 ++peer->rate_tokens;
Lorenzo Bianconic09551c62019-02-06 19:18:04 +0100923 ++peer->n_redirects;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700924#ifdef CONFIG_IP_ROUTE_VERBOSE
Eric Dumazet30038fc2009-08-28 23:52:01 -0700925 if (log_martians &&
Joe Perchese87cc472012-05-13 21:56:26 +0000926 peer->rate_tokens == ip_rt_redirect_number)
927 net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n",
David S. Miller92101b32012-07-23 16:29:00 -0700928 &ip_hdr(skb)->saddr, inet_iif(skb),
Julian Anastasove81da0e2012-10-08 11:41:15 +0000929 &ip_hdr(skb)->daddr, &gw);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700930#endif
931 }
David S. Miller1d861aa2012-07-10 03:58:16 -0700932out_put_peer:
933 inet_putpeer(peer);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700934}
935
/* Input handler for dsts carrying an error (rt->dst.error): account the
 * failure in SNMP counters and, subject to a per-source-address token
 * bucket (rate_tokens on the inet_peer), send an ICMP destination
 * unreachable back.  Always consumes @skb and returns 0.
 */
static int ip_error(struct sk_buff *skb)
{
	struct rtable *rt = skb_rtable(skb);
	struct net_device *dev = skb->dev;
	struct in_device *in_dev;
	struct inet_peer *peer;
	unsigned long now;
	struct net *net;
	bool send;
	int code;

	/* For an l3mdev (e.g. VRF) master, account against the original
	 * ingress device recorded in the control block.
	 */
	if (netif_is_l3_master(skb->dev)) {
		dev = __dev_get_by_index(dev_net(skb->dev), IPCB(skb)->iif);
		if (!dev)
			goto out;
	}

	in_dev = __in_dev_get_rcu(dev);

	/* IP on this device is disabled. */
	if (!in_dev)
		goto out;

	net = dev_net(rt->dst.dev);
	if (!IN_DEV_FORWARD(in_dev)) {
		/* Not forwarding: count the error, never send ICMP. */
		switch (rt->dst.error) {
		case EHOSTUNREACH:
			__IP_INC_STATS(net, IPSTATS_MIB_INADDRERRORS);
			break;

		case ENETUNREACH:
			__IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES);
			break;
		}
		goto out;
	}

	/* Map the dst error to an ICMP code. */
	switch (rt->dst.error) {
	case EINVAL:
	default:
		goto out;
	case EHOSTUNREACH:
		code = ICMP_HOST_UNREACH;
		break;
	case ENETUNREACH:
		code = ICMP_NET_UNREACH;
		__IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES);
		break;
	case EACCES:
		code = ICMP_PKT_FILTERED;
		break;
	}

	peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr,
			       l3mdev_master_ifindex(skb->dev), 1);

	send = true;
	if (peer) {
		/* Token bucket: tokens accrue one per jiffy up to
		 * ip_rt_error_burst; each ICMP costs ip_rt_error_cost.
		 */
		now = jiffies;
		peer->rate_tokens += now - peer->rate_last;
		if (peer->rate_tokens > ip_rt_error_burst)
			peer->rate_tokens = ip_rt_error_burst;
		peer->rate_last = now;
		if (peer->rate_tokens >= ip_rt_error_cost)
			peer->rate_tokens -= ip_rt_error_cost;
		else
			send = false;
		inet_putpeer(peer);
	}
	if (send)
		icmp_send(skb, ICMP_DEST_UNREACH, code, 0);

out:	kfree_skb(skb);
	return 0;
}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001011
/* Record a newly learned path MTU @mtu for route @rt (flow @fl4) as a
 * next-hop exception.  MTUs below ip_rt_min_pmtu are clamped and the
 * exception is "locked" (no further PMTU discovery on it).  No-ops when
 * the MTU is locked, would grow, or is an unchanged non-near-expiry
 * refresh.
 */
static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
{
	struct dst_entry *dst = &rt->dst;
	u32 old_mtu = ipv4_mtu(dst);
	struct fib_result res;
	bool lock = false;

	if (ip_mtu_locked(dst))
		return;

	/* PMTU can only shrink an existing estimate. */
	if (old_mtu < mtu)
		return;

	if (mtu < ip_rt_min_pmtu) {
		lock = true;
		mtu = min(old_mtu, ip_rt_min_pmtu);
	}

	/* Unchanged and not within the last half of its lifetime:
	 * nothing to refresh.
	 */
	if (rt->rt_pmtu == mtu && !lock &&
	    time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2))
		return;

	rcu_read_lock();
	if (fib_lookup(dev_net(dst->dev), fl4, &res, 0) == 0) {
		struct fib_nh_common *nhc = FIB_RES_NHC(res);

		update_or_create_fnhe(nhc, fl4->daddr, 0, mtu, lock,
				      jiffies + ip_rt_mtu_expires);
	}
	rcu_read_unlock();
}
1043
David S. Miller4895c772012-07-17 04:19:00 -07001044static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1045 struct sk_buff *skb, u32 mtu)
1046{
1047 struct rtable *rt = (struct rtable *) dst;
1048 struct flowi4 fl4;
1049
1050 ip_rt_build_flow_key(&fl4, sk, skb);
Steffen Klassertd851c122012-10-07 22:47:25 +00001051 __ip_rt_update_pmtu(rt, &fl4, mtu);
David S. Miller4895c772012-07-17 04:19:00 -07001052}
1053
David S. Miller36393392012-06-14 22:21:46 -07001054void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
Maciej Żenczykowskid888f392018-09-25 20:56:26 -07001055 int oif, u8 protocol)
David S. Miller36393392012-06-14 22:21:46 -07001056{
David S. Miller4895c772012-07-17 04:19:00 -07001057 const struct iphdr *iph = (const struct iphdr *) skb->data;
David S. Miller36393392012-06-14 22:21:46 -07001058 struct flowi4 fl4;
1059 struct rtable *rt;
Maciej Żenczykowskid888f392018-09-25 20:56:26 -07001060 u32 mark = IP4_REPLY_MARK(net, skb->mark);
Lorenzo Colitti1b3c61d2014-05-13 10:17:34 -07001061
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09001062 __build_flow_key(net, &fl4, NULL, iph, oif,
Maciej Żenczykowskid888f392018-09-25 20:56:26 -07001063 RT_TOS(iph->tos), protocol, mark, 0);
David S. Miller36393392012-06-14 22:21:46 -07001064 rt = __ip_route_output_key(net, &fl4);
1065 if (!IS_ERR(rt)) {
David S. Miller4895c772012-07-17 04:19:00 -07001066 __ip_rt_update_pmtu(rt, &fl4, mtu);
David S. Miller36393392012-06-14 22:21:46 -07001067 ip_rt_put(rt);
1068 }
1069}
1070EXPORT_SYMBOL_GPL(ipv4_update_pmtu);
1071
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001072static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
David S. Miller36393392012-06-14 22:21:46 -07001073{
David S. Miller4895c772012-07-17 04:19:00 -07001074 const struct iphdr *iph = (const struct iphdr *) skb->data;
1075 struct flowi4 fl4;
1076 struct rtable *rt;
David S. Miller36393392012-06-14 22:21:46 -07001077
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09001078 __build_flow_key(sock_net(sk), &fl4, sk, iph, 0, 0, 0, 0, 0);
Lorenzo Colitti1b3c61d2014-05-13 10:17:34 -07001079
1080 if (!fl4.flowi4_mark)
1081 fl4.flowi4_mark = IP4_REPLY_MARK(sock_net(sk), skb->mark);
1082
David S. Miller4895c772012-07-17 04:19:00 -07001083 rt = __ip_route_output_key(sock_net(sk), &fl4);
1084 if (!IS_ERR(rt)) {
1085 __ip_rt_update_pmtu(rt, &fl4, mtu);
1086 ip_rt_put(rt);
1087 }
David S. Miller36393392012-06-14 22:21:46 -07001088}
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001089
/* Apply a learned path MTU @mtu to the route cached on socket @sk.
 * If the socket is owned by user context or has no cached dst, fall
 * back to a plain routing lookup (__ipv4_sk_update_pmtu).  Otherwise
 * update the cached route, re-resolving and re-caching it when the
 * update invalidated it.  Runs under bh_lock_sock().
 */
void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
{
	const struct iphdr *iph = (const struct iphdr *) skb->data;
	struct flowi4 fl4;
	struct rtable *rt;
	struct dst_entry *odst = NULL;	/* socket's original dst (refcounted) */
	bool new = false;		/* true once rt holds a fresh route */
	struct net *net = sock_net(sk);

	bh_lock_sock(sk);

	if (!ip_sk_accept_pmtu(sk))
		goto out;

	odst = sk_dst_get(sk);

	if (sock_owned_by_user(sk) || !odst) {
		__ipv4_sk_update_pmtu(skb, sk, mtu);
		goto out;
	}

	__build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0);

	rt = (struct rtable *)odst;
	/* Cached dst already obsolete and failing revalidation:
	 * resolve a fresh route before applying the MTU.
	 */
	if (odst->obsolete && !odst->ops->check(odst, 0)) {
		rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
		if (IS_ERR(rt))
			goto out;

		new = true;
	}

	/* Apply to the innermost (pre-xfrm) path of the dst chain. */
	__ip_rt_update_pmtu((struct rtable *) xfrm_dst_path(&rt->dst), &fl4, mtu);

	/* The PMTU update may have killed the route; re-resolve. */
	if (!dst_check(&rt->dst, 0)) {
		if (new)
			dst_release(&rt->dst);

		rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
		if (IS_ERR(rt))
			goto out;

		new = true;
	}

	/* sk_dst_set() consumes the fresh route's reference. */
	if (new)
		sk_dst_set(sk, &rt->dst);

out:
	bh_unlock_sock(sk);
	dst_release(odst);
}
EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu);
David S. Millerf39925d2011-02-09 22:00:16 -08001143
David S. Millerb42597e2012-07-11 21:25:45 -07001144void ipv4_redirect(struct sk_buff *skb, struct net *net,
Maciej Żenczykowski1042caa2018-09-25 20:56:27 -07001145 int oif, u8 protocol)
David S. Millerb42597e2012-07-11 21:25:45 -07001146{
David S. Miller4895c772012-07-17 04:19:00 -07001147 const struct iphdr *iph = (const struct iphdr *) skb->data;
David S. Millerb42597e2012-07-11 21:25:45 -07001148 struct flowi4 fl4;
1149 struct rtable *rt;
1150
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09001151 __build_flow_key(net, &fl4, NULL, iph, oif,
Maciej Żenczykowski1042caa2018-09-25 20:56:27 -07001152 RT_TOS(iph->tos), protocol, 0, 0);
David S. Millerb42597e2012-07-11 21:25:45 -07001153 rt = __ip_route_output_key(net, &fl4);
1154 if (!IS_ERR(rt)) {
David S. Millerceb33202012-07-17 11:31:28 -07001155 __ip_do_redirect(rt, skb, &fl4, false);
David S. Millerb42597e2012-07-11 21:25:45 -07001156 ip_rt_put(rt);
1157 }
1158}
1159EXPORT_SYMBOL_GPL(ipv4_redirect);
1160
1161void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk)
1162{
David S. Miller4895c772012-07-17 04:19:00 -07001163 const struct iphdr *iph = (const struct iphdr *) skb->data;
1164 struct flowi4 fl4;
1165 struct rtable *rt;
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09001166 struct net *net = sock_net(sk);
David S. Millerb42597e2012-07-11 21:25:45 -07001167
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09001168 __build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0);
1169 rt = __ip_route_output_key(net, &fl4);
David S. Miller4895c772012-07-17 04:19:00 -07001170 if (!IS_ERR(rt)) {
David S. Millerceb33202012-07-17 11:31:28 -07001171 __ip_do_redirect(rt, skb, &fl4, false);
David S. Miller4895c772012-07-17 04:19:00 -07001172 ip_rt_put(rt);
1173 }
David S. Millerb42597e2012-07-11 21:25:45 -07001174}
1175EXPORT_SYMBOL_GPL(ipv4_sk_redirect);
1176
David S. Millerefbc368d2011-12-01 13:38:59 -05001177static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
1178{
1179 struct rtable *rt = (struct rtable *) dst;
1180
David S. Millerceb33202012-07-17 11:31:28 -07001181 /* All IPV4 dsts are created with ->obsolete set to the value
1182 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1183 * into this function always.
1184 *
Timo Teräs387aa652013-05-27 20:46:31 +00001185 * When a PMTU/redirect information update invalidates a route,
1186 * this is indicated by setting obsolete to DST_OBSOLETE_KILL or
Julian Wiedmann02afc7a2019-03-20 20:02:56 +01001187 * DST_OBSOLETE_DEAD.
David S. Millerceb33202012-07-17 11:31:28 -07001188 */
Timo Teräs387aa652013-05-27 20:46:31 +00001189 if (dst->obsolete != DST_OBSOLETE_FORCE_CHK || rt_is_expired(rt))
David S. Millerefbc368d2011-12-01 13:38:59 -05001190 return NULL;
Timo Teräsd11a4dc2010-03-18 23:20:20 +00001191 return dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001192}
1193
/* Send ICMP_DEST_UNREACH/ICMP_HOST_UNREACH back to the sender of @skb,
 * first re-validating the IPv4 header and recompiling any IP options so
 * the ICMP reply quotes/handles them correctly.  Silently bails out on
 * malformed headers or uncompilable options.
 */
static void ipv4_send_dest_unreach(struct sk_buff *skb)
{
	struct ip_options opt;
	int res;

	/* Recompile ip options since IPCB may not be valid anymore.
	 * Also check we have a reasonable ipv4 header.
	 */
	if (!pskb_network_may_pull(skb, sizeof(struct iphdr)) ||
	    ip_hdr(skb)->version != 4 || ip_hdr(skb)->ihl < 5)
		return;

	memset(&opt, 0, sizeof(opt));
	if (ip_hdr(skb)->ihl > 5) {
		/* Header claims options: make sure they are all linear. */
		if (!pskb_network_may_pull(skb, ip_hdr(skb)->ihl * 4))
			return;
		opt.optlen = ip_hdr(skb)->ihl * 4 - sizeof(struct iphdr);

		rcu_read_lock();
		res = __ip_options_compile(dev_net(skb->dev), &opt, skb, NULL);
		rcu_read_unlock();

		/* Bad options: do not answer at all. */
		if (res)
			return;
	}
	__icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0, &opt);
}
1221
1222static void ipv4_link_failure(struct sk_buff *skb)
1223{
1224 struct rtable *rt;
1225
1226 ipv4_send_dest_unreach(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001227
Eric Dumazet511c3f92009-06-02 05:14:27 +00001228 rt = skb_rtable(skb);
David S. Miller59436342012-07-10 06:58:42 -07001229 if (rt)
1230 dst_set_expires(&rt->dst, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001231}
1232
Eric W. Biedermanede20592015-10-07 16:48:47 -05001233static int ip_rt_bug(struct net *net, struct sock *sk, struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001234{
Joe Perches91df42b2012-05-15 14:11:54 +00001235 pr_debug("%s: %pI4 -> %pI4, %s\n",
1236 __func__, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr,
1237 skb->dev ? skb->dev->name : "?");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001238 kfree_skb(skb);
Dave Jonesc378a9c2011-05-21 07:16:42 +00001239 WARN_ON(1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001240 return 0;
1241}
1242
/*
   We do not cache the source address of the outgoing interface,
   because it is used only by the IP RR, TS and SRR options,
   so it is out of the fast path.

   BTW remember: "addr" is allowed to be not aligned
   in IP options!
 */
1251
/* Copy into @addr the 4-byte source address this host would use on
 * route @rt for packet @skb (for filling in IP RR/TS/SRR options).
 * Output routes use the packet's own saddr; input routes do a reverse
 * fib lookup for the preferred source, falling back to an address on
 * the output device.  @addr may be unaligned, hence the memcpy().
 */
void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt)
{
	__be32 src;

	if (rt_is_output_route(rt))
		src = ip_hdr(skb)->saddr;
	else {
		struct fib_result res;
		struct iphdr *iph = ip_hdr(skb);
		struct flowi4 fl4 = {
			.daddr = iph->daddr,
			.saddr = iph->saddr,
			.flowi4_tos = RT_TOS(iph->tos),
			.flowi4_oif = rt->dst.dev->ifindex,
			.flowi4_iif = skb->dev->ifindex,
			.flowi4_mark = skb->mark,
		};

		rcu_read_lock();
		if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res, 0) == 0)
			src = fib_result_prefsrc(dev_net(rt->dst.dev), &res);
		else
			/* No route back: pick any universe-scope address
			 * on the output device towards the next hop.
			 */
			src = inet_select_addr(rt->dst.dev,
					       rt_nexthop(rt, iph->daddr),
					       RT_SCOPE_UNIVERSE);
		rcu_read_unlock();
	}
	memcpy(addr, &src, 4);
}
1281
Patrick McHardyc7066f72011-01-14 13:36:42 +01001282#ifdef CONFIG_IP_ROUTE_CLASSID
Linus Torvalds1da177e2005-04-16 15:20:36 -07001283static void set_class_tag(struct rtable *rt, u32 tag)
1284{
Changli Gaod8d1f302010-06-10 23:31:35 -07001285 if (!(rt->dst.tclassid & 0xFFFF))
1286 rt->dst.tclassid |= tag & 0xFFFF;
1287 if (!(rt->dst.tclassid & 0xFFFF0000))
1288 rt->dst.tclassid |= tag & 0xFFFF0000;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001289}
1290#endif
1291
David S. Miller0dbaee32010-12-13 12:52:14 -08001292static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
1293{
Gao Feng7ed14d92017-04-12 12:34:03 +08001294 unsigned int header_size = sizeof(struct tcphdr) + sizeof(struct iphdr);
Eric Dumazet164a5e72017-10-18 17:02:03 -07001295 unsigned int advmss = max_t(unsigned int, ipv4_mtu(dst) - header_size,
Gao Feng7ed14d92017-04-12 12:34:03 +08001296 ip_rt_min_advmss);
David S. Miller0dbaee32010-12-13 12:52:14 -08001297
Gao Feng7ed14d92017-04-12 12:34:03 +08001298 return min(advmss, IPV4_MAX_PMTU - header_size);
David S. Miller0dbaee32010-12-13 12:52:14 -08001299}
1300
/* Effective MTU for @dst, in priority order: unexpired learned PMTU,
 * then the RTAX_MTU metric, then the device MTU (read locklessly with
 * READ_ONCE, clamped to 576 for locked gatewayed routes and to
 * IP_MAX_MTU), minus any lwtunnel encapsulation headroom.
 */
static unsigned int ipv4_mtu(const struct dst_entry *dst)
{
	const struct rtable *rt = (const struct rtable *) dst;
	unsigned int mtu = rt->rt_pmtu;

	/* Expired PMTU estimates are ignored. */
	if (!mtu || time_after_eq(jiffies, rt->dst.expires))
		mtu = dst_metric_raw(dst, RTAX_MTU);

	if (mtu)
		return mtu;

	mtu = READ_ONCE(dst->dev->mtu);

	if (unlikely(ip_mtu_locked(dst))) {
		/* Locked route via a gateway: cap at the classic 576. */
		if (rt->rt_gw_family && mtu > 576)
			mtu = 576;
	}

	mtu = min_t(unsigned int, mtu, IP_MAX_MTU);

	return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
}
1323
/* Remove the nexthop exception for @daddr from @nhc's exception hash,
 * if one exists.  Runs under fnhe_lock; the entry is unlinked with
 * rcu_assign_pointer() and freed via kfree_rcu() so concurrent RCU
 * readers (find_exception()) remain safe.
 */
static void ip_del_fnhe(struct fib_nh_common *nhc, __be32 daddr)
{
	struct fnhe_hash_bucket *hash;
	struct fib_nh_exception *fnhe, __rcu **fnhe_p;
	u32 hval = fnhe_hashfun(daddr);

	spin_lock_bh(&fnhe_lock);

	hash = rcu_dereference_protected(nhc->nhc_exceptions,
					 lockdep_is_held(&fnhe_lock));
	hash += hval;

	/* walk the bucket chain keeping a pointer to the link to patch */
	fnhe_p = &hash->chain;
	fnhe = rcu_dereference_protected(*fnhe_p, lockdep_is_held(&fnhe_lock));
	while (fnhe) {
		if (fnhe->fnhe_daddr == daddr) {
			rcu_assign_pointer(*fnhe_p, rcu_dereference_protected(
				fnhe->fnhe_next, lockdep_is_held(&fnhe_lock)));
			/* set fnhe_daddr to 0 to ensure it won't bind with
			 * new dsts in rt_bind_exception().
			 */
			fnhe->fnhe_daddr = 0;
			fnhe_flush_routes(fnhe);
			kfree_rcu(fnhe, rcu);
			break;
		}
		fnhe_p = &fnhe->fnhe_next;
		fnhe = rcu_dereference_protected(fnhe->fnhe_next,
						 lockdep_is_held(&fnhe_lock));
	}

	spin_unlock_bh(&fnhe_lock);
}
1357
/* Look up the nexthop exception for @daddr on @nhc.
 *
 * Called from an RCU read-side section.  Returns the matching entry,
 * or NULL if there is none.  An entry whose fnhe_expires deadline has
 * passed is deleted on the spot (via ip_del_fnhe()) and treated as
 * not found.
 */
static struct fib_nh_exception *find_exception(struct fib_nh_common *nhc,
					       __be32 daddr)
{
	struct fnhe_hash_bucket *hash = rcu_dereference(nhc->nhc_exceptions);
	struct fib_nh_exception *fnhe;
	u32 hval;

	if (!hash)
		return NULL;

	hval = fnhe_hashfun(daddr);

	for (fnhe = rcu_dereference(hash[hval].chain); fnhe;
	     fnhe = rcu_dereference(fnhe->fnhe_next)) {
		if (fnhe->fnhe_daddr == daddr) {
			if (fnhe->fnhe_expires &&
			    time_after(jiffies, fnhe->fnhe_expires)) {
				/* stale entry: drop it and report no match */
				ip_del_fnhe(nhc, daddr);
				break;
			}
			return fnhe;
		}
	}
	return NULL;
}
David S. Miller4895c772012-07-17 04:19:00 -07001383
/* MTU selection:
 * 1. mtu on route is locked - use it
 * 2. mtu from nexthop exception
 * 3. mtu from egress device
 */

u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr)
{
	struct fib_nh_common *nhc = res->nhc;
	struct net_device *dev = nhc->nhc_dev;
	struct fib_info *fi = res->fi;
	u32 mtu = 0;

	/* use the FIB metric when forwarding honours PMTU by sysctl,
	 * or when the route's MTU metric is explicitly locked
	 */
	if (dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu ||
	    fi->fib_metrics->metrics[RTAX_LOCK - 1] & (1 << RTAX_MTU))
		mtu = fi->fib_mtu;

	if (likely(!mtu)) {
		struct fib_nh_exception *fnhe;

		/* a still-valid nexthop exception carries a learned PMTU */
		fnhe = find_exception(nhc, daddr);
		if (fnhe && !time_after_eq(jiffies, fnhe->fnhe_expires))
			mtu = fnhe->fnhe_pmtu;
	}

	if (likely(!mtu))
		mtu = min(READ_ONCE(dev->mtu), IP_MAX_MTU);

	/* leave room for any lwtunnel encapsulation on this nexthop */
	return mtu - lwtunnel_headroom(nhc->nhc_lwtstate, mtu);
}
1414
/* Bind route @rt to nexthop exception @fnhe for destination @daddr.
 *
 * Under fnhe_lock: re-validates that the exception still covers @daddr,
 * resets the exception's learned data if the namespace genid changed,
 * copies the exception data into @rt, and — when @do_cache — publishes
 * @rt as the exception's cached input or output route (releasing the
 * route it replaces).  Returns true only if @rt was cached.
 */
static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
			      __be32 daddr, const bool do_cache)
{
	bool ret = false;

	spin_lock_bh(&fnhe_lock);

	/* fnhe_daddr is zeroed by ip_del_fnhe() on deletion, so this also
	 * rejects an exception that was removed while we were unlocked
	 */
	if (daddr == fnhe->fnhe_daddr) {
		struct rtable __rcu **porig;
		struct rtable *orig;
		int genid = fnhe_genid(dev_net(rt->dst.dev));

		if (rt_is_input_route(rt))
			porig = &fnhe->fnhe_rth_input;
		else
			porig = &fnhe->fnhe_rth_output;
		orig = rcu_dereference(*porig);

		/* namespace generation changed: learned gw/pmtu are stale */
		if (fnhe->fnhe_genid != genid) {
			fnhe->fnhe_genid = genid;
			fnhe->fnhe_gw = 0;
			fnhe->fnhe_pmtu = 0;
			fnhe->fnhe_expires = 0;
			fnhe->fnhe_mtu_locked = false;
			fnhe_flush_routes(fnhe);
			orig = NULL;
		}
		fill_route_from_fnhe(rt, fnhe);
		if (!rt->rt_gw4) {
			rt->rt_gw4 = daddr;
			rt->rt_gw_family = AF_INET;
		}

		if (do_cache) {
			/* take a reference before publishing via RCU */
			dst_hold(&rt->dst);
			rcu_assign_pointer(*porig, rt);
			if (orig) {
				dst_dev_put(&orig->dst);
				dst_release(&orig->dst);
			}
			ret = true;
		}

		fnhe->fnhe_stamp = jiffies;
	}
	spin_unlock_bh(&fnhe_lock);

	return ret;
}
1464
/* Try to cache @rt on nexthop @nhc: in the nexthop's input-route slot
 * for input routes, otherwise in its per-CPU output-route slot.
 * Lockless — publication races are resolved with cmpxchg().  Returns
 * true if @rt was stored (the previous occupant, if any, is released),
 * false if another CPU won the race.
 */
static bool rt_cache_route(struct fib_nh_common *nhc, struct rtable *rt)
{
	struct rtable *orig, *prev, **p;
	bool ret = true;

	if (rt_is_input_route(rt)) {
		p = (struct rtable **)&nhc->nhc_rth_input;
	} else {
		p = (struct rtable **)raw_cpu_ptr(nhc->nhc_pcpu_rth_output);
	}
	orig = *p;

	/* hold dst before doing cmpxchg() to avoid race condition
	 * on this dst
	 */
	dst_hold(&rt->dst);
	prev = cmpxchg(p, orig, rt);
	if (prev == orig) {
		/* we installed @rt; drop the displaced route's references */
		if (orig) {
			dst_dev_put(&orig->dst);
			dst_release(&orig->dst);
		}
	} else {
		/* lost the race: undo our hold and report failure */
		dst_release(&rt->dst);
		ret = false;
	}

	return ret;
}
1494
/* Per-CPU list of routes that are not cached on a nexthop.  Entries are
 * added/removed under the per-list lock; rt_flush_dev() walks every
 * CPU's list to detach routes from a disappearing device.
 */
struct uncached_list {
	spinlock_t lock;
	struct list_head head;
};

static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt_uncached_list);
David S. Millercaacf052012-07-31 15:06:50 -07001501
/* Track @rt on this CPU's uncached-routes list and remember which list
 * it went on (rt_uncached_list) so rt_del_uncached_list() can find it
 * even if it runs on a different CPU.
 */
void rt_add_uncached_list(struct rtable *rt)
{
	struct uncached_list *ul = raw_cpu_ptr(&rt_uncached_list);

	rt->rt_uncached_list = ul;

	spin_lock_bh(&ul->lock);
	list_add_tail(&rt->rt_uncached, &ul->head);
	spin_unlock_bh(&ul->lock);
}
1512
Xin Long510c3212018-02-14 19:06:02 +08001513void rt_del_uncached_list(struct rtable *rt)
David S. Millercaacf052012-07-31 15:06:50 -07001514{
Eric Dumazet78df76a2012-08-24 05:40:47 +00001515 if (!list_empty(&rt->rt_uncached)) {
Eric Dumazet5055c372015-01-14 15:17:06 -08001516 struct uncached_list *ul = rt->rt_uncached_list;
1517
1518 spin_lock_bh(&ul->lock);
David S. Millercaacf052012-07-31 15:06:50 -07001519 list_del(&rt->rt_uncached);
Eric Dumazet5055c372015-01-14 15:17:06 -08001520 spin_unlock_bh(&ul->lock);
David S. Millercaacf052012-07-31 15:06:50 -07001521 }
1522}
1523
/* dst_ops.destroy callback: drop the route's metrics reference and
 * remove it from the uncached list before the dst is freed.
 */
static void ipv4_dst_destroy(struct dst_entry *dst)
{
	struct rtable *rt = (struct rtable *)dst;

	ip_dst_metrics_put(dst);
	rt_del_uncached_list(rt);
}
1531
/* Detach every uncached route that still points at @dev, re-pointing it
 * at the namespace loopback device so @dev can go away.  Walks the
 * uncached list of every possible CPU under its lock.
 */
void rt_flush_dev(struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct rtable *rt;
	int cpu;

	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu);

		spin_lock_bh(&ul->lock);
		list_for_each_entry(rt, &ul->head, rt_uncached) {
			if (rt->dst.dev != dev)
				continue;
			rt->dst.dev = net->loopback_dev;
			/* move the device reference: hold loopback, drop @dev */
			dev_hold(rt->dst.dev);
			dev_put(dev);
		}
		spin_unlock_bh(&ul->lock);
	}
}
1552
Eric Dumazet4331deb2012-07-25 05:11:23 +00001553static bool rt_cache_valid(const struct rtable *rt)
David S. Millerd2d68ba92012-07-17 12:58:50 -07001554{
Eric Dumazet4331deb2012-07-25 05:11:23 +00001555 return rt &&
1556 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1557 !rt_is_expired(rt);
David S. Millerd2d68ba92012-07-17 12:58:50 -07001558}
1559
/* Finish constructing route @rt from FIB lookup result @res: copy the
 * nexthop gateway, metrics, classid and lwtunnel state, then either
 * cache the route (on the exception @fnhe or the nexthop) or put it on
 * the uncached list so rt_flush_dev() can still find it.
 */
static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
			   const struct fib_result *res,
			   struct fib_nh_exception *fnhe,
			   struct fib_info *fi, u16 type, u32 itag,
			   const bool do_cache)
{
	bool cached = false;

	if (fi) {
		struct fib_nh_common *nhc = FIB_RES_NHC(*res);

		if (nhc->nhc_gw_family && nhc->nhc_scope == RT_SCOPE_LINK) {
			rt->rt_gw_family = nhc->nhc_gw_family;
			/* only INET and INET6 are supported */
			if (likely(nhc->nhc_gw_family == AF_INET))
				rt->rt_gw4 = nhc->nhc_gw.ipv4;
			else
				rt->rt_gw6 = nhc->nhc_gw.ipv6;
		}

		ip_dst_init_metrics(&rt->dst, fi->fib_metrics);

#ifdef CONFIG_IP_ROUTE_CLASSID
		{
			struct fib_nh *nh;

			nh = container_of(nhc, struct fib_nh, nh_common);
			rt->dst.tclassid = nh->nh_tclassid;
		}
#endif
		rt->dst.lwtstate = lwtstate_get(nhc->nhc_lwtstate);
		if (unlikely(fnhe))
			cached = rt_bind_exception(rt, fnhe, daddr, do_cache);
		else if (do_cache)
			cached = rt_cache_route(nhc, rt);
		if (unlikely(!cached)) {
			/* Routes we intend to cache in nexthop exception or
			 * FIB nexthop have the DST_NOCACHE bit clear.
			 * However, if we are unsuccessful at storing this
			 * route into the cache we really need to set it.
			 */
			if (!rt->rt_gw4) {
				rt->rt_gw_family = AF_INET;
				rt->rt_gw4 = daddr;
			}
			rt_add_uncached_list(rt);
		}
	} else
		rt_add_uncached_list(rt);

#ifdef CONFIG_IP_ROUTE_CLASSID
#ifdef CONFIG_IP_MULTIPLE_TABLES
	set_class_tag(rt, res->tclassid);
#endif
	set_class_tag(rt, itag);
#endif
}
1617
/* Allocate and minimally initialise an IPv4 rtable entry.
 *
 * @flags/@type become rt_flags/rt_type; @nopolicy, @noxfrm and
 * @will_cache select the corresponding DST_* allocation flags.  The
 * output handler defaults to ip_output(); local routes get
 * ip_local_deliver() as input.  Returns NULL on allocation failure.
 */
struct rtable *rt_dst_alloc(struct net_device *dev,
			    unsigned int flags, u16 type,
			    bool nopolicy, bool noxfrm, bool will_cache)
{
	struct rtable *rt;

	rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
		       (will_cache ? 0 : DST_HOST) |
		       (nopolicy ? DST_NOPOLICY : 0) |
		       (noxfrm ? DST_NOXFRM : 0));

	if (rt) {
		rt->rt_genid = rt_genid_ipv4(dev_net(dev));
		rt->rt_flags = flags;
		rt->rt_type = type;
		rt->rt_is_input = 0;
		rt->rt_iif = 0;
		rt->rt_pmtu = 0;
		rt->rt_mtu_locked = 0;
		rt->rt_gw_family = 0;
		rt->rt_gw4 = 0;
		INIT_LIST_HEAD(&rt->rt_uncached);

		rt->dst.output = ip_output;
		if (flags & RTCF_LOCAL)
			rt->dst.input = ip_local_deliver;
	}

	return rt;
}
EXPORT_SYMBOL(rt_dst_alloc);
David S. Miller0c4dcd52011-02-17 15:42:37 -08001649
/* called in rcu_read_lock() section */
/* Validate the source address of a multicast packet received on @dev.
 * Returns 0 if acceptable (filling *itag from the source validation),
 * -EINVAL for martian/disallowed sources, or the fib_validate_source()
 * error.
 */
int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
			  u8 tos, struct net_device *dev,
			  struct in_device *in_dev, u32 *itag)
{
	int err;

	/* Primary sanity checks. */
	if (!in_dev)
		return -EINVAL;

	/* source must not itself be multicast/broadcast, and frame must be IP */
	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
	    skb->protocol != htons(ETH_P_IP))
		return -EINVAL;

	if (ipv4_is_loopback(saddr) && !IN_DEV_ROUTE_LOCALNET(in_dev))
		return -EINVAL;

	if (ipv4_is_zeronet(saddr)) {
		/* 0.0.0.0 sources are only valid for local-multicast IGMP */
		if (!ipv4_is_local_multicast(daddr) &&
		    ip_hdr(skb)->protocol != IPPROTO_IGMP)
			return -EINVAL;
	} else {
		err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
					  in_dev, itag);
		if (err < 0)
			return err;
	}
	return 0;
}
1680
/* called in rcu_read_lock() section */
/* Build the input route for a received multicast packet.  @our is
 * non-zero when this host is a member of the group, adding RTCF_LOCAL.
 * On success the new route is attached to @skb and 0 is returned.
 */
static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
			     u8 tos, struct net_device *dev, int our)
{
	struct in_device *in_dev = __in_dev_get_rcu(dev);
	unsigned int flags = RTCF_MULTICAST;
	struct rtable *rth;
	u32 itag = 0;
	int err;

	err = ip_mc_validate_source(skb, daddr, saddr, tos, dev, in_dev, &itag);
	if (err)
		return err;

	if (our)
		flags |= RTCF_LOCAL;

	rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false);
	if (!rth)
		return -ENOBUFS;

#ifdef CONFIG_IP_ROUTE_CLASSID
	rth->dst.tclassid = itag;
#endif
	/* multicast routes are never used for output */
	rth->dst.output = ip_rt_bug;
	rth->rt_is_input= 1;

#ifdef CONFIG_IP_MROUTE
	/* non-link-local groups may need multicast forwarding */
	if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev))
		rth->dst.input = ip_mr_input;
#endif
	RT_CACHE_STAT_INC(in_slow_mc);

	skb_dst_set(skb, &rth->dst);
	return 0;
}
1718
1719
/* Account (and, with CONFIG_IP_ROUTE_VERBOSE, rate-limited log) a
 * packet whose source address failed validation ("martian source").
 */
static void ip_handle_martian_source(struct net_device *dev,
				     struct in_device *in_dev,
				     struct sk_buff *skb,
				     __be32 daddr,
				     __be32 saddr)
{
	RT_CACHE_STAT_INC(in_martian_src);
#ifdef CONFIG_IP_ROUTE_VERBOSE
	if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) {
		/*
		 * RFC1812 recommendation, if source is martian,
		 * the only hint is MAC header.
		 */
		pr_warn("martian source %pI4 from %pI4, on dev %s\n",
			&daddr, &saddr, dev->name);
		if (dev->hard_header_len && skb_mac_header_was_set(skb)) {
			print_hex_dump(KERN_WARNING, "ll header: ",
				       DUMP_PREFIX_OFFSET, 16, 1,
				       skb_mac_header(skb),
				       dev->hard_header_len, false);
		}
	}
#endif
}
1744
/* called in rcu_read_lock() section */
/* Build (or reuse) the forwarding route for an input packet once the
 * FIB lookup in @res has selected a nexthop: validate the source,
 * decide on ICMP redirects, consult the nexthop-exception/route cache,
 * and otherwise allocate a fresh route with ip_forward() as input.
 * Returns 0 on success with the route attached to @skb.
 */
static int __mkroute_input(struct sk_buff *skb,
			   const struct fib_result *res,
			   struct in_device *in_dev,
			   __be32 daddr, __be32 saddr, u32 tos)
{
	struct fib_nh_common *nhc = FIB_RES_NHC(*res);
	struct net_device *dev = nhc->nhc_dev;
	struct fib_nh_exception *fnhe;
	struct rtable *rth;
	int err;
	struct in_device *out_dev;
	bool do_cache;
	u32 itag = 0;

	/* get a working reference to the output device */
	out_dev = __in_dev_get_rcu(dev);
	if (!out_dev) {
		net_crit_ratelimited("Bug in ip_route_input_slow(). Please report.\n");
		return -EINVAL;
	}

	err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res),
				  in_dev->dev, in_dev, &itag);
	if (err < 0) {
		ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
					 saddr);

		goto cleanup;
	}

	/* routes marked by fib_validate_source (itag) are not cached */
	do_cache = res->fi && !itag;
	if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) &&
	    skb->protocol == htons(ETH_P_IP)) {
		__be32 gw;

		gw = nhc->nhc_gw_family == AF_INET ? nhc->nhc_gw.ipv4 : 0;
		/* packet leaves on the interface it arrived on: candidate
		 * for an ICMP redirect if sender and gateway share a link
		 */
		if (IN_DEV_SHARED_MEDIA(out_dev) ||
		    inet_addr_onlink(out_dev, saddr, gw))
			IPCB(skb)->flags |= IPSKB_DOREDIRECT;
	}

	if (skb->protocol != htons(ETH_P_IP)) {
		/* Not IP (i.e. ARP). Do not create route, if it is
		 * invalid for proxy arp. DNAT routes are always valid.
		 *
		 * Proxy arp feature have been extended to allow, ARP
		 * replies back to the same interface, to support
		 * Private VLAN switch technologies. See arp.c.
		 */
		if (out_dev == in_dev &&
		    IN_DEV_PROXY_ARP_PVLAN(in_dev) == 0) {
			err = -EINVAL;
			goto cleanup;
		}
	}

	fnhe = find_exception(nhc, daddr);
	if (do_cache) {
		/* prefer the route cached on the exception, else the nexthop */
		if (fnhe)
			rth = rcu_dereference(fnhe->fnhe_rth_input);
		else
			rth = rcu_dereference(nhc->nhc_rth_input);
		if (rt_cache_valid(rth)) {
			skb_dst_set_noref(skb, &rth->dst);
			goto out;
		}
	}

	rth = rt_dst_alloc(out_dev->dev, 0, res->type,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY),
			   IN_DEV_CONF_GET(out_dev, NOXFRM), do_cache);
	if (!rth) {
		err = -ENOBUFS;
		goto cleanup;
	}

	rth->rt_is_input = 1;
	RT_CACHE_STAT_INC(in_slow_tot);

	rth->dst.input = ip_forward;

	rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag,
		       do_cache);
	lwtunnel_set_redirect(&rth->dst);
	skb_dst_set(skb, &rth->dst);
out:
	err = 0;
 cleanup:
	return err;
}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001836
Peter Nørlund79a13152015-09-30 10:12:22 +02001837#ifdef CONFIG_IP_ROUTE_MULTIPATH
/* To make ICMP packets follow the right flow, the multipath hash is
 * calculated from the inner IP addresses.
 */
/* Fill @hash_keys->addrs.v4addrs from @skb.  For error-class ICMP
 * (dest-unreach, redirect, time-exceeded, parameter-problem) the
 * addresses come from the quoted inner IP header; in every other case
 * (non-ICMP, fragments, unreadable headers) the outer header is used.
 */
static void ip_multipath_l3_keys(const struct sk_buff *skb,
				 struct flow_keys *hash_keys)
{
	const struct iphdr *outer_iph = ip_hdr(skb);
	const struct iphdr *key_iph = outer_iph;
	const struct iphdr *inner_iph;
	const struct icmphdr *icmph;
	struct iphdr _inner_iph;
	struct icmphdr _icmph;

	if (likely(outer_iph->protocol != IPPROTO_ICMP))
		goto out;

	/* non-first fragments carry no ICMP header to inspect */
	if (unlikely((outer_iph->frag_off & htons(IP_OFFSET)) != 0))
		goto out;

	icmph = skb_header_pointer(skb, outer_iph->ihl * 4, sizeof(_icmph),
				   &_icmph);
	if (!icmph)
		goto out;

	if (icmph->type != ICMP_DEST_UNREACH &&
	    icmph->type != ICMP_REDIRECT &&
	    icmph->type != ICMP_TIME_EXCEEDED &&
	    icmph->type != ICMP_PARAMETERPROB)
		goto out;

	inner_iph = skb_header_pointer(skb,
				       outer_iph->ihl * 4 + sizeof(_icmph),
				       sizeof(_inner_iph), &_inner_iph);
	if (!inner_iph)
		goto out;

	key_iph = inner_iph;
out:
	hash_keys->addrs.v4addrs.src = key_iph->saddr;
	hash_keys->addrs.v4addrs.dst = key_iph->daddr;
}
1879
/* if skb is set it will be used and fl4 can be NULL */
/* Compute the multipath hash for nexthop selection, per the
 * fib_multipath_hash_policy sysctl:
 *   0 - L3: source/destination addresses only;
 *   1 - L4: 5-tuple (addresses, ports, protocol).
 * A non-zero flowi4_multipath_hash is mixed in at the end.  The result
 * is shifted right by one so the top bit is free for callers.
 */
int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
		       const struct sk_buff *skb, struct flow_keys *flkeys)
{
	u32 multipath_hash = fl4 ? fl4->flowi4_multipath_hash : 0;
	struct flow_keys hash_keys;
	u32 mhash;

	switch (net->ipv4.sysctl_fib_multipath_hash_policy) {
	case 0:
		memset(&hash_keys, 0, sizeof(hash_keys));
		hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
		if (skb) {
			ip_multipath_l3_keys(skb, &hash_keys);
		} else {
			hash_keys.addrs.v4addrs.src = fl4->saddr;
			hash_keys.addrs.v4addrs.dst = fl4->daddr;
		}
		break;
	case 1:
		/* skb is currently provided only when forwarding */
		if (skb) {
			unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
			struct flow_keys keys;

			/* short-circuit if we already have L4 hash present */
			if (skb->l4_hash)
				return skb_get_hash_raw(skb) >> 1;

			memset(&hash_keys, 0, sizeof(hash_keys));

			/* dissect only if the caller didn't already */
			if (!flkeys) {
				skb_flow_dissect_flow_keys(skb, &keys, flag);
				flkeys = &keys;
			}

			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
			hash_keys.addrs.v4addrs.src = flkeys->addrs.v4addrs.src;
			hash_keys.addrs.v4addrs.dst = flkeys->addrs.v4addrs.dst;
			hash_keys.ports.src = flkeys->ports.src;
			hash_keys.ports.dst = flkeys->ports.dst;
			hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
		} else {
			memset(&hash_keys, 0, sizeof(hash_keys));
			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
			hash_keys.addrs.v4addrs.src = fl4->saddr;
			hash_keys.addrs.v4addrs.dst = fl4->daddr;
			hash_keys.ports.src = fl4->fl4_sport;
			hash_keys.ports.dst = fl4->fl4_dport;
			hash_keys.basic.ip_proto = fl4->flowi4_proto;
		}
		break;
	}
	mhash = flow_hash_from_keys(&hash_keys);

	if (multipath_hash)
		mhash = jhash_2words(mhash, multipath_hash, 0);

	return mhash >> 1;
}
Peter Nørlund79a13152015-09-30 10:12:22 +02001940#endif /* CONFIG_IP_ROUTE_MULTIPATH */
1941
/* Select a nexthop (when the matched FIB entry is multipath) and then
 * create the input route via __mkroute_input().
 */
static int ip_mkroute_input(struct sk_buff *skb,
			    struct fib_result *res,
			    struct in_device *in_dev,
			    __be32 daddr, __be32 saddr, u32 tos,
			    struct flow_keys *hkeys)
{
#ifdef CONFIG_IP_ROUTE_MULTIPATH
	if (res->fi && res->fi->fib_nhs > 1) {
		int h = fib_multipath_hash(res->fi->fib_net, NULL, skb, hkeys);

		fib_select_multipath(res, h);
	}
#endif

	/* create a routing cache entry */
	return __mkroute_input(skb, res, in_dev, daddr, saddr, tos);
}
1959
/*
 *	NOTE. We drop all packets that have a local source
 *	address, because every properly looped-back packet
 *	must already have the correct destination attached by the
 *	output routine.
 *
 *	This approach solves two big problems:
 *	1. Non-simplex devices are handled properly.
 *	2. IP spoofing attempts are filtered with a 100% guarantee.
 *	called with rcu_read_lock()
 */
1970
/* Slow-path resolution of the input route for one received packet:
 * sanity-check source/destination addresses, look the destination up
 * in the FIB and attach the resulting dst entry to the skb.  On return
 * *res describes the matched route (it is cleared on the failure
 * paths).  Runs under rcu_read_lock() (called from
 * ip_route_input_rcu()).
 *
 * Returns 0 on success or a negative errno.
 */
static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
			       u8 tos, struct net_device *dev,
			       struct fib_result *res)
{
	struct in_device *in_dev = __in_dev_get_rcu(dev);
	struct flow_keys *flkeys = NULL, _flkeys;
	struct net *net = dev_net(dev);
	struct ip_tunnel_info *tun_info;
	int err = -EINVAL;
	unsigned int flags = 0;
	u32 itag = 0;
	struct rtable *rth;
	struct flowi4 fl4;
	bool do_cache = true;

	/* IP on this device is disabled. */

	if (!in_dev)
		goto out;

	/* Check for the most weird martians, which can be not detected
	   by fib_lookup.
	 */

	/* Carry the tunnel id into the flow key so the FIB lookup can
	 * match on it; TX-only tunnel metadata is deliberately skipped.
	 */
	tun_info = skb_tunnel_info(skb);
	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
		fl4.flowi4_tun_key.tun_id = tun_info->key.tun_id;
	else
		fl4.flowi4_tun_key.tun_id = 0;
	skb_dst_drop(skb);

	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
		goto martian_source;

	res->fi = NULL;
	res->table = NULL;
	if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0))
		goto brd_input;

	/* Accept zero addresses only to limited broadcast;
	 * I even do not know to fix it or not. Waiting for complains :-)
	 */
	if (ipv4_is_zeronet(saddr))
		goto martian_source;

	if (ipv4_is_zeronet(daddr))
		goto martian_destination;

	/* Following code try to avoid calling IN_DEV_NET_ROUTE_LOCALNET(),
	 * and call it once if daddr or/and saddr are loopback addresses
	 */
	if (ipv4_is_loopback(daddr)) {
		if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
			goto martian_destination;
	} else if (ipv4_is_loopback(saddr)) {
		if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
			goto martian_source;
	}

	/*
	 *	Now we are ready to route packet.
	 */
	fl4.flowi4_oif = 0;
	fl4.flowi4_iif = dev->ifindex;
	fl4.flowi4_mark = skb->mark;
	fl4.flowi4_tos = tos;
	fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
	fl4.flowi4_flags = 0;
	fl4.daddr = daddr;
	fl4.saddr = saddr;
	fl4.flowi4_uid = sock_net_uid(net, NULL);

	/* Use the early-dissected flow keys when available; otherwise
	 * zero the L4 fields so the lookup key is well defined.
	 */
	if (fib4_rules_early_flow_dissect(net, skb, &fl4, &_flkeys)) {
		flkeys = &_flkeys;
	} else {
		fl4.flowi4_proto = 0;
		fl4.fl4_sport = 0;
		fl4.fl4_dport = 0;
	}

	err = fib_lookup(net, &fl4, res, 0);
	if (err != 0) {
		if (!IN_DEV_FORWARD(in_dev))
			err = -EHOSTUNREACH;
		goto no_route;
	}

	if (res->type == RTN_BROADCAST) {
		/* Broadcast forwarding enabled on this device: treat it
		 * like a normal forwarded route.
		 */
		if (IN_DEV_BFORWARD(in_dev))
			goto make_route;
		/* not do cache if bc_forwarding is enabled */
		if (IPV4_DEVCONF_ALL(net, BC_FORWARDING))
			do_cache = false;
		goto brd_input;
	}

	if (res->type == RTN_LOCAL) {
		/* Validate the source address before local delivery. */
		err = fib_validate_source(skb, saddr, daddr, tos,
					  0, dev, in_dev, &itag);
		if (err < 0)
			goto martian_source;
		goto local_input;
	}

	if (!IN_DEV_FORWARD(in_dev)) {
		err = -EHOSTUNREACH;
		goto no_route;
	}
	if (res->type != RTN_UNICAST)
		goto martian_destination;

make_route:
	err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos, flkeys);
out:	return err;

brd_input:
	if (skb->protocol != htons(ETH_P_IP))
		goto e_inval;

	if (!ipv4_is_zeronet(saddr)) {
		err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
					  in_dev, &itag);
		if (err < 0)
			goto martian_source;
	}
	flags |= RTCF_BROADCAST;
	res->type = RTN_BROADCAST;
	RT_CACHE_STAT_INC(in_brd);

local_input:
	/* Only cache when the lookup produced a fib_info and no source
	 * tag is in play (do_cache may already be false from the
	 * bc_forwarding check above).
	 */
	do_cache &= res->fi && !itag;
	if (do_cache) {
		struct fib_nh_common *nhc = FIB_RES_NHC(*res);

		/* Fast path: reuse the nexthop's cached input dst. */
		rth = rcu_dereference(nhc->nhc_rth_input);
		if (rt_cache_valid(rth)) {
			skb_dst_set_noref(skb, &rth->dst);
			err = 0;
			goto out;
		}
	}

	rth = rt_dst_alloc(l3mdev_master_dev_rcu(dev) ? : net->loopback_dev,
			   flags | RTCF_LOCAL, res->type,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache);
	if (!rth)
		goto e_nobufs;

	/* Locally delivered routes are never transmitted; ip_rt_bug
	 * traps any unexpected output through this dst.
	 */
	rth->dst.output= ip_rt_bug;
#ifdef CONFIG_IP_ROUTE_CLASSID
	rth->dst.tclassid = itag;
#endif
	rth->rt_is_input = 1;

	RT_CACHE_STAT_INC(in_slow_tot);
	if (res->type == RTN_UNREACHABLE) {
		rth->dst.input= ip_error;
		rth->dst.error= -err;
		rth->rt_flags 	&= ~RTCF_LOCAL;
	}

	if (do_cache) {
		struct fib_nh_common *nhc = FIB_RES_NHC(*res);

		rth->dst.lwtstate = lwtstate_get(nhc->nhc_lwtstate);
		if (lwtunnel_input_redirect(rth->dst.lwtstate)) {
			WARN_ON(rth->dst.input == lwtunnel_input);
			rth->dst.lwtstate->orig_input = rth->dst.input;
			rth->dst.input = lwtunnel_input;
		}

		/* If caching into the nexthop fails, fall back to the
		 * uncached list.
		 */
		if (unlikely(!rt_cache_route(nhc, rth)))
			rt_add_uncached_list(rth);
	}
	skb_dst_set(skb, &rth->dst);
	err = 0;
	goto out;

no_route:
	RT_CACHE_STAT_INC(in_no_route);
	res->type = RTN_UNREACHABLE;
	res->fi = NULL;
	res->table = NULL;
	goto local_input;

	/*
	 *	Do not cache martian addresses: they should be logged (RFC1812)
	 */
martian_destination:
	RT_CACHE_STAT_INC(in_martian_dst);
#ifdef CONFIG_IP_ROUTE_VERBOSE
	if (IN_DEV_LOG_MARTIANS(in_dev))
		net_warn_ratelimited("martian destination %pI4 from %pI4, dev %s\n",
				     &daddr, &saddr, dev->name);
#endif

e_inval:
	err = -EINVAL;
	goto out;

e_nobufs:
	err = -ENOBUFS;
	goto out;

martian_source:
	ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
	goto out;
}
2179
David S. Millerc6cffba2012-07-26 11:14:38 +00002180int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2181 u8 tos, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002182{
David Ahern5510cdf2017-05-25 10:42:34 -07002183 struct fib_result res;
2184 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002185
Julian Anastasov6e280992017-02-26 17:14:35 +02002186 tos &= IPTOS_RT_MASK;
Eric Dumazet96d36222010-06-02 19:21:31 +00002187 rcu_read_lock();
David Ahern5510cdf2017-05-25 10:42:34 -07002188 err = ip_route_input_rcu(skb, daddr, saddr, tos, dev, &res);
2189 rcu_read_unlock();
Eric Dumazet96d36222010-06-02 19:21:31 +00002190
David Ahern5510cdf2017-05-25 10:42:34 -07002191 return err;
2192}
2193EXPORT_SYMBOL(ip_route_input_noref);
2194
/* called with rcu_read_lock held */
/* Route an incoming packet: multicast destinations are recognised and
 * handed to ip_route_input_mc() here; everything else goes through
 * ip_route_input_slow().
 */
int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr,
		       u8 tos, struct net_device *dev, struct fib_result *res)
{
	/* Multicast recognition logic is moved from route cache to here.
	   The problem was that too many Ethernet cards have broken/missing
	   hardware multicast filters :-( As result the host on multicasting
	   network acquires a lot of useless route cache entries, sort of
	   SDR messages from all the world. Now we try to get rid of them.
	   Really, provided software IP multicast filter is organized
	   reasonably (at least, hashed), it does not result in a slowdown
	   comparing with route cache reject entries.
	   Note, that multicast routers are not affected, because
	   route cache entry is created eventually.
	 */
	if (ipv4_is_multicast(daddr)) {
		struct in_device *in_dev = __in_dev_get_rcu(dev);
		int our = 0;
		int err = -EINVAL;

		/* No IPv4 configuration on the device: nothing to do. */
		if (!in_dev)
			return err;
		our = ip_check_mc_rcu(in_dev, daddr, saddr,
				      ip_hdr(skb)->protocol);

		/* check l3 master if no match yet */
		if (!our && netif_is_l3_slave(dev)) {
			struct in_device *l3_in_dev;

			l3_in_dev = __in_dev_get_rcu(skb->dev);
			if (l3_in_dev)
				our = ip_check_mc_rcu(l3_in_dev, daddr, saddr,
						      ip_hdr(skb)->protocol);
		}

		/* Deliver if the group is joined locally, or (when
		 * multicast routing is compiled in) if this device
		 * forwards non-link-local multicast.
		 */
		if (our
#ifdef CONFIG_IP_MROUTE
			||
		    (!ipv4_is_local_multicast(daddr) &&
		     IN_DEV_MFORWARD(in_dev))
#endif
		   ) {
			err = ip_route_input_mc(skb, daddr, saddr,
						tos, dev, our);
		}
		return err;
	}

	return ip_route_input_slow(skb, daddr, saddr, tos, dev, res);
}
2245
/* called with rcu_read_lock() */
/* Build the output rtable for FIB result @res, reusing a cached dst
 * (per-exception or per-cpu per-nexthop) when one is valid; otherwise
 * allocate and initialise a fresh one.  Returns the route or an
 * ERR_PTR() on failure.
 */
static struct rtable *__mkroute_output(const struct fib_result *res,
				       const struct flowi4 *fl4, int orig_oif,
				       struct net_device *dev_out,
				       unsigned int flags)
{
	struct fib_info *fi = res->fi;
	struct fib_nh_exception *fnhe;
	struct in_device *in_dev;
	u16 type = res->type;
	struct rtable *rth;
	bool do_cache;

	in_dev = __in_dev_get_rcu(dev_out);
	if (!in_dev)
		return ERR_PTR(-EINVAL);

	/* A loopback source on a non-loopback device is only allowed
	 * when route_localnet is enabled (or on an l3 master device).
	 */
	if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
		if (ipv4_is_loopback(fl4->saddr) &&
		    !(dev_out->flags & IFF_LOOPBACK) &&
		    !netif_is_l3_master(dev_out))
			return ERR_PTR(-EINVAL);

	/* Destination address class overrides the FIB result type. */
	if (ipv4_is_lbcast(fl4->daddr))
		type = RTN_BROADCAST;
	else if (ipv4_is_multicast(fl4->daddr))
		type = RTN_MULTICAST;
	else if (ipv4_is_zeronet(fl4->daddr))
		return ERR_PTR(-EINVAL);

	if (dev_out->flags & IFF_LOOPBACK)
		flags |= RTCF_LOCAL;

	do_cache = true;
	if (type == RTN_BROADCAST) {
		flags |= RTCF_BROADCAST | RTCF_LOCAL;
		fi = NULL;
	} else if (type == RTN_MULTICAST) {
		flags |= RTCF_MULTICAST | RTCF_LOCAL;
		if (!ip_check_mc_rcu(in_dev, fl4->daddr, fl4->saddr,
				     fl4->flowi4_proto))
			flags &= ~RTCF_LOCAL;
		else
			do_cache = false;
		/* If multicast route do not exist use
		 * default one, but do not gateway in this case.
		 * Yes, it is hack.
		 */
		if (fi && res->prefixlen < 4)
			fi = NULL;
	} else if ((type == RTN_LOCAL) && (orig_oif != 0) &&
		   (orig_oif != dev_out->ifindex)) {
		/* For local routes that require a particular output interface
		 * we do not want to cache the result.  Caching the result
		 * causes incorrect behaviour when there are multiple source
		 * addresses on the interface, the end result being that if the
		 * intended recipient is waiting on that interface for the
		 * packet he won't receive it because it will be delivered on
		 * the loopback interface and the IP_PKTINFO ipi_ifindex will
		 * be set to the loopback interface as well.
		 */
		do_cache = false;
	}

	fnhe = NULL;
	do_cache &= fi != NULL;
	if (fi) {
		struct fib_nh_common *nhc = FIB_RES_NHC(*res);
		struct rtable __rcu **prth;

		/* Prefer a dst cached on a nexthop exception; fall back
		 * to the per-cpu nexthop cache.
		 */
		fnhe = find_exception(nhc, fl4->daddr);
		if (!do_cache)
			goto add;
		if (fnhe) {
			prth = &fnhe->fnhe_rth_output;
		} else {
			if (unlikely(fl4->flowi4_flags &
				     FLOWI_FLAG_KNOWN_NH &&
				     !(nhc->nhc_gw_family &&
				       nhc->nhc_scope == RT_SCOPE_LINK))) {
				do_cache = false;
				goto add;
			}
			prth = raw_cpu_ptr(nhc->nhc_pcpu_rth_output);
		}
		rth = rcu_dereference(*prth);
		if (rt_cache_valid(rth) && dst_hold_safe(&rth->dst))
			return rth;
	}

add:
	rth = rt_dst_alloc(dev_out, flags, type,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY),
			   IN_DEV_CONF_GET(in_dev, NOXFRM),
			   do_cache);
	if (!rth)
		return ERR_PTR(-ENOBUFS);

	rth->rt_iif = orig_oif;

	RT_CACHE_STAT_INC(out_slow_tot);

	if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
		if (flags & RTCF_LOCAL &&
		    !(dev_out->flags & IFF_LOOPBACK)) {
			rth->dst.output = ip_mc_output;
			RT_CACHE_STAT_INC(out_slow_mc);
		}
#ifdef CONFIG_IP_MROUTE
		if (type == RTN_MULTICAST) {
			if (IN_DEV_MFORWARD(in_dev) &&
			    !ipv4_is_local_multicast(fl4->daddr)) {
				rth->dst.input = ip_mr_input;
				rth->dst.output = ip_mc_output;
			}
		}
#endif
	}

	rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0, do_cache);
	lwtunnel_set_redirect(&rth->dst);

	return rth;
}
2370
Linus Torvalds1da177e2005-04-16 15:20:36 -07002371/*
2372 * Major route resolver routine.
2373 */
2374
David Ahern3abd1ade2017-05-25 10:42:33 -07002375struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
2376 const struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002377{
Julian Anastasovf61759e2011-12-02 11:39:42 +00002378 __u8 tos = RT_FL_TOS(fl4);
Eric Dumazetd0ea2b12018-04-07 13:42:42 -07002379 struct fib_result res = {
2380 .type = RTN_UNSPEC,
2381 .fi = NULL,
2382 .table = NULL,
2383 .tclassid = 0,
2384 };
David S. Miller5ada5522011-02-17 15:29:00 -08002385 struct rtable *rth;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002386
Pavel Emelyanov1fb94892012-08-08 21:53:36 +00002387 fl4->flowi4_iif = LOOPBACK_IFINDEX;
David S. Miller813b3b52011-04-28 14:48:42 -07002388 fl4->flowi4_tos = tos & IPTOS_RT_MASK;
2389 fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
2390 RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
David S. Miller44713b62011-03-04 21:24:47 -08002391
David S. Miller010c2702011-02-17 15:37:09 -08002392 rcu_read_lock();
David Ahern3abd1ade2017-05-25 10:42:33 -07002393 rth = ip_route_output_key_hash_rcu(net, fl4, &res, skb);
2394 rcu_read_unlock();
2395
2396 return rth;
2397}
2398EXPORT_SYMBOL_GPL(ip_route_output_key_hash);
2399
/* Core of the output route resolver.  Validates any caller-supplied
 * source address and output interface, consults the FIB, and finally
 * builds the route via __mkroute_output().  May rewrite saddr/daddr/
 * oif inside @fl4.  Caller holds rcu_read_lock() (see
 * ip_route_output_key_hash()).  Returns the route or an ERR_PTR().
 */
struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4,
					    struct fib_result *res,
					    const struct sk_buff *skb)
{
	struct net_device *dev_out = NULL;
	int orig_oif = fl4->flowi4_oif;
	unsigned int flags = 0;
	struct rtable *rth;
	int err = -ENETUNREACH;

	/* Step 1: sanity-check a caller-supplied source address. */
	if (fl4->saddr) {
		rth = ERR_PTR(-EINVAL);
		if (ipv4_is_multicast(fl4->saddr) ||
		    ipv4_is_lbcast(fl4->saddr) ||
		    ipv4_is_zeronet(fl4->saddr))
			goto out;

		/* I removed check for oif == dev_out->oif here.
		   It was wrong for two reasons:
		   1. ip_dev_find(net, saddr) can return wrong iface, if saddr
		      is assigned to multiple interfaces.
		   2. Moreover, we are allowed to send packets with saddr
		      of another iface. --ANK
		 */

		if (fl4->flowi4_oif == 0 &&
		    (ipv4_is_multicast(fl4->daddr) ||
		     ipv4_is_lbcast(fl4->daddr))) {
			/* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
			dev_out = __ip_dev_find(net, fl4->saddr, false);
			if (!dev_out)
				goto out;

			/* Special hack: user can direct multicasts
			   and limited broadcast via necessary interface
			   without fiddling with IP_MULTICAST_IF or IP_PKTINFO.
			   This hack is not just for fun, it allows
			   vic,vat and friends to work.
			   They bind socket to loopback, set ttl to zero
			   and expect that it will work.
			   From the viewpoint of routing cache they are broken,
			   because we are not allowed to build multicast path
			   with loopback source addr (look, routing cache
			   cannot know, that ttl is zero, so that packet
			   will not leave this host and route is valid).
			   Luckily, this hack is good workaround.
			 */

			fl4->flowi4_oif = dev_out->ifindex;
			goto make_route;
		}

		if (!(fl4->flowi4_flags & FLOWI_FLAG_ANYSRC)) {
			/* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
			if (!__ip_dev_find(net, fl4->saddr, false))
				goto out;
		}
	}


	/* Step 2: validate a caller-supplied output interface. */
	if (fl4->flowi4_oif) {
		dev_out = dev_get_by_index_rcu(net, fl4->flowi4_oif);
		rth = ERR_PTR(-ENODEV);
		if (!dev_out)
			goto out;

		/* RACE: Check return value of inet_select_addr instead. */
		if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) {
			rth = ERR_PTR(-ENETUNREACH);
			goto out;
		}
		if (ipv4_is_local_multicast(fl4->daddr) ||
		    ipv4_is_lbcast(fl4->daddr) ||
		    fl4->flowi4_proto == IPPROTO_IGMP) {
			if (!fl4->saddr)
				fl4->saddr = inet_select_addr(dev_out, 0,
							      RT_SCOPE_LINK);
			goto make_route;
		}
		if (!fl4->saddr) {
			if (ipv4_is_multicast(fl4->daddr))
				fl4->saddr = inet_select_addr(dev_out, 0,
							      fl4->flowi4_scope);
			else if (!fl4->daddr)
				fl4->saddr = inet_select_addr(dev_out, 0,
							      RT_SCOPE_HOST);
		}
	}

	/* No destination: route to the local host via loopback. */
	if (!fl4->daddr) {
		fl4->daddr = fl4->saddr;
		if (!fl4->daddr)
			fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK);
		dev_out = net->loopback_dev;
		fl4->flowi4_oif = LOOPBACK_IFINDEX;
		res->type = RTN_LOCAL;
		flags |= RTCF_LOCAL;
		goto make_route;
	}

	/* Step 3: the FIB lookup proper. */
	err = fib_lookup(net, fl4, res, 0);
	if (err) {
		res->fi = NULL;
		res->table = NULL;
		if (fl4->flowi4_oif &&
		    (ipv4_is_multicast(fl4->daddr) ||
		    !netif_index_is_l3_master(net, fl4->flowi4_oif))) {
			/* Apparently, routing tables are wrong. Assume,
			   that the destination is on link.

			   WHY? DW.
			   Because we are allowed to send to iface
			   even if it has NO routes and NO assigned
			   addresses. When oif is specified, routing
			   tables are looked up with only one purpose:
			   to catch if destination is gatewayed, rather than
			   direct. Moreover, if MSG_DONTROUTE is set,
			   we send packet, ignoring both routing tables
			   and ifaddr state. --ANK


			   We could make it even if oif is unknown,
			   likely IPv6, but we do not.
			 */

			if (fl4->saddr == 0)
				fl4->saddr = inet_select_addr(dev_out, 0,
							      RT_SCOPE_LINK);
			res->type = RTN_UNICAST;
			goto make_route;
		}
		rth = ERR_PTR(err);
		goto out;
	}

	if (res->type == RTN_LOCAL) {
		if (!fl4->saddr) {
			if (res->fi->fib_prefsrc)
				fl4->saddr = res->fi->fib_prefsrc;
			else
				fl4->saddr = fl4->daddr;
		}

		/* L3 master device is the loopback for that domain */
		dev_out = l3mdev_master_dev_rcu(FIB_RES_DEV(*res)) ? :
			net->loopback_dev;

		/* make sure orig_oif points to fib result device even
		 * though packet rx/tx happens over loopback or l3mdev
		 */
		orig_oif = FIB_RES_OIF(*res);

		fl4->flowi4_oif = dev_out->ifindex;
		flags |= RTCF_LOCAL;
		goto make_route;
	}

	fib_select_path(net, res, fl4, skb);

	dev_out = FIB_RES_DEV(*res);
	fl4->flowi4_oif = dev_out->ifindex;


make_route:
	rth = __mkroute_output(res, fl4, orig_oif, dev_out, flags);

out:
	return rth;
}
Arnaldo Carvalho de Melod8c97a92005-08-09 20:12:12 -07002569
/* Blackhole dst entries never become stale: report "still valid" by
 * returning NULL (i.e. no replacement dst), regardless of the cookie.
 */
static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie)
{
	return NULL;
}
2574
Steffen Klassertebb762f2011-11-23 02:12:51 +00002575static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst)
Roland Dreierec831ea2011-01-31 13:16:00 -08002576{
Steffen Klassert618f9bc2011-11-23 02:13:31 +00002577 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
2578
2579 return mtu ? : dst->dev->mtu;
Roland Dreierec831ea2011-01-31 13:16:00 -08002580}
2581
/* Path-MTU updates are meaningless on a blackhole route: deliberately
 * a no-op.
 */
static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
					  struct sk_buff *skb, u32 mtu)
{
}
2586
/* ICMP redirects are ignored on a blackhole route: deliberately a no-op. */
static void ipv4_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				       struct sk_buff *skb)
{
}
2591
/* Blackhole routes never get writable metrics: refuse the copy-on-write
 * by returning NULL.
 */
static u32 *ipv4_rt_blackhole_cow_metrics(struct dst_entry *dst,
					  unsigned long old)
{
	return NULL;
}
2597
/* dst_ops for the blackhole clones produced by ipv4_blackhole_route():
 * every state-mutating callback (PMTU update, redirect, metric COW) is a
 * stub, while read-only callbacks reuse the regular IPv4 implementations.
 */
static struct dst_ops ipv4_dst_blackhole_ops = {
	.family			= AF_INET,
	.check			= ipv4_blackhole_dst_check,
	.mtu			= ipv4_blackhole_mtu,
	.default_advmss		= ipv4_default_advmss,
	.update_pmtu		= ipv4_rt_blackhole_update_pmtu,
	.redirect		= ipv4_rt_blackhole_redirect,
	.cow_metrics		= ipv4_rt_blackhole_cow_metrics,
	.neigh_lookup		= ipv4_neigh_lookup,
};
2608
/* Clone @dst_orig into a "blackhole" route whose input and output paths
 * silently discard packets, while preserving the routing identity of the
 * original (flags, type, gateway, PMTU state) for callers that still
 * inspect the dst.
 *
 * Always consumes one reference on @dst_orig.  Returns the new dst, or
 * ERR_PTR(-ENOMEM) on allocation failure.
 */
struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
	struct rtable *ort = (struct rtable *) dst_orig;
	struct rtable *rt;

	rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_DEAD, 0);
	if (rt) {
		struct dst_entry *new = &rt->dst;

		new->__use = 1;
		/* Discard in both directions: this is the blackhole part. */
		new->input = dst_discard;
		new->output = dst_discard_out;

		/* Attach to loopback; take a device reference if present. */
		new->dev = net->loopback_dev;
		if (new->dev)
			dev_hold(new->dev);

		/* Copy the routing identity from the original entry. */
		rt->rt_is_input = ort->rt_is_input;
		rt->rt_iif = ort->rt_iif;
		rt->rt_pmtu = ort->rt_pmtu;
		rt->rt_mtu_locked = ort->rt_mtu_locked;

		rt->rt_genid = rt_genid_ipv4(net);
		rt->rt_flags = ort->rt_flags;
		rt->rt_type = ort->rt_type;
		rt->rt_gw_family = ort->rt_gw_family;
		/* Only one of the gateway address fields is meaningful. */
		if (rt->rt_gw_family == AF_INET)
			rt->rt_gw4 = ort->rt_gw4;
		else if (rt->rt_gw_family == AF_INET6)
			rt->rt_gw6 = ort->rt_gw6;

		INIT_LIST_HEAD(&rt->rt_uncached);
	}

	/* Reference on the original is dropped even on allocation failure. */
	dst_release(dst_orig);

	return rt ? &rt->dst : ERR_PTR(-ENOMEM);
}
2647
David S. Miller9d6ec932011-03-12 01:12:47 -05002648struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
Eric Dumazet6f9c9612015-09-25 07:39:10 -07002649 const struct sock *sk)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002650{
David S. Miller9d6ec932011-03-12 01:12:47 -05002651 struct rtable *rt = __ip_route_output_key(net, flp4);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002652
David S. Millerb23dd4f2011-03-02 14:31:35 -08002653 if (IS_ERR(rt))
2654 return rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002655
David S. Miller56157872011-05-02 14:37:45 -07002656 if (flp4->flowi4_proto)
Steffen Klassertf92ee612014-09-16 10:08:40 +02002657 rt = (struct rtable *)xfrm_lookup_route(net, &rt->dst,
2658 flowi4_to_flowi(flp4),
2659 sk, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002660
David S. Millerb23dd4f2011-03-02 14:31:35 -08002661 return rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002662}
Arnaldo Carvalho de Melod8c97a92005-08-09 20:12:12 -07002663EXPORT_SYMBOL_GPL(ip_route_output_flow);
2664
/* Fill @skb with an RTM_NEWROUTE netlink message describing route @rt
 * for the lookup keyed by @fl4 (dst/src are the addresses being
 * reported).  Called with rcu_read_lock held.
 *
 * Returns 0 on success, -EMSGSIZE if the message does not fit (the
 * partially built message is cancelled).
 */
static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
			struct rtable *rt, u32 table_id, struct flowi4 *fl4,
			struct sk_buff *skb, u32 portid, u32 seq)
{
	struct rtmsg *r;
	struct nlmsghdr *nlh;
	unsigned long expires = 0;
	u32 error;
	u32 metrics[RTAX_MAX];

	nlh = nlmsg_put(skb, portid, seq, RTM_NEWROUTE, sizeof(*r), 0);
	if (!nlh)
		return -EMSGSIZE;

	/* Fixed-size rtmsg header first, then optional attributes. */
	r = nlmsg_data(nlh);
	r->rtm_family	 = AF_INET;
	r->rtm_dst_len	= 32;
	r->rtm_src_len	= 0;
	r->rtm_tos	= fl4->flowi4_tos;
	/* Table ids >= 256 don't fit the u8 header field; RTA_TABLE below
	 * always carries the full 32-bit id.
	 */
	r->rtm_table	= table_id < 256 ? table_id : RT_TABLE_COMPAT;
	if (nla_put_u32(skb, RTA_TABLE, table_id))
		goto nla_put_failure;
	r->rtm_type	= rt->rt_type;
	r->rtm_scope	= RT_SCOPE_UNIVERSE;
	r->rtm_protocol = RTPROT_UNSPEC;
	r->rtm_flags	= (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
	if (rt->rt_flags & RTCF_NOTIFY)
		r->rtm_flags |= RTM_F_NOTIFY;
	if (IPCB(skb)->flags & IPSKB_DOREDIRECT)
		r->rtm_flags |= RTCF_DOREDIRECT;

	if (nla_put_in_addr(skb, RTA_DST, dst))
		goto nla_put_failure;
	if (src) {
		r->rtm_src_len = 32;
		if (nla_put_in_addr(skb, RTA_SRC, src))
			goto nla_put_failure;
	}
	if (rt->dst.dev &&
	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
		goto nla_put_failure;
#ifdef CONFIG_IP_ROUTE_CLASSID
	if (rt->dst.tclassid &&
	    nla_put_u32(skb, RTA_FLOW, rt->dst.tclassid))
		goto nla_put_failure;
#endif
	/* Preferred source only for output routes where it differs from
	 * the reported source address.
	 */
	if (!rt_is_input_route(rt) &&
	    fl4->saddr != src) {
		if (nla_put_in_addr(skb, RTA_PREFSRC, fl4->saddr))
			goto nla_put_failure;
	}
	/* IPv4 gateway as RTA_GATEWAY; IPv6 gateway as RTA_VIA. */
	if (rt->rt_gw_family == AF_INET &&
	    nla_put_in_addr(skb, RTA_GATEWAY, rt->rt_gw4)) {
		goto nla_put_failure;
	} else if (rt->rt_gw_family == AF_INET6) {
		int alen = sizeof(struct in6_addr);
		struct nlattr *nla;
		struct rtvia *via;

		nla = nla_reserve(skb, RTA_VIA, alen + 2);
		if (!nla)
			goto nla_put_failure;

		via = nla_data(nla);
		via->rtvia_family = AF_INET6;
		memcpy(via->rtvia_addr, &rt->rt_gw6, alen);
	}

	/* Convert absolute expiry (jiffies) to a remaining interval. */
	expires = rt->dst.expires;
	if (expires) {
		unsigned long now = jiffies;

		if (time_before(now, expires))
			expires -= now;
		else
			expires = 0;
	}

	/* Report a learned PMTU (and its lock bit) only while it is
	 * still valid, i.e. has a non-zero remaining expiry.
	 */
	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
	if (rt->rt_pmtu && expires)
		metrics[RTAX_MTU - 1] = rt->rt_pmtu;
	if (rt->rt_mtu_locked && expires)
		metrics[RTAX_LOCK - 1] |= BIT(RTAX_MTU);
	if (rtnetlink_put_metrics(skb, metrics) < 0)
		goto nla_put_failure;

	if (fl4->flowi4_mark &&
	    nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark))
		goto nla_put_failure;

	if (!uid_eq(fl4->flowi4_uid, INVALID_UID) &&
	    nla_put_u32(skb, RTA_UID,
			from_kuid_munged(current_user_ns(), fl4->flowi4_uid)))
		goto nla_put_failure;

	error = rt->dst.error;

	if (rt_is_input_route(rt)) {
#ifdef CONFIG_IP_MROUTE
		/* Non-local multicast with forwarding enabled: defer to
		 * ipmr.  err == 0 means the reply was queued elsewhere.
		 */
		if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) &&
		    IPV4_DEVCONF_ALL(net, MC_FORWARDING)) {
			int err = ipmr_get_route(net, skb,
						 fl4->saddr, fl4->daddr,
						 r, portid);

			if (err <= 0) {
				if (err == 0)
					return 0;
				goto nla_put_failure;
			}
		} else
#endif
			if (nla_put_u32(skb, RTA_IIF, fl4->flowi4_iif))
				goto nla_put_failure;
	}

	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, error) < 0)
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2792
Roopa Prabhu404eb772018-05-22 14:03:27 -07002793static struct sk_buff *inet_rtm_getroute_build_skb(__be32 src, __be32 dst,
2794 u8 ip_proto, __be16 sport,
2795 __be16 dport)
2796{
2797 struct sk_buff *skb;
2798 struct iphdr *iph;
2799
2800 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2801 if (!skb)
2802 return NULL;
2803
2804 /* Reserve room for dummy headers, this skb can pass
2805 * through good chunk of routing engine.
2806 */
2807 skb_reset_mac_header(skb);
2808 skb_reset_network_header(skb);
2809 skb->protocol = htons(ETH_P_IP);
2810 iph = skb_put(skb, sizeof(struct iphdr));
2811 iph->protocol = ip_proto;
2812 iph->saddr = src;
2813 iph->daddr = dst;
2814 iph->version = 0x4;
2815 iph->frag_off = 0;
2816 iph->ihl = 0x5;
2817 skb_set_transport_header(skb, skb->len);
2818
2819 switch (iph->protocol) {
2820 case IPPROTO_UDP: {
2821 struct udphdr *udph;
2822
2823 udph = skb_put_zero(skb, sizeof(struct udphdr));
2824 udph->source = sport;
2825 udph->dest = dport;
2826 udph->len = sizeof(struct udphdr);
2827 udph->check = 0;
2828 break;
2829 }
2830 case IPPROTO_TCP: {
2831 struct tcphdr *tcph;
2832
2833 tcph = skb_put_zero(skb, sizeof(struct tcphdr));
2834 tcph->source = sport;
2835 tcph->dest = dport;
2836 tcph->doff = sizeof(struct tcphdr) / 4;
2837 tcph->rst = 1;
2838 tcph->check = ~tcp_v4_check(sizeof(struct tcphdr),
2839 src, dst, 0);
2840 break;
2841 }
2842 case IPPROTO_ICMP: {
2843 struct icmphdr *icmph;
2844
2845 icmph = skb_put_zero(skb, sizeof(struct icmphdr));
2846 icmph->type = ICMP_ECHO;
2847 icmph->code = 0;
2848 }
2849 }
2850
2851 return skb;
2852}
2853
Jakub Kicinskia00302b62019-01-18 10:46:19 -08002854static int inet_rtm_valid_getroute_req(struct sk_buff *skb,
2855 const struct nlmsghdr *nlh,
2856 struct nlattr **tb,
2857 struct netlink_ext_ack *extack)
2858{
2859 struct rtmsg *rtm;
2860 int i, err;
2861
2862 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
2863 NL_SET_ERR_MSG(extack,
2864 "ipv4: Invalid header for route get request");
2865 return -EINVAL;
2866 }
2867
2868 if (!netlink_strict_get_check(skb))
Johannes Berg8cb08172019-04-26 14:07:28 +02002869 return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
2870 rtm_ipv4_policy, extack);
Jakub Kicinskia00302b62019-01-18 10:46:19 -08002871
2872 rtm = nlmsg_data(nlh);
2873 if ((rtm->rtm_src_len && rtm->rtm_src_len != 32) ||
2874 (rtm->rtm_dst_len && rtm->rtm_dst_len != 32) ||
2875 rtm->rtm_table || rtm->rtm_protocol ||
2876 rtm->rtm_scope || rtm->rtm_type) {
2877 NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for route get request");
2878 return -EINVAL;
2879 }
2880
2881 if (rtm->rtm_flags & ~(RTM_F_NOTIFY |
2882 RTM_F_LOOKUP_TABLE |
2883 RTM_F_FIB_MATCH)) {
2884 NL_SET_ERR_MSG(extack, "ipv4: Unsupported rtm_flags for route get request");
2885 return -EINVAL;
2886 }
2887
Johannes Berg8cb08172019-04-26 14:07:28 +02002888 err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
2889 rtm_ipv4_policy, extack);
Jakub Kicinskia00302b62019-01-18 10:46:19 -08002890 if (err)
2891 return err;
2892
2893 if ((tb[RTA_SRC] && !rtm->rtm_src_len) ||
2894 (tb[RTA_DST] && !rtm->rtm_dst_len)) {
2895 NL_SET_ERR_MSG(extack, "ipv4: rtm_src_len and rtm_dst_len must be 32 for IPv4");
2896 return -EINVAL;
2897 }
2898
2899 for (i = 0; i <= RTA_MAX; i++) {
2900 if (!tb[i])
2901 continue;
2902
2903 switch (i) {
2904 case RTA_IIF:
2905 case RTA_OIF:
2906 case RTA_SRC:
2907 case RTA_DST:
2908 case RTA_IP_PROTO:
2909 case RTA_SPORT:
2910 case RTA_DPORT:
2911 case RTA_MARK:
2912 case RTA_UID:
2913 break;
2914 default:
2915 NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in route get request");
2916 return -EINVAL;
2917 }
2918 }
2919
2920 return 0;
2921}
2922
/* RTM_GETROUTE handler: resolve the route a packet described by the
 * request would take and reply with either the matched FIB entry
 * (RTM_F_FIB_MATCH) or the resolved route (rt_fill_info).
 *
 * A dummy skb carrying real IP/transport headers is built so that the
 * lookup can go through the same code paths as actual traffic.
 * Returns 0 on success or a negative errno.
 */
static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
			     struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(in_skb->sk);
	struct nlattr *tb[RTA_MAX+1];
	u32 table_id = RT_TABLE_MAIN;
	__be16 sport = 0, dport = 0;
	struct fib_result res = {};
	u8 ip_proto = IPPROTO_UDP;
	struct rtable *rt = NULL;
	struct sk_buff *skb;
	struct rtmsg *rtm;
	struct flowi4 fl4 = {};
	__be32 dst = 0;
	__be32 src = 0;
	kuid_t uid;
	u32 iif;
	int err;
	int mark;

	err = inet_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
	if (err < 0)
		return err;

	rtm = nlmsg_data(nlh);
	src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0;
	dst = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0;
	iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
	mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0;
	if (tb[RTA_UID])
		uid = make_kuid(current_user_ns(), nla_get_u32(tb[RTA_UID]));
	else
		uid = (iif ? INVALID_UID : current_uid());

	if (tb[RTA_IP_PROTO]) {
		err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO],
						  &ip_proto, AF_INET, extack);
		if (err)
			return err;
	}

	if (tb[RTA_SPORT])
		sport = nla_get_be16(tb[RTA_SPORT]);

	if (tb[RTA_DPORT])
		dport = nla_get_be16(tb[RTA_DPORT]);

	/* Dummy packet used for the lookup, then reused for the reply. */
	skb = inet_rtm_getroute_build_skb(src, dst, ip_proto, sport, dport);
	if (!skb)
		return -ENOBUFS;

	fl4.daddr = dst;
	fl4.saddr = src;
	fl4.flowi4_tos = rtm->rtm_tos;
	fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0;
	fl4.flowi4_mark = mark;
	fl4.flowi4_uid = uid;
	if (sport)
		fl4.fl4_sport = sport;
	if (dport)
		fl4.fl4_dport = dport;
	fl4.flowi4_proto = ip_proto;

	rcu_read_lock();

	if (iif) {
		/* Input-route simulation: packet as if received on @iif. */
		struct net_device *dev;

		dev = dev_get_by_index_rcu(net, iif);
		if (!dev) {
			err = -ENODEV;
			goto errout_rcu;
		}

		fl4.flowi4_iif = iif; /* for rt_fill_info */
		skb->dev = dev;
		skb->mark = mark;
		err = ip_route_input_rcu(skb, dst, src, rtm->rtm_tos,
					 dev, &res);

		rt = skb_rtable(skb);
		if (err == 0 && rt->dst.error)
			err = -rt->dst.error;
	} else {
		/* Output-route lookup from this host. */
		fl4.flowi4_iif = LOOPBACK_IFINDEX;
		skb->dev = net->loopback_dev;
		rt = ip_route_output_key_hash_rcu(net, &fl4, &res, skb);
		err = 0;
		if (IS_ERR(rt))
			err = PTR_ERR(rt);
		else
			skb_dst_set(skb, &rt->dst);
	}

	if (err)
		goto errout_rcu;

	if (rtm->rtm_flags & RTM_F_NOTIFY)
		rt->rt_flags |= RTCF_NOTIFY;

	if (rtm->rtm_flags & RTM_F_LOOKUP_TABLE)
		table_id = res.table ? res.table->tb_id : 0;

	/* reset skb for netlink reply msg */
	skb_trim(skb, 0);
	skb_reset_network_header(skb);
	skb_reset_transport_header(skb);
	skb_reset_mac_header(skb);

	if (rtm->rtm_flags & RTM_F_FIB_MATCH) {
		/* Report the FIB entry that matched, not the route. */
		if (!res.fi) {
			err = fib_props[res.type].error;
			if (!err)
				err = -EHOSTUNREACH;
			goto errout_rcu;
		}
		err = fib_dump_info(skb, NETLINK_CB(in_skb).portid,
				    nlh->nlmsg_seq, RTM_NEWROUTE, table_id,
				    rt->rt_type, res.prefix, res.prefixlen,
				    fl4.flowi4_tos, res.fi, 0);
	} else {
		err = rt_fill_info(net, dst, src, rt, table_id, &fl4, skb,
				   NETLINK_CB(in_skb).portid, nlh->nlmsg_seq);
	}
	if (err < 0)
		goto errout_rcu;

	rcu_read_unlock();

	/* rtnl_unicast() consumes skb on both success and failure. */
	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);

errout_free:
	return err;
errout_rcu:
	rcu_read_unlock();
	kfree_skb(skb);
	goto errout_free;
}
3061
/* Multicast configuration changed on @in_dev: flush the route cache of
 * the device's network namespace so stale decisions are dropped.
 */
void ip_rt_multicast_event(struct in_device *in_dev)
{
	rt_cache_flush(dev_net(in_dev->dev));
}
3066
3067#ifdef CONFIG_SYSCTL
/* Defaults for the garbage-collector knobs in ipv4_route_table below. */
static int ip_rt_gc_interval __read_mostly  = 60 * HZ;
static int ip_rt_gc_min_interval __read_mostly	= HZ / 2;
static int ip_rt_gc_elasticity __read_mostly	= 8;
/* Lower bound enforced on the min_pmtu sysctl (see its minmax entry). */
static int ip_min_valid_pmtu __read_mostly	= IPV4_MIN_MTU;
Gao feng082c7ca2013-02-19 00:43:12 +00003072
Joe Perchesfe2c6332013-06-11 23:04:25 -07003073static int ipv4_sysctl_rtcache_flush(struct ctl_table *__ctl, int write,
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07003074 void __user *buffer,
Linus Torvalds1da177e2005-04-16 15:20:36 -07003075 size_t *lenp, loff_t *ppos)
3076{
Timo Teräs5aad1de2013-05-27 20:46:33 +00003077 struct net *net = (struct net *)__ctl->extra1;
3078
Linus Torvalds1da177e2005-04-16 15:20:36 -07003079 if (write) {
Timo Teräs5aad1de2013-05-27 20:46:33 +00003080 rt_cache_flush(net);
3081 fnhe_genid_bump(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003082 return 0;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09003083 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07003084
3085 return -EINVAL;
3086}
3087
/* sysctl table for IPv4 routing tunables.  All entries are plain ints;
 * *_jiffies handlers convert between seconds (or ms) and jiffies.
 */
static struct ctl_table ipv4_route_table[] = {
	{
		.procname	= "gc_thresh",
		.data		= &ipv4_dst_ops.gc_thresh,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "max_size",
		.data		= &ip_rt_max_size,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		/* Deprecated. Use gc_min_interval_ms */

		.procname	= "gc_min_interval",
		.data		= &ip_rt_gc_min_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		/* Same backing variable as above, millisecond granularity. */
		.procname	= "gc_min_interval_ms",
		.data		= &ip_rt_gc_min_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_ms_jiffies,
	},
	{
		.procname	= "gc_timeout",
		.data		= &ip_rt_gc_timeout,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "gc_interval",
		.data		= &ip_rt_gc_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "redirect_load",
		.data		= &ip_rt_redirect_load,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "redirect_number",
		.data		= &ip_rt_redirect_number,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "redirect_silence",
		.data		= &ip_rt_redirect_silence,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "error_cost",
		.data		= &ip_rt_error_cost,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "error_burst",
		.data		= &ip_rt_error_burst,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "gc_elasticity",
		.data		= &ip_rt_gc_elasticity,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "mtu_expires",
		.data		= &ip_rt_mtu_expires,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		/* Clamped from below by ip_min_valid_pmtu. */
		.procname	= "min_pmtu",
		.data		= &ip_rt_min_pmtu,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &ip_min_valid_pmtu,
	},
	{
		.procname	= "min_adv_mss",
		.data		= &ip_rt_min_advmss,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{ }
};
Denis V. Lunev39a23e72008-07-05 19:02:33 -07003199
/* Per-netns write-only "flush" trigger; .extra1 is patched to point at
 * the owning netns by sysctl_route_net_init().
 */
static struct ctl_table ipv4_route_flush_table[] = {
	{
		.procname	= "flush",
		.maxlen		= sizeof(int),
		.mode		= 0200,
		.proc_handler	= ipv4_sysctl_rtcache_flush,
	},
	{ },
};
3209
3210static __net_init int sysctl_route_net_init(struct net *net)
3211{
3212 struct ctl_table *tbl;
3213
3214 tbl = ipv4_route_flush_table;
Octavian Purdila09ad9bc2009-11-25 15:14:13 -08003215 if (!net_eq(net, &init_net)) {
Denis V. Lunev39a23e72008-07-05 19:02:33 -07003216 tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL);
Ian Morris51456b22015-04-03 09:17:26 +01003217 if (!tbl)
Denis V. Lunev39a23e72008-07-05 19:02:33 -07003218 goto err_dup;
Eric W. Biederman464dc802012-11-16 03:02:59 +00003219
3220 /* Don't export sysctls to unprivileged users */
3221 if (net->user_ns != &init_user_ns)
3222 tbl[0].procname = NULL;
Denis V. Lunev39a23e72008-07-05 19:02:33 -07003223 }
3224 tbl[0].extra1 = net;
3225
Eric W. Biedermanec8f23c2012-04-19 13:44:49 +00003226 net->ipv4.route_hdr = register_net_sysctl(net, "net/ipv4/route", tbl);
Ian Morris51456b22015-04-03 09:17:26 +01003227 if (!net->ipv4.route_hdr)
Denis V. Lunev39a23e72008-07-05 19:02:33 -07003228 goto err_reg;
3229 return 0;
3230
3231err_reg:
3232 if (tbl != ipv4_route_flush_table)
3233 kfree(tbl);
3234err_dup:
3235 return -ENOMEM;
3236}
3237
3238static __net_exit void sysctl_route_net_exit(struct net *net)
3239{
3240 struct ctl_table *tbl;
3241
3242 tbl = net->ipv4.route_hdr->ctl_table_arg;
3243 unregister_net_sysctl_table(net->ipv4.route_hdr);
3244 BUG_ON(tbl == ipv4_route_flush_table);
3245 kfree(tbl);
3246}
3247
/* Per-namespace lifecycle hooks for the route sysctl table. */
static __net_initdata struct pernet_operations sysctl_route_ops = {
	.init = sysctl_route_net_init,
	.exit = sysctl_route_net_exit,
};
Linus Torvalds1da177e2005-04-16 15:20:36 -07003252#endif
3253
/*
 * Initialise the per-namespace generation counters that invalidate cached
 * routing state when bumped.  Always returns 0.
 */
static __net_init int rt_genid_init(struct net *net)
{
	atomic_set(&net->ipv4.rt_genid, 0);
	atomic_set(&net->fnhe_genid, 0);
	/* Seeded randomly so the counter differs per boot/namespace —
	 * NOTE(review): rationale inferred from the use of get_random_int();
	 * confirm against the introducing commit. */
	atomic_set(&net->ipv4.dev_addr_genid, get_random_int());
	return 0;
}
3261
/* Generation counters need no teardown, hence no .exit hook. */
static __net_initdata struct pernet_operations rt_genid_ops = {
	.init = rt_genid_init,
};
3265
David S. Millerc3426b42012-06-09 16:27:05 -07003266static int __net_init ipv4_inetpeer_init(struct net *net)
3267{
3268 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3269
3270 if (!bp)
3271 return -ENOMEM;
3272 inet_peer_base_init(bp);
3273 net->ipv4.peers = bp;
3274 return 0;
3275}
3276
3277static void __net_exit ipv4_inetpeer_exit(struct net *net)
3278{
3279 struct inet_peer_base *bp = net->ipv4.peers;
3280
3281 net->ipv4.peers = NULL;
David S. Miller56a6b242012-06-09 16:32:41 -07003282 inetpeer_invalidate_tree(bp);
David S. Millerc3426b42012-06-09 16:27:05 -07003283 kfree(bp);
3284}
3285
/* Per-namespace lifecycle hooks for the IPv4 inetpeer base. */
static __net_initdata struct pernet_operations ipv4_inetpeer_ops = {
	.init = ipv4_inetpeer_init,
	.exit = ipv4_inetpeer_exit,
};
Denis V. Lunev9f5e97e2008-07-05 19:02:59 -07003290
#ifdef CONFIG_IP_ROUTE_CLASSID
/* Per-cpu route-classid accounting buffer; allocated in ip_rt_init(). */
struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
#endif /* CONFIG_IP_ROUTE_CLASSID */
Linus Torvalds1da177e2005-04-16 15:20:36 -07003294
/*
 * Boot-time initialisation of the IPv4 routing subsystem: identifier
 * arrays, per-cpu uncached-route lists, dst slab caches, proc files,
 * netlink handler, and the pernet subsystems defined above.  Allocation
 * failures here are fatal (panic) since routing cannot work without them.
 * Always returns 0.
 */
int __init ip_rt_init(void)
{
	int cpu;

	ip_idents = kmalloc_array(IP_IDENTS_SZ, sizeof(*ip_idents),
				  GFP_KERNEL);
	if (!ip_idents)
		panic("IP: failed to allocate ip_idents\n");

	/* Randomise initial IP IDs so they are not predictable at boot. */
	prandom_bytes(ip_idents, IP_IDENTS_SZ * sizeof(*ip_idents));

	ip_tstamps = kcalloc(IP_IDENTS_SZ, sizeof(*ip_tstamps), GFP_KERNEL);
	if (!ip_tstamps)
		panic("IP: failed to allocate ip_tstamps\n");

	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu);

		INIT_LIST_HEAD(&ul->head);
		spin_lock_init(&ul->lock);
	}
#ifdef CONFIG_IP_ROUTE_CLASSID
	ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct));
	if (!ip_rt_acct)
		panic("IP: failed to allocate ip_rt_acct\n");
#endif

	/* SLAB_PANIC: no error check needed, failure panics. */
	ipv4_dst_ops.kmem_cachep =
		kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0,
				  SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);

	/* Blackhole dsts share the same slab cache as regular ones. */
	ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep;

	if (dst_entries_init(&ipv4_dst_ops) < 0)
		panic("IP: failed to allocate ipv4_dst_ops counter\n");

	if (dst_entries_init(&ipv4_dst_blackhole_ops) < 0)
		panic("IP: failed to allocate ipv4_dst_blackhole_ops counter\n");

	/* Effectively disable gc threshold / size limits. */
	ipv4_dst_ops.gc_thresh = ~0;
	ip_rt_max_size = INT_MAX;

	devinet_init();
	ip_fib_init();

	/* proc files are non-essential: warn instead of failing boot. */
	if (ip_rt_proc_init())
		pr_err("Unable to create route proc files\n");
#ifdef CONFIG_XFRM
	xfrm_init();
	xfrm4_init();
#endif
	rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL,
		      RTNL_FLAG_DOIT_UNLOCKED);

#ifdef CONFIG_SYSCTL
	register_pernet_subsys(&sysctl_route_ops);
#endif
	register_pernet_subsys(&rt_genid_ops);
	register_pernet_subsys(&ipv4_inetpeer_ops);
	return 0;
}
3356
Al Viroa1bc6eb2008-07-30 06:32:52 -04003357#ifdef CONFIG_SYSCTL
/*
 * We really need to sanitize the damn ipv4 init order, then all
 * this nonsense will go away.
 *
 * Registers the static net/ipv4/route sysctl table for init_net early in
 * boot, before the pernet init machinery has run.
 */
void __init ip_static_sysctl_init(void)
{
	register_net_sysctl(&init_net, "net/ipv4/route", ipv4_route_table);
}
Al Viroa1bc6eb2008-07-30 06:32:52 -04003366#endif