blob: b2b35b38724d1f993ef62fe6adaa938c66db671e [file] [log] [blame]
Thomas Gleixner2874c5f2019-05-27 08:55:01 +02001// SPDX-License-Identifier: GPL-2.0-or-later
Linus Torvalds1da177e2005-04-16 15:20:36 -07002/*
3 * INET An implementation of the TCP/IP protocol suite for the LINUX
4 * operating system. INET is implemented using the BSD Socket
5 * interface as the means of communication with the user level.
6 *
7 * ROUTE - implementation of the IP router.
8 *
Jesper Juhl02c30a82005-05-05 16:16:16 -07009 * Authors: Ross Biro
Linus Torvalds1da177e2005-04-16 15:20:36 -070010 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
11 * Alan Cox, <gw4pts@gw4pts.ampr.org>
12 * Linus Torvalds, <Linus.Torvalds@helsinki.fi>
13 * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
14 *
15 * Fixes:
16 * Alan Cox : Verify area fixes.
17 * Alan Cox : cli() protects routing changes
18 * Rui Oliveira : ICMP routing table updates
19 * (rco@di.uminho.pt) Routing table insertion and update
20 * Linus Torvalds : Rewrote bits to be sensible
21 * Alan Cox : Added BSD route gw semantics
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090022 * Alan Cox : Super /proc >4K
Linus Torvalds1da177e2005-04-16 15:20:36 -070023 * Alan Cox : MTU in route table
24 * Alan Cox : MSS actually. Also added the window
25 * clamper.
26 * Sam Lantinga : Fixed route matching in rt_del()
27 * Alan Cox : Routing cache support.
28 * Alan Cox : Removed compatibility cruft.
29 * Alan Cox : RTF_REJECT support.
30 * Alan Cox : TCP irtt support.
31 * Jonathan Naylor : Added Metric support.
32 * Miquel van Smoorenburg : BSD API fixes.
33 * Miquel van Smoorenburg : Metrics.
34 * Alan Cox : Use __u32 properly
35 * Alan Cox : Aligned routing errors more closely with BSD
36 * our system is still very different.
37 * Alan Cox : Faster /proc handling
38 * Alexey Kuznetsov : Massive rework to support tree based routing,
39 * routing caches and better behaviour.
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090040 *
Linus Torvalds1da177e2005-04-16 15:20:36 -070041 * Olaf Erb : irtt wasn't being copied right.
42 * Bjorn Ekwall : Kerneld route support.
43 * Alan Cox : Multicast fixed (I hope)
44 * Pavel Krauz : Limited broadcast fixed
45 * Mike McLagan : Routing by source
46 * Alexey Kuznetsov : End of old history. Split to fib.c and
47 * route.c and rewritten from scratch.
48 * Andi Kleen : Load-limit warning messages.
49 * Vitaly E. Lavrov : Transparent proxy revived after year coma.
50 * Vitaly E. Lavrov : Race condition in ip_route_input_slow.
51 * Tobias Ringstrom : Uninitialized res.type in ip_route_output_slow.
52 * Vladimir V. Ivanov : IP rule info (flowid) is really useful.
53 * Marc Boucher : routing by fwmark
54 * Robert Olsson : Added rt_cache statistics
55 * Arnaldo C. Melo : Convert proc stuff to seq_file
Eric Dumazetbb1d23b2005-07-05 15:00:32 -070056 * Eric Dumazet : hashed spinlocks and rt_check_expire() fixes.
Ilia Sotnikovcef26852006-03-25 01:38:55 -080057 * Ilia Sotnikov : Ignore TOS on PMTUD and Redirect
58 * Ilia Sotnikov : Removed TOS from hash calculations
Linus Torvalds1da177e2005-04-16 15:20:36 -070059 */
60
Joe Perchesafd465032012-03-12 07:03:32 +000061#define pr_fmt(fmt) "IPv4: " fmt
62
Linus Torvalds1da177e2005-04-16 15:20:36 -070063#include <linux/module.h>
Linus Torvalds7c0f6ba2016-12-24 11:46:01 -080064#include <linux/uaccess.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070065#include <linux/bitops.h>
66#include <linux/types.h>
67#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070068#include <linux/mm.h>
69#include <linux/string.h>
70#include <linux/socket.h>
71#include <linux/sockios.h>
72#include <linux/errno.h>
73#include <linux/in.h>
74#include <linux/inet.h>
75#include <linux/netdevice.h>
76#include <linux/proc_fs.h>
77#include <linux/init.h>
78#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070079#include <linux/inetdevice.h>
80#include <linux/igmp.h>
81#include <linux/pkt_sched.h>
82#include <linux/mroute.h>
83#include <linux/netfilter_ipv4.h>
84#include <linux/random.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070085#include <linux/rcupdate.h>
86#include <linux/times.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090087#include <linux/slab.h>
Eric Dumazet73f156a2014-06-02 05:26:03 -070088#include <linux/jhash.h>
Herbert Xu352e5122007-11-13 21:34:06 -080089#include <net/dst.h>
Thomas Graf1b7179d2015-07-21 10:43:59 +020090#include <net/dst_metadata.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020091#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070092#include <net/protocol.h>
93#include <net/ip.h>
94#include <net/route.h>
95#include <net/inetpeer.h>
96#include <net/sock.h>
97#include <net/ip_fib.h>
98#include <net/arp.h>
99#include <net/tcp.h>
100#include <net/icmp.h>
101#include <net/xfrm.h>
Roopa Prabhu571e7222015-07-21 10:43:47 +0200102#include <net/lwtunnel.h>
Tom Tucker8d717402006-07-30 20:43:36 -0700103#include <net/netevent.h>
Thomas Graf63f34442007-03-22 11:55:17 -0700104#include <net/rtnetlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700105#ifdef CONFIG_SYSCTL
106#include <linux/sysctl.h>
107#endif
David S. Miller6e5714e2011-08-03 20:50:44 -0700108#include <net/secure_seq.h>
Thomas Graf1b7179d2015-07-21 10:43:59 +0200109#include <net/ip_tunnels.h>
David Ahern385add92015-09-29 20:07:13 -0700110#include <net/l3mdev.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700111
Roopa Prabhub6179812017-05-25 10:42:39 -0700112#include "fib_lookup.h"
113
David S. Miller68a5e3d2011-03-11 20:07:33 -0500114#define RT_FL_TOS(oldflp4) \
Julian Anastasovf61759e2011-12-02 11:39:42 +0000115 ((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700116
Linus Torvalds1da177e2005-04-16 15:20:36 -0700117#define RT_GC_TIMEOUT (300*HZ)
118
Linus Torvalds1da177e2005-04-16 15:20:36 -0700119static int ip_rt_max_size;
Stephen Hemminger817bc4d2008-03-22 17:43:59 -0700120static int ip_rt_redirect_number __read_mostly = 9;
121static int ip_rt_redirect_load __read_mostly = HZ / 50;
122static int ip_rt_redirect_silence __read_mostly = ((HZ / 50) << (9 + 1));
123static int ip_rt_error_cost __read_mostly = HZ;
124static int ip_rt_error_burst __read_mostly = 5 * HZ;
Stephen Hemminger817bc4d2008-03-22 17:43:59 -0700125static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ;
Sabrina Dubrocac7272c22018-02-26 16:13:43 +0100126static u32 ip_rt_min_pmtu __read_mostly = 512 + 20 + 20;
Stephen Hemminger817bc4d2008-03-22 17:43:59 -0700127static int ip_rt_min_advmss __read_mostly = 256;
Eric Dumazet9f28a2f2011-12-21 15:47:16 -0500128
Xin Longdeed49d2016-02-18 21:21:19 +0800129static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT;
Sabrina Dubrocac7272c22018-02-26 16:13:43 +0100130
Linus Torvalds1da177e2005-04-16 15:20:36 -0700131/*
132 * Interface to generic destination cache.
133 */
134
135static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
David S. Miller0dbaee32010-12-13 12:52:14 -0800136static unsigned int ipv4_default_advmss(const struct dst_entry *dst);
Steffen Klassertebb762f2011-11-23 02:12:51 +0000137static unsigned int ipv4_mtu(const struct dst_entry *dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700138static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
139static void ipv4_link_failure(struct sk_buff *skb);
David S. Miller6700c272012-07-17 03:29:28 -0700140static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
141 struct sk_buff *skb, u32 mtu);
142static void ip_do_redirect(struct dst_entry *dst, struct sock *sk,
143 struct sk_buff *skb);
David S. Millercaacf052012-07-31 15:06:50 -0700144static void ipv4_dst_destroy(struct dst_entry *dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700145
David S. Miller62fa8a82011-01-26 20:51:05 -0800146static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
147{
David S. Miller31248732012-07-10 07:08:18 -0700148 WARN_ON(1);
149 return NULL;
David S. Miller62fa8a82011-01-26 20:51:05 -0800150}
151
David S. Millerf894cbf2012-07-02 21:52:24 -0700152static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
153 struct sk_buff *skb,
154 const void *daddr);
Julian Anastasov63fca652017-02-06 23:14:15 +0200155static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr);
David S. Millerd3aaeb32011-07-18 00:40:17 -0700156
Linus Torvalds1da177e2005-04-16 15:20:36 -0700157static struct dst_ops ipv4_dst_ops = {
158 .family = AF_INET,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700159 .check = ipv4_dst_check,
David S. Miller0dbaee32010-12-13 12:52:14 -0800160 .default_advmss = ipv4_default_advmss,
Steffen Klassertebb762f2011-11-23 02:12:51 +0000161 .mtu = ipv4_mtu,
David S. Miller62fa8a82011-01-26 20:51:05 -0800162 .cow_metrics = ipv4_cow_metrics,
David S. Millercaacf052012-07-31 15:06:50 -0700163 .destroy = ipv4_dst_destroy,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700164 .negative_advice = ipv4_negative_advice,
165 .link_failure = ipv4_link_failure,
166 .update_pmtu = ip_rt_update_pmtu,
David S. Millere47a1852012-07-11 20:55:47 -0700167 .redirect = ip_do_redirect,
Eric W. Biedermanb92dacd2015-10-07 16:48:37 -0500168 .local_out = __ip_local_out,
David S. Millerd3aaeb32011-07-18 00:40:17 -0700169 .neigh_lookup = ipv4_neigh_lookup,
Julian Anastasov63fca652017-02-06 23:14:15 +0200170 .confirm_neigh = ipv4_confirm_neigh,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700171};
172
173#define ECN_OR_COST(class) TC_PRIO_##class
174
Philippe De Muyter4839c522007-07-09 15:32:57 -0700175const __u8 ip_tos2prio[16] = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700176 TC_PRIO_BESTEFFORT,
Dan Siemon4a2b9c32011-03-15 13:56:07 +0000177 ECN_OR_COST(BESTEFFORT),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700178 TC_PRIO_BESTEFFORT,
179 ECN_OR_COST(BESTEFFORT),
180 TC_PRIO_BULK,
181 ECN_OR_COST(BULK),
182 TC_PRIO_BULK,
183 ECN_OR_COST(BULK),
184 TC_PRIO_INTERACTIVE,
185 ECN_OR_COST(INTERACTIVE),
186 TC_PRIO_INTERACTIVE,
187 ECN_OR_COST(INTERACTIVE),
188 TC_PRIO_INTERACTIVE_BULK,
189 ECN_OR_COST(INTERACTIVE_BULK),
190 TC_PRIO_INTERACTIVE_BULK,
191 ECN_OR_COST(INTERACTIVE_BULK)
192};
Amir Vadaid4a96862012-04-04 21:33:28 +0000193EXPORT_SYMBOL(ip_tos2prio);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700194
Eric Dumazet2f970d82006-01-17 02:54:36 -0800195static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
Christoph Lameter3ed66e92014-04-07 15:39:40 -0700196#define RT_CACHE_STAT_INC(field) raw_cpu_inc(rt_cache_stat.field)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700197
Linus Torvalds1da177e2005-04-16 15:20:36 -0700198#ifdef CONFIG_PROC_FS
Linus Torvalds1da177e2005-04-16 15:20:36 -0700199static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
200{
Eric Dumazet29e75252008-01-31 17:05:09 -0800201 if (*pos)
David S. Miller89aef892012-07-17 11:00:09 -0700202 return NULL;
Eric Dumazet29e75252008-01-31 17:05:09 -0800203 return SEQ_START_TOKEN;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700204}
205
206static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos)
207{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700208 ++*pos;
David S. Miller89aef892012-07-17 11:00:09 -0700209 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700210}
211
212static void rt_cache_seq_stop(struct seq_file *seq, void *v)
213{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700214}
215
216static int rt_cache_seq_show(struct seq_file *seq, void *v)
217{
218 if (v == SEQ_START_TOKEN)
219 seq_printf(seq, "%-127s\n",
220 "Iface\tDestination\tGateway \tFlags\t\tRefCnt\tUse\t"
221 "Metric\tSource\t\tMTU\tWindow\tIRTT\tTOS\tHHRef\t"
222 "HHUptod\tSpecDst");
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900223 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700224}
225
Stephen Hemmingerf6908082007-03-12 14:34:29 -0700226static const struct seq_operations rt_cache_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700227 .start = rt_cache_seq_start,
228 .next = rt_cache_seq_next,
229 .stop = rt_cache_seq_stop,
230 .show = rt_cache_seq_show,
231};
232
233static int rt_cache_seq_open(struct inode *inode, struct file *file)
234{
David S. Miller89aef892012-07-17 11:00:09 -0700235 return seq_open(file, &rt_cache_seq_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700236}
237
Arjan van de Ven9a321442007-02-12 00:55:35 -0800238static const struct file_operations rt_cache_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700239 .open = rt_cache_seq_open,
240 .read = seq_read,
241 .llseek = seq_lseek,
David S. Miller89aef892012-07-17 11:00:09 -0700242 .release = seq_release,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700243};
244
245
246static void *rt_cpu_seq_start(struct seq_file *seq, loff_t *pos)
247{
248 int cpu;
249
250 if (*pos == 0)
251 return SEQ_START_TOKEN;
252
Rusty Russell0f23174a2008-12-29 12:23:42 +0000253 for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700254 if (!cpu_possible(cpu))
255 continue;
256 *pos = cpu+1;
Eric Dumazet2f970d82006-01-17 02:54:36 -0800257 return &per_cpu(rt_cache_stat, cpu);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700258 }
259 return NULL;
260}
261
262static void *rt_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
263{
264 int cpu;
265
Rusty Russell0f23174a2008-12-29 12:23:42 +0000266 for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700267 if (!cpu_possible(cpu))
268 continue;
269 *pos = cpu+1;
Eric Dumazet2f970d82006-01-17 02:54:36 -0800270 return &per_cpu(rt_cache_stat, cpu);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700271 }
272 return NULL;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900273
Linus Torvalds1da177e2005-04-16 15:20:36 -0700274}
275
276static void rt_cpu_seq_stop(struct seq_file *seq, void *v)
277{
278
279}
280
281static int rt_cpu_seq_show(struct seq_file *seq, void *v)
282{
283 struct rt_cache_stat *st = v;
284
285 if (v == SEQ_START_TOKEN) {
Olaf Rempel5bec0032005-04-28 12:16:08 -0700286 seq_printf(seq, "entries in_hit in_slow_tot in_slow_mc in_no_route in_brd in_martian_dst in_martian_src out_hit out_slow_tot out_slow_mc gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700287 return 0;
288 }
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900289
Linus Torvalds1da177e2005-04-16 15:20:36 -0700290 seq_printf(seq,"%08x %08x %08x %08x %08x %08x %08x %08x "
291 " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n",
Eric Dumazetfc66f952010-10-08 06:37:34 +0000292 dst_entries_get_slow(&ipv4_dst_ops),
Eric Dumazet0baf2b32013-10-16 02:49:04 -0700293 0, /* st->in_hit */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700294 st->in_slow_tot,
295 st->in_slow_mc,
296 st->in_no_route,
297 st->in_brd,
298 st->in_martian_dst,
299 st->in_martian_src,
300
Eric Dumazet0baf2b32013-10-16 02:49:04 -0700301 0, /* st->out_hit */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700302 st->out_slow_tot,
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900303 st->out_slow_mc,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700304
Eric Dumazet0baf2b32013-10-16 02:49:04 -0700305 0, /* st->gc_total */
306 0, /* st->gc_ignored */
307 0, /* st->gc_goal_miss */
308 0, /* st->gc_dst_overflow */
309 0, /* st->in_hlist_search */
310 0 /* st->out_hlist_search */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700311 );
312 return 0;
313}
314
Stephen Hemmingerf6908082007-03-12 14:34:29 -0700315static const struct seq_operations rt_cpu_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700316 .start = rt_cpu_seq_start,
317 .next = rt_cpu_seq_next,
318 .stop = rt_cpu_seq_stop,
319 .show = rt_cpu_seq_show,
320};
321
322
323static int rt_cpu_seq_open(struct inode *inode, struct file *file)
324{
325 return seq_open(file, &rt_cpu_seq_ops);
326}
327
Arjan van de Ven9a321442007-02-12 00:55:35 -0800328static const struct file_operations rt_cpu_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700329 .open = rt_cpu_seq_open,
330 .read = seq_read,
331 .llseek = seq_lseek,
332 .release = seq_release,
333};
334
Patrick McHardyc7066f72011-01-14 13:36:42 +0100335#ifdef CONFIG_IP_ROUTE_CLASSID
Alexey Dobriyana661c412009-11-25 15:40:35 -0800336static int rt_acct_proc_show(struct seq_file *m, void *v)
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800337{
Alexey Dobriyana661c412009-11-25 15:40:35 -0800338 struct ip_rt_acct *dst, *src;
339 unsigned int i, j;
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800340
Alexey Dobriyana661c412009-11-25 15:40:35 -0800341 dst = kcalloc(256, sizeof(struct ip_rt_acct), GFP_KERNEL);
342 if (!dst)
343 return -ENOMEM;
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800344
Alexey Dobriyana661c412009-11-25 15:40:35 -0800345 for_each_possible_cpu(i) {
346 src = (struct ip_rt_acct *)per_cpu_ptr(ip_rt_acct, i);
347 for (j = 0; j < 256; j++) {
348 dst[j].o_bytes += src[j].o_bytes;
349 dst[j].o_packets += src[j].o_packets;
350 dst[j].i_bytes += src[j].i_bytes;
351 dst[j].i_packets += src[j].i_packets;
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800352 }
353 }
Alexey Dobriyana661c412009-11-25 15:40:35 -0800354
355 seq_write(m, dst, 256 * sizeof(struct ip_rt_acct));
356 kfree(dst);
357 return 0;
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800358}
359#endif
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800360
Denis V. Lunev73b38712008-02-28 20:51:18 -0800361static int __net_init ip_rt_do_proc_init(struct net *net)
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800362{
363 struct proc_dir_entry *pde;
364
Joe Perchesd6444062018-03-23 15:54:38 -0700365 pde = proc_create("rt_cache", 0444, net->proc_net,
Gao fengd4beaa62013-02-18 01:34:54 +0000366 &rt_cache_seq_fops);
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800367 if (!pde)
368 goto err1;
369
Joe Perchesd6444062018-03-23 15:54:38 -0700370 pde = proc_create("rt_cache", 0444,
Wang Chen77020722008-02-28 14:14:25 -0800371 net->proc_net_stat, &rt_cpu_seq_fops);
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800372 if (!pde)
373 goto err2;
374
Patrick McHardyc7066f72011-01-14 13:36:42 +0100375#ifdef CONFIG_IP_ROUTE_CLASSID
Christoph Hellwig3f3942a2018-05-15 15:57:23 +0200376 pde = proc_create_single("rt_acct", 0, net->proc_net,
377 rt_acct_proc_show);
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800378 if (!pde)
379 goto err3;
380#endif
381 return 0;
382
Patrick McHardyc7066f72011-01-14 13:36:42 +0100383#ifdef CONFIG_IP_ROUTE_CLASSID
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800384err3:
385 remove_proc_entry("rt_cache", net->proc_net_stat);
386#endif
387err2:
388 remove_proc_entry("rt_cache", net->proc_net);
389err1:
390 return -ENOMEM;
391}
Denis V. Lunev73b38712008-02-28 20:51:18 -0800392
393static void __net_exit ip_rt_do_proc_exit(struct net *net)
394{
395 remove_proc_entry("rt_cache", net->proc_net_stat);
396 remove_proc_entry("rt_cache", net->proc_net);
Patrick McHardyc7066f72011-01-14 13:36:42 +0100397#ifdef CONFIG_IP_ROUTE_CLASSID
Denis V. Lunev73b38712008-02-28 20:51:18 -0800398 remove_proc_entry("rt_acct", net->proc_net);
Alexey Dobriyan0a931ac2010-01-17 03:32:50 +0000399#endif
Denis V. Lunev73b38712008-02-28 20:51:18 -0800400}
401
402static struct pernet_operations ip_rt_proc_ops __net_initdata = {
403 .init = ip_rt_do_proc_init,
404 .exit = ip_rt_do_proc_exit,
405};
406
407static int __init ip_rt_proc_init(void)
408{
409 return register_pernet_subsys(&ip_rt_proc_ops);
410}
411
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800412#else
Denis V. Lunev73b38712008-02-28 20:51:18 -0800413static inline int ip_rt_proc_init(void)
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800414{
415 return 0;
416}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700417#endif /* CONFIG_PROC_FS */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900418
Eric Dumazet4331deb2012-07-25 05:11:23 +0000419static inline bool rt_is_expired(const struct rtable *rth)
Denis V. Luneve84f84f2008-07-05 19:04:32 -0700420{
fan.duca4c3fc2013-07-30 08:33:53 +0800421 return rth->rt_genid != rt_genid_ipv4(dev_net(rth->dst.dev));
Denis V. Luneve84f84f2008-07-05 19:04:32 -0700422}
423
Nicolas Dichtel4ccfe6d2012-09-07 00:45:29 +0000424void rt_cache_flush(struct net *net)
Eric Dumazet29e75252008-01-31 17:05:09 -0800425{
fan.duca4c3fc2013-07-30 08:33:53 +0800426 rt_genid_bump_ipv4(net);
Eric Dumazet98376382010-03-08 03:20:00 +0000427}
428
David S. Millerf894cbf2012-07-02 21:52:24 -0700429static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
430 struct sk_buff *skb,
431 const void *daddr)
David Miller3769cff2011-07-11 22:44:24 +0000432{
David Ahern1550c172019-04-05 16:30:27 -0700433 const struct rtable *rt = container_of(dst, struct rtable, dst);
David S. Millerd3aaeb32011-07-18 00:40:17 -0700434 struct net_device *dev = dst->dev;
David Miller3769cff2011-07-11 22:44:24 +0000435 struct neighbour *n;
436
David Ahern5c9f7c12019-04-05 16:30:34 -0700437 rcu_read_lock_bh();
David S. Millerd3aaeb32011-07-18 00:40:17 -0700438
David Ahern5c9f7c12019-04-05 16:30:34 -0700439 if (likely(rt->rt_gw_family == AF_INET)) {
440 n = ip_neigh_gw4(dev, rt->rt_gw4);
441 } else if (rt->rt_gw_family == AF_INET6) {
442 n = ip_neigh_gw6(dev, &rt->rt_gw6);
443 } else {
444 __be32 pkey;
445
446 pkey = skb ? ip_hdr(skb)->daddr : *((__be32 *) daddr);
447 n = ip_neigh_gw4(dev, pkey);
448 }
449
Ido Schimmel537de0c2019-07-04 19:26:38 +0300450 if (!IS_ERR(n) && !refcount_inc_not_zero(&n->refcnt))
David Ahern5c9f7c12019-04-05 16:30:34 -0700451 n = NULL;
452
453 rcu_read_unlock_bh();
454
455 return n;
David S. Millerd3aaeb32011-07-18 00:40:17 -0700456}
457
Julian Anastasov63fca652017-02-06 23:14:15 +0200458static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr)
459{
David Ahern1550c172019-04-05 16:30:27 -0700460 const struct rtable *rt = container_of(dst, struct rtable, dst);
Julian Anastasov63fca652017-02-06 23:14:15 +0200461 struct net_device *dev = dst->dev;
462 const __be32 *pkey = daddr;
Julian Anastasov63fca652017-02-06 23:14:15 +0200463
David Ahern6de9c052019-04-05 16:30:36 -0700464 if (rt->rt_gw_family == AF_INET) {
David Ahern1550c172019-04-05 16:30:27 -0700465 pkey = (const __be32 *)&rt->rt_gw4;
David Ahern6de9c052019-04-05 16:30:36 -0700466 } else if (rt->rt_gw_family == AF_INET6) {
467 return __ipv6_confirm_neigh_stub(dev, &rt->rt_gw6);
468 } else if (!daddr ||
Julian Anastasov63fca652017-02-06 23:14:15 +0200469 (rt->rt_flags &
David Ahern6de9c052019-04-05 16:30:36 -0700470 (RTCF_MULTICAST | RTCF_BROADCAST | RTCF_LOCAL))) {
Julian Anastasov63fca652017-02-06 23:14:15 +0200471 return;
David Ahern6de9c052019-04-05 16:30:36 -0700472 }
Julian Anastasov63fca652017-02-06 23:14:15 +0200473 __ipv4_confirm_neigh(dev, *(__force u32 *)pkey);
474}
475
Eric Dumazet04ca6972014-07-26 08:58:10 +0200476#define IP_IDENTS_SZ 2048u
Eric Dumazet04ca6972014-07-26 08:58:10 +0200477
Eric Dumazet355b590c2015-05-01 10:37:49 -0700478static atomic_t *ip_idents __read_mostly;
479static u32 *ip_tstamps __read_mostly;
Eric Dumazet04ca6972014-07-26 08:58:10 +0200480
481/* In order to protect privacy, we add a perturbation to identifiers
482 * if one generator is seldom used. This makes hard for an attacker
483 * to infer how many packets were sent between two points in time.
484 */
485u32 ip_idents_reserve(u32 hash, int segs)
486{
Eric Dumazet355b590c2015-05-01 10:37:49 -0700487 u32 *p_tstamp = ip_tstamps + hash % IP_IDENTS_SZ;
488 atomic_t *p_id = ip_idents + hash % IP_IDENTS_SZ;
Mark Rutland6aa7de02017-10-23 14:07:29 -0700489 u32 old = READ_ONCE(*p_tstamp);
Eric Dumazet04ca6972014-07-26 08:58:10 +0200490 u32 now = (u32)jiffies;
Eric Dumazetadb03112016-09-20 18:06:17 -0700491 u32 new, delta = 0;
Eric Dumazet04ca6972014-07-26 08:58:10 +0200492
Eric Dumazet355b590c2015-05-01 10:37:49 -0700493 if (old != now && cmpxchg(p_tstamp, old, now) == old)
Eric Dumazet04ca6972014-07-26 08:58:10 +0200494 delta = prandom_u32_max(now - old);
495
Eric Dumazetadb03112016-09-20 18:06:17 -0700496 /* Do not use atomic_add_return() as it makes UBSAN unhappy */
497 do {
498 old = (u32)atomic_read(p_id);
499 new = old + delta + segs;
500 } while (atomic_cmpxchg(p_id, old, new) != old);
501
502 return new - segs;
Eric Dumazet04ca6972014-07-26 08:58:10 +0200503}
504EXPORT_SYMBOL(ip_idents_reserve);
Eric Dumazet73f156a2014-06-02 05:26:03 -0700505
Hannes Frederic Sowab6a77192015-03-25 17:07:44 +0100506void __ip_select_ident(struct net *net, struct iphdr *iph, int segs)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700507{
Eric Dumazet73f156a2014-06-02 05:26:03 -0700508 u32 hash, id;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700509
Eric Dumazetdf453702019-03-27 12:40:33 -0700510 /* Note the following code is not safe, but this is okay. */
511 if (unlikely(siphash_key_is_zero(&net->ipv4.ip_id_key)))
512 get_random_bytes(&net->ipv4.ip_id_key,
513 sizeof(net->ipv4.ip_id_key));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700514
Eric Dumazetdf453702019-03-27 12:40:33 -0700515 hash = siphash_3u32((__force u32)iph->daddr,
Eric Dumazet04ca6972014-07-26 08:58:10 +0200516 (__force u32)iph->saddr,
Eric Dumazetdf453702019-03-27 12:40:33 -0700517 iph->protocol,
518 &net->ipv4.ip_id_key);
Eric Dumazet73f156a2014-06-02 05:26:03 -0700519 id = ip_idents_reserve(hash, segs);
520 iph->id = htons(id);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700521}
Eric Dumazet4bc2f182010-07-09 21:22:10 +0000522EXPORT_SYMBOL(__ip_select_ident);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700523
Lorenzo Colittie2d118a2016-11-04 02:23:43 +0900524static void __build_flow_key(const struct net *net, struct flowi4 *fl4,
525 const struct sock *sk,
David S. Miller4895c772012-07-17 04:19:00 -0700526 const struct iphdr *iph,
527 int oif, u8 tos,
528 u8 prot, u32 mark, int flow_flags)
529{
530 if (sk) {
531 const struct inet_sock *inet = inet_sk(sk);
532
533 oif = sk->sk_bound_dev_if;
534 mark = sk->sk_mark;
535 tos = RT_CONN_FLAGS(sk);
536 prot = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol;
537 }
538 flowi4_init_output(fl4, oif, mark, tos,
539 RT_SCOPE_UNIVERSE, prot,
540 flow_flags,
Lorenzo Colittie2d118a2016-11-04 02:23:43 +0900541 iph->daddr, iph->saddr, 0, 0,
542 sock_net_uid(net, sk));
David S. Miller4895c772012-07-17 04:19:00 -0700543}
544
Eric Dumazet5abf7f72012-07-17 22:42:13 +0200545static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb,
546 const struct sock *sk)
David S. Miller4895c772012-07-17 04:19:00 -0700547{
Lorenzo Colittid109e612016-11-30 02:56:47 +0900548 const struct net *net = dev_net(skb->dev);
David S. Miller4895c772012-07-17 04:19:00 -0700549 const struct iphdr *iph = ip_hdr(skb);
550 int oif = skb->dev->ifindex;
551 u8 tos = RT_TOS(iph->tos);
552 u8 prot = iph->protocol;
553 u32 mark = skb->mark;
554
Lorenzo Colittid109e612016-11-30 02:56:47 +0900555 __build_flow_key(net, fl4, sk, iph, oif, tos, prot, mark, 0);
David S. Miller4895c772012-07-17 04:19:00 -0700556}
557
Eric Dumazet5abf7f72012-07-17 22:42:13 +0200558static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
David S. Miller4895c772012-07-17 04:19:00 -0700559{
560 const struct inet_sock *inet = inet_sk(sk);
Eric Dumazet5abf7f72012-07-17 22:42:13 +0200561 const struct ip_options_rcu *inet_opt;
David S. Miller4895c772012-07-17 04:19:00 -0700562 __be32 daddr = inet->inet_daddr;
563
564 rcu_read_lock();
565 inet_opt = rcu_dereference(inet->inet_opt);
566 if (inet_opt && inet_opt->opt.srr)
567 daddr = inet_opt->opt.faddr;
568 flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
569 RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
570 inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
571 inet_sk_flowi_flags(sk),
Lorenzo Colittie2d118a2016-11-04 02:23:43 +0900572 daddr, inet->inet_saddr, 0, 0, sk->sk_uid);
David S. Miller4895c772012-07-17 04:19:00 -0700573 rcu_read_unlock();
574}
575
Eric Dumazet5abf7f72012-07-17 22:42:13 +0200576static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk,
577 const struct sk_buff *skb)
David S. Miller4895c772012-07-17 04:19:00 -0700578{
579 if (skb)
580 build_skb_flow_key(fl4, skb, sk);
581 else
582 build_sk_flow_key(fl4, sk);
583}
584
David S. Millerc5038a82012-07-31 15:02:02 -0700585static DEFINE_SPINLOCK(fnhe_lock);
David S. Miller4895c772012-07-17 04:19:00 -0700586
Timo Teräs2ffae992013-06-27 10:27:05 +0300587static void fnhe_flush_routes(struct fib_nh_exception *fnhe)
588{
589 struct rtable *rt;
590
591 rt = rcu_dereference(fnhe->fnhe_rth_input);
592 if (rt) {
593 RCU_INIT_POINTER(fnhe->fnhe_rth_input, NULL);
Wei Wang95c47f92017-06-17 10:42:30 -0700594 dst_dev_put(&rt->dst);
Wei Wang08301062017-06-17 10:42:29 -0700595 dst_release(&rt->dst);
Timo Teräs2ffae992013-06-27 10:27:05 +0300596 }
597 rt = rcu_dereference(fnhe->fnhe_rth_output);
598 if (rt) {
599 RCU_INIT_POINTER(fnhe->fnhe_rth_output, NULL);
Wei Wang95c47f92017-06-17 10:42:30 -0700600 dst_dev_put(&rt->dst);
Wei Wang08301062017-06-17 10:42:29 -0700601 dst_release(&rt->dst);
Timo Teräs2ffae992013-06-27 10:27:05 +0300602 }
603}
604
Julian Anastasovaee06da2012-07-18 10:15:35 +0000605static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
David S. Miller4895c772012-07-17 04:19:00 -0700606{
607 struct fib_nh_exception *fnhe, *oldest;
608
609 oldest = rcu_dereference(hash->chain);
610 for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe;
611 fnhe = rcu_dereference(fnhe->fnhe_next)) {
612 if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp))
613 oldest = fnhe;
614 }
Timo Teräs2ffae992013-06-27 10:27:05 +0300615 fnhe_flush_routes(oldest);
David S. Miller4895c772012-07-17 04:19:00 -0700616 return oldest;
617}
618
David S. Millerd3a25c92012-07-17 13:23:08 -0700619static inline u32 fnhe_hashfun(__be32 daddr)
620{
Eric Dumazetd546c622014-09-04 08:21:31 -0700621 static u32 fnhe_hashrnd __read_mostly;
David S. Millerd3a25c92012-07-17 13:23:08 -0700622 u32 hval;
623
Eric Dumazetd546c622014-09-04 08:21:31 -0700624 net_get_random_once(&fnhe_hashrnd, sizeof(fnhe_hashrnd));
625 hval = jhash_1word((__force u32) daddr, fnhe_hashrnd);
626 return hash_32(hval, FNHE_HASH_SHIFT);
David S. Millerd3a25c92012-07-17 13:23:08 -0700627}
628
Timo Teräs387aa652013-05-27 20:46:31 +0000629static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe)
630{
631 rt->rt_pmtu = fnhe->fnhe_pmtu;
Sabrina Dubrocad52e5a72018-03-14 10:21:14 +0100632 rt->rt_mtu_locked = fnhe->fnhe_mtu_locked;
Timo Teräs387aa652013-05-27 20:46:31 +0000633 rt->dst.expires = fnhe->fnhe_expires;
634
635 if (fnhe->fnhe_gw) {
636 rt->rt_flags |= RTCF_REDIRECTED;
David Ahern1550c172019-04-05 16:30:27 -0700637 rt->rt_gw_family = AF_INET;
638 rt->rt_gw4 = fnhe->fnhe_gw;
Timo Teräs387aa652013-05-27 20:46:31 +0000639 }
640}
641
David Aherna5995e72019-04-30 07:45:50 -0700642static void update_or_create_fnhe(struct fib_nh_common *nhc, __be32 daddr,
643 __be32 gw, u32 pmtu, bool lock,
644 unsigned long expires)
David S. Miller4895c772012-07-17 04:19:00 -0700645{
Julian Anastasovaee06da2012-07-18 10:15:35 +0000646 struct fnhe_hash_bucket *hash;
David S. Miller4895c772012-07-17 04:19:00 -0700647 struct fib_nh_exception *fnhe;
Timo Teräs387aa652013-05-27 20:46:31 +0000648 struct rtable *rt;
Xin Longcebe84c2017-11-17 14:27:18 +0800649 u32 genid, hval;
Timo Teräs387aa652013-05-27 20:46:31 +0000650 unsigned int i;
David S. Miller4895c772012-07-17 04:19:00 -0700651 int depth;
Xin Longcebe84c2017-11-17 14:27:18 +0800652
David Aherna5995e72019-04-30 07:45:50 -0700653 genid = fnhe_genid(dev_net(nhc->nhc_dev));
Xin Longcebe84c2017-11-17 14:27:18 +0800654 hval = fnhe_hashfun(daddr);
David S. Miller4895c772012-07-17 04:19:00 -0700655
David S. Millerc5038a82012-07-31 15:02:02 -0700656 spin_lock_bh(&fnhe_lock);
Julian Anastasovaee06da2012-07-18 10:15:35 +0000657
David Aherna5995e72019-04-30 07:45:50 -0700658 hash = rcu_dereference(nhc->nhc_exceptions);
David S. Miller4895c772012-07-17 04:19:00 -0700659 if (!hash) {
Kees Cook6396bb22018-06-12 14:03:40 -0700660 hash = kcalloc(FNHE_HASH_SIZE, sizeof(*hash), GFP_ATOMIC);
David S. Miller4895c772012-07-17 04:19:00 -0700661 if (!hash)
Julian Anastasovaee06da2012-07-18 10:15:35 +0000662 goto out_unlock;
David Aherna5995e72019-04-30 07:45:50 -0700663 rcu_assign_pointer(nhc->nhc_exceptions, hash);
David S. Miller4895c772012-07-17 04:19:00 -0700664 }
665
David S. Miller4895c772012-07-17 04:19:00 -0700666 hash += hval;
667
668 depth = 0;
669 for (fnhe = rcu_dereference(hash->chain); fnhe;
670 fnhe = rcu_dereference(fnhe->fnhe_next)) {
671 if (fnhe->fnhe_daddr == daddr)
Julian Anastasovaee06da2012-07-18 10:15:35 +0000672 break;
David S. Miller4895c772012-07-17 04:19:00 -0700673 depth++;
674 }
675
Julian Anastasovaee06da2012-07-18 10:15:35 +0000676 if (fnhe) {
Xin Longcebe84c2017-11-17 14:27:18 +0800677 if (fnhe->fnhe_genid != genid)
678 fnhe->fnhe_genid = genid;
Julian Anastasovaee06da2012-07-18 10:15:35 +0000679 if (gw)
680 fnhe->fnhe_gw = gw;
Sabrina Dubrocad52e5a72018-03-14 10:21:14 +0100681 if (pmtu) {
Julian Anastasovaee06da2012-07-18 10:15:35 +0000682 fnhe->fnhe_pmtu = pmtu;
Sabrina Dubrocad52e5a72018-03-14 10:21:14 +0100683 fnhe->fnhe_mtu_locked = lock;
684 }
Xin Longe39d5242017-11-17 14:27:06 +0800685 fnhe->fnhe_expires = max(1UL, expires);
Timo Teräs387aa652013-05-27 20:46:31 +0000686 /* Update all cached dsts too */
Timo Teräs2ffae992013-06-27 10:27:05 +0300687 rt = rcu_dereference(fnhe->fnhe_rth_input);
688 if (rt)
689 fill_route_from_fnhe(rt, fnhe);
690 rt = rcu_dereference(fnhe->fnhe_rth_output);
Timo Teräs387aa652013-05-27 20:46:31 +0000691 if (rt)
692 fill_route_from_fnhe(rt, fnhe);
Julian Anastasovaee06da2012-07-18 10:15:35 +0000693 } else {
694 if (depth > FNHE_RECLAIM_DEPTH)
695 fnhe = fnhe_oldest(hash);
696 else {
697 fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC);
698 if (!fnhe)
699 goto out_unlock;
700
701 fnhe->fnhe_next = hash->chain;
702 rcu_assign_pointer(hash->chain, fnhe);
703 }
Xin Longcebe84c2017-11-17 14:27:18 +0800704 fnhe->fnhe_genid = genid;
Julian Anastasovaee06da2012-07-18 10:15:35 +0000705 fnhe->fnhe_daddr = daddr;
706 fnhe->fnhe_gw = gw;
707 fnhe->fnhe_pmtu = pmtu;
Sabrina Dubrocad52e5a72018-03-14 10:21:14 +0100708 fnhe->fnhe_mtu_locked = lock;
Julian Anastasov94720e32018-05-02 09:41:19 +0300709 fnhe->fnhe_expires = max(1UL, expires);
Timo Teräs387aa652013-05-27 20:46:31 +0000710
711 /* Exception created; mark the cached routes for the nexthop
712 * stale, so anyone caching it rechecks if this exception
713 * applies to them.
714 */
David Ahern0f457a32019-04-30 07:45:48 -0700715 rt = rcu_dereference(nhc->nhc_rth_input);
Timo Teräs2ffae992013-06-27 10:27:05 +0300716 if (rt)
717 rt->dst.obsolete = DST_OBSOLETE_KILL;
718
Timo Teräs387aa652013-05-27 20:46:31 +0000719 for_each_possible_cpu(i) {
720 struct rtable __rcu **prt;
David Ahern0f457a32019-04-30 07:45:48 -0700721 prt = per_cpu_ptr(nhc->nhc_pcpu_rth_output, i);
Timo Teräs387aa652013-05-27 20:46:31 +0000722 rt = rcu_dereference(*prt);
723 if (rt)
724 rt->dst.obsolete = DST_OBSOLETE_KILL;
725 }
David S. Miller4895c772012-07-17 04:19:00 -0700726 }
David S. Miller4895c772012-07-17 04:19:00 -0700727
David S. Miller4895c772012-07-17 04:19:00 -0700728 fnhe->fnhe_stamp = jiffies;
Julian Anastasovaee06da2012-07-18 10:15:35 +0000729
730out_unlock:
David S. Millerc5038a82012-07-31 15:02:02 -0700731 spin_unlock_bh(&fnhe_lock);
David S. Miller4895c772012-07-17 04:19:00 -0700732}
733
David S. Millerceb33202012-07-17 11:31:28 -0700734static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4,
735 bool kill_route)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700736{
David S. Millere47a1852012-07-11 20:55:47 -0700737 __be32 new_gw = icmp_hdr(skb)->un.gateway;
David S. Miller94206122012-07-11 20:38:08 -0700738 __be32 old_gw = ip_hdr(skb)->saddr;
David S. Millere47a1852012-07-11 20:55:47 -0700739 struct net_device *dev = skb->dev;
David S. Millere47a1852012-07-11 20:55:47 -0700740 struct in_device *in_dev;
David S. Miller4895c772012-07-17 04:19:00 -0700741 struct fib_result res;
David S. Millere47a1852012-07-11 20:55:47 -0700742 struct neighbour *n;
Denis V. Lunev317805b2008-02-28 20:50:06 -0800743 struct net *net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700744
David S. Miller94206122012-07-11 20:38:08 -0700745 switch (icmp_hdr(skb)->code & 7) {
746 case ICMP_REDIR_NET:
747 case ICMP_REDIR_NETTOS:
748 case ICMP_REDIR_HOST:
749 case ICMP_REDIR_HOSTTOS:
750 break;
751
752 default:
753 return;
754 }
755
David Ahern1550c172019-04-05 16:30:27 -0700756 if (rt->rt_gw_family != AF_INET || rt->rt_gw4 != old_gw)
David S. Millere47a1852012-07-11 20:55:47 -0700757 return;
758
759 in_dev = __in_dev_get_rcu(dev);
760 if (!in_dev)
761 return;
762
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900763 net = dev_net(dev);
Joe Perches9d4fb272009-11-23 10:41:23 -0800764 if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) ||
765 ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw) ||
766 ipv4_is_zeronet(new_gw))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700767 goto reject_redirect;
768
769 if (!IN_DEV_SHARED_MEDIA(in_dev)) {
770 if (!inet_addr_onlink(in_dev, new_gw, old_gw))
771 goto reject_redirect;
772 if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev))
773 goto reject_redirect;
774 } else {
Denis V. Lunev317805b2008-02-28 20:50:06 -0800775 if (inet_addr_type(net, new_gw) != RTN_UNICAST)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700776 goto reject_redirect;
777 }
778
Stephen Suryaputra Lin969447f2016-11-10 11:16:15 -0500779 n = __ipv4_neigh_lookup(rt->dst.dev, new_gw);
780 if (!n)
781 n = neigh_create(&arp_tbl, &new_gw, rt->dst.dev);
WANG Cong2c1a4312014-09-24 17:07:53 -0700782 if (!IS_ERR(n)) {
David S. Millere47a1852012-07-11 20:55:47 -0700783 if (!(n->nud_state & NUD_VALID)) {
784 neigh_event_send(n, NULL);
785 } else {
Andy Gospodarek0eeb0752015-06-23 13:45:37 -0400786 if (fib_lookup(net, fl4, &res, 0) == 0) {
David Aherneba618a2019-04-02 14:11:55 -0700787 struct fib_nh_common *nhc = FIB_RES_NHC(res);
David S. Miller4895c772012-07-17 04:19:00 -0700788
David Aherna5995e72019-04-30 07:45:50 -0700789 update_or_create_fnhe(nhc, fl4->daddr, new_gw,
Sabrina Dubrocad52e5a72018-03-14 10:21:14 +0100790 0, false,
791 jiffies + ip_rt_gc_timeout);
David S. Miller4895c772012-07-17 04:19:00 -0700792 }
David S. Millerceb33202012-07-17 11:31:28 -0700793 if (kill_route)
794 rt->dst.obsolete = DST_OBSOLETE_KILL;
David S. Millere47a1852012-07-11 20:55:47 -0700795 call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n);
796 }
797 neigh_release(n);
798 }
799 return;
800
801reject_redirect:
802#ifdef CONFIG_IP_ROUTE_VERBOSE
David S. Miller99ee0382012-07-12 07:40:05 -0700803 if (IN_DEV_LOG_MARTIANS(in_dev)) {
804 const struct iphdr *iph = (const struct iphdr *) skb->data;
805 __be32 daddr = iph->daddr;
806 __be32 saddr = iph->saddr;
807
David S. Millere47a1852012-07-11 20:55:47 -0700808 net_info_ratelimited("Redirect from %pI4 on %s about %pI4 ignored\n"
809 " Advised path = %pI4 -> %pI4\n",
810 &old_gw, dev->name, &new_gw,
811 &saddr, &daddr);
David S. Miller99ee0382012-07-12 07:40:05 -0700812 }
David S. Millere47a1852012-07-11 20:55:47 -0700813#endif
814 ;
815}
816
David S. Miller4895c772012-07-17 04:19:00 -0700817static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
818{
819 struct rtable *rt;
820 struct flowi4 fl4;
Michal Kubecekf96ef982013-05-28 08:26:49 +0200821 const struct iphdr *iph = (const struct iphdr *) skb->data;
Lorenzo Colitti7d995692016-12-23 00:33:57 +0900822 struct net *net = dev_net(skb->dev);
Michal Kubecekf96ef982013-05-28 08:26:49 +0200823 int oif = skb->dev->ifindex;
824 u8 tos = RT_TOS(iph->tos);
825 u8 prot = iph->protocol;
826 u32 mark = skb->mark;
David S. Miller4895c772012-07-17 04:19:00 -0700827
828 rt = (struct rtable *) dst;
829
Lorenzo Colitti7d995692016-12-23 00:33:57 +0900830 __build_flow_key(net, &fl4, sk, iph, oif, tos, prot, mark, 0);
David S. Millerceb33202012-07-17 11:31:28 -0700831 __ip_do_redirect(rt, skb, &fl4, true);
David S. Miller4895c772012-07-17 04:19:00 -0700832}
833
Linus Torvalds1da177e2005-04-16 15:20:36 -0700834static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
835{
Eric Dumazetee6b9672008-03-05 18:30:47 -0800836 struct rtable *rt = (struct rtable *)dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700837 struct dst_entry *ret = dst;
838
839 if (rt) {
Timo Teräsd11a4dc2010-03-18 23:20:20 +0000840 if (dst->obsolete > 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700841 ip_rt_put(rt);
842 ret = NULL;
David S. Miller59436342012-07-10 06:58:42 -0700843 } else if ((rt->rt_flags & RTCF_REDIRECTED) ||
844 rt->dst.expires) {
David S. Miller89aef892012-07-17 11:00:09 -0700845 ip_rt_put(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700846 ret = NULL;
847 }
848 }
849 return ret;
850}
851
852/*
853 * Algorithm:
854 * 1. The first ip_rt_redirect_number redirects are sent
855 * with exponential backoff, then we stop sending them at all,
856 * assuming that the host ignores our redirects.
857 * 2. If we did not see packets requiring redirects
858 * during ip_rt_redirect_silence, we assume that the host
859 * forgot redirected route and start to send redirects again.
860 *
861 * This algorithm is much cheaper and more intelligent than dumb load limiting
862 * in icmp.c.
863 *
864 * NOTE. Do not forget to inhibit load limiting for redirects (redundant)
865 * and "frag. need" (breaks PMTU discovery) in icmp.c.
866 */
867
868void ip_rt_send_redirect(struct sk_buff *skb)
869{
Eric Dumazet511c3f92009-06-02 05:14:27 +0000870 struct rtable *rt = skb_rtable(skb);
Eric Dumazet30038fc2009-08-28 23:52:01 -0700871 struct in_device *in_dev;
David S. Miller92d86822011-02-04 15:55:25 -0800872 struct inet_peer *peer;
David S. Miller1d861aa2012-07-10 03:58:16 -0700873 struct net *net;
Eric Dumazet30038fc2009-08-28 23:52:01 -0700874 int log_martians;
David Ahern192132b2015-08-27 16:07:03 -0700875 int vif;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700876
Eric Dumazet30038fc2009-08-28 23:52:01 -0700877 rcu_read_lock();
Changli Gaod8d1f302010-06-10 23:31:35 -0700878 in_dev = __in_dev_get_rcu(rt->dst.dev);
Eric Dumazet30038fc2009-08-28 23:52:01 -0700879 if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) {
880 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700881 return;
Eric Dumazet30038fc2009-08-28 23:52:01 -0700882 }
883 log_martians = IN_DEV_LOG_MARTIANS(in_dev);
David Ahern385add92015-09-29 20:07:13 -0700884 vif = l3mdev_master_ifindex_rcu(rt->dst.dev);
Eric Dumazet30038fc2009-08-28 23:52:01 -0700885 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700886
David S. Miller1d861aa2012-07-10 03:58:16 -0700887 net = dev_net(rt->dst.dev);
David Ahern192132b2015-08-27 16:07:03 -0700888 peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, vif, 1);
David S. Miller92d86822011-02-04 15:55:25 -0800889 if (!peer) {
Julian Anastasove81da0e2012-10-08 11:41:15 +0000890 icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST,
891 rt_nexthop(rt, ip_hdr(skb)->daddr));
David S. Miller92d86822011-02-04 15:55:25 -0800892 return;
893 }
894
Linus Torvalds1da177e2005-04-16 15:20:36 -0700895 /* No redirected packets during ip_rt_redirect_silence;
896 * reset the algorithm.
897 */
Lorenzo Bianconic09551c62019-02-06 19:18:04 +0100898 if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence)) {
David S. Miller92d86822011-02-04 15:55:25 -0800899 peer->rate_tokens = 0;
Lorenzo Bianconic09551c62019-02-06 19:18:04 +0100900 peer->n_redirects = 0;
901 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700902
903 /* Too many ignored redirects; do not send anything
Changli Gaod8d1f302010-06-10 23:31:35 -0700904 * set dst.rate_last to the last seen redirected packet.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700905 */
Lorenzo Bianconic09551c62019-02-06 19:18:04 +0100906 if (peer->n_redirects >= ip_rt_redirect_number) {
David S. Miller92d86822011-02-04 15:55:25 -0800907 peer->rate_last = jiffies;
David S. Miller1d861aa2012-07-10 03:58:16 -0700908 goto out_put_peer;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700909 }
910
911 /* Check for load limit; set rate_last to the latest sent
912 * redirect.
913 */
David S. Miller92d86822011-02-04 15:55:25 -0800914 if (peer->rate_tokens == 0 ||
Li Yewang14fb8a72006-12-18 00:26:35 -0800915 time_after(jiffies,
David S. Miller92d86822011-02-04 15:55:25 -0800916 (peer->rate_last +
917 (ip_rt_redirect_load << peer->rate_tokens)))) {
Julian Anastasove81da0e2012-10-08 11:41:15 +0000918 __be32 gw = rt_nexthop(rt, ip_hdr(skb)->daddr);
919
920 icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw);
David S. Miller92d86822011-02-04 15:55:25 -0800921 peer->rate_last = jiffies;
922 ++peer->rate_tokens;
Lorenzo Bianconic09551c62019-02-06 19:18:04 +0100923 ++peer->n_redirects;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700924#ifdef CONFIG_IP_ROUTE_VERBOSE
Eric Dumazet30038fc2009-08-28 23:52:01 -0700925 if (log_martians &&
Joe Perchese87cc472012-05-13 21:56:26 +0000926 peer->rate_tokens == ip_rt_redirect_number)
927 net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n",
David S. Miller92101b32012-07-23 16:29:00 -0700928 &ip_hdr(skb)->saddr, inet_iif(skb),
Julian Anastasove81da0e2012-10-08 11:41:15 +0000929 &ip_hdr(skb)->daddr, &gw);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700930#endif
931 }
David S. Miller1d861aa2012-07-10 03:58:16 -0700932out_put_peer:
933 inet_putpeer(peer);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700934}
935
936static int ip_error(struct sk_buff *skb)
937{
Eric Dumazet511c3f92009-06-02 05:14:27 +0000938 struct rtable *rt = skb_rtable(skb);
Stephen Suryaputrae2c0dc12018-02-28 12:20:44 -0500939 struct net_device *dev = skb->dev;
940 struct in_device *in_dev;
David S. Miller92d86822011-02-04 15:55:25 -0800941 struct inet_peer *peer;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700942 unsigned long now;
David S. Miller251da412012-06-26 16:27:09 -0700943 struct net *net;
David S. Miller92d86822011-02-04 15:55:25 -0800944 bool send;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700945 int code;
946
Stephen Suryaputrae2c0dc12018-02-28 12:20:44 -0500947 if (netif_is_l3_master(skb->dev)) {
948 dev = __dev_get_by_index(dev_net(skb->dev), IPCB(skb)->iif);
949 if (!dev)
950 goto out;
951 }
952
953 in_dev = __in_dev_get_rcu(dev);
954
Eric W. Biederman381c7592015-05-22 04:58:12 -0500955 /* IP on this device is disabled. */
956 if (!in_dev)
957 goto out;
958
David S. Miller251da412012-06-26 16:27:09 -0700959 net = dev_net(rt->dst.dev);
960 if (!IN_DEV_FORWARD(in_dev)) {
961 switch (rt->dst.error) {
962 case EHOSTUNREACH:
Eric Dumazetb45386e2016-04-27 16:44:35 -0700963 __IP_INC_STATS(net, IPSTATS_MIB_INADDRERRORS);
David S. Miller251da412012-06-26 16:27:09 -0700964 break;
965
966 case ENETUNREACH:
Eric Dumazetb45386e2016-04-27 16:44:35 -0700967 __IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES);
David S. Miller251da412012-06-26 16:27:09 -0700968 break;
969 }
970 goto out;
971 }
972
Changli Gaod8d1f302010-06-10 23:31:35 -0700973 switch (rt->dst.error) {
Joe Perches4500ebf2011-07-01 09:43:07 +0000974 case EINVAL:
975 default:
976 goto out;
977 case EHOSTUNREACH:
978 code = ICMP_HOST_UNREACH;
979 break;
980 case ENETUNREACH:
981 code = ICMP_NET_UNREACH;
Eric Dumazetb45386e2016-04-27 16:44:35 -0700982 __IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES);
Joe Perches4500ebf2011-07-01 09:43:07 +0000983 break;
984 case EACCES:
985 code = ICMP_PKT_FILTERED;
986 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700987 }
988
David Ahern192132b2015-08-27 16:07:03 -0700989 peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr,
David Ahern385add92015-09-29 20:07:13 -0700990 l3mdev_master_ifindex(skb->dev), 1);
David S. Miller92d86822011-02-04 15:55:25 -0800991
992 send = true;
993 if (peer) {
994 now = jiffies;
995 peer->rate_tokens += now - peer->rate_last;
996 if (peer->rate_tokens > ip_rt_error_burst)
997 peer->rate_tokens = ip_rt_error_burst;
998 peer->rate_last = now;
999 if (peer->rate_tokens >= ip_rt_error_cost)
1000 peer->rate_tokens -= ip_rt_error_cost;
1001 else
1002 send = false;
David S. Miller1d861aa2012-07-10 03:58:16 -07001003 inet_putpeer(peer);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001004 }
David S. Miller92d86822011-02-04 15:55:25 -08001005 if (send)
1006 icmp_send(skb, ICMP_DEST_UNREACH, code, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001007
1008out: kfree_skb(skb);
1009 return 0;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001010}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001011
Steffen Klassertd851c122012-10-07 22:47:25 +00001012static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001013{
Steffen Klassertd851c122012-10-07 22:47:25 +00001014 struct dst_entry *dst = &rt->dst;
Sabrina Dubroca28d35bc2018-10-09 17:48:15 +02001015 u32 old_mtu = ipv4_mtu(dst);
David S. Miller4895c772012-07-17 04:19:00 -07001016 struct fib_result res;
Sabrina Dubrocad52e5a72018-03-14 10:21:14 +01001017 bool lock = false;
David S. Miller2c8cec52011-02-09 20:42:07 -08001018
Sabrina Dubrocad52e5a72018-03-14 10:21:14 +01001019 if (ip_mtu_locked(dst))
Steffen Klassertfa1e4922013-01-16 20:58:10 +00001020 return;
1021
Sabrina Dubroca28d35bc2018-10-09 17:48:15 +02001022 if (old_mtu < mtu)
Li Wei3cdaa5b2015-01-29 16:09:03 +08001023 return;
1024
Sabrina Dubrocad52e5a72018-03-14 10:21:14 +01001025 if (mtu < ip_rt_min_pmtu) {
1026 lock = true;
Sabrina Dubroca28d35bc2018-10-09 17:48:15 +02001027 mtu = min(old_mtu, ip_rt_min_pmtu);
Sabrina Dubrocad52e5a72018-03-14 10:21:14 +01001028 }
Eric Dumazetfe6fe792011-06-08 06:07:07 +00001029
Sabrina Dubroca28d35bc2018-10-09 17:48:15 +02001030 if (rt->rt_pmtu == mtu && !lock &&
Timo Teräsf0162292013-05-27 20:46:32 +00001031 time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2))
1032 return;
1033
Eric Dumazetc5ae7d42012-08-28 12:33:07 +00001034 rcu_read_lock();
Andy Gospodarek0eeb0752015-06-23 13:45:37 -04001035 if (fib_lookup(dev_net(dst->dev), fl4, &res, 0) == 0) {
David Aherneba618a2019-04-02 14:11:55 -07001036 struct fib_nh_common *nhc = FIB_RES_NHC(res);
David S. Miller4895c772012-07-17 04:19:00 -07001037
David Aherna5995e72019-04-30 07:45:50 -07001038 update_or_create_fnhe(nhc, fl4->daddr, 0, mtu, lock,
Julian Anastasovaee06da2012-07-18 10:15:35 +00001039 jiffies + ip_rt_mtu_expires);
David S. Miller4895c772012-07-17 04:19:00 -07001040 }
Eric Dumazetc5ae7d42012-08-28 12:33:07 +00001041 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001042}
1043
David S. Miller4895c772012-07-17 04:19:00 -07001044static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1045 struct sk_buff *skb, u32 mtu)
1046{
1047 struct rtable *rt = (struct rtable *) dst;
1048 struct flowi4 fl4;
1049
1050 ip_rt_build_flow_key(&fl4, sk, skb);
Steffen Klassertd851c122012-10-07 22:47:25 +00001051 __ip_rt_update_pmtu(rt, &fl4, mtu);
David S. Miller4895c772012-07-17 04:19:00 -07001052}
1053
David S. Miller36393392012-06-14 22:21:46 -07001054void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
Maciej Żenczykowskid888f392018-09-25 20:56:26 -07001055 int oif, u8 protocol)
David S. Miller36393392012-06-14 22:21:46 -07001056{
David S. Miller4895c772012-07-17 04:19:00 -07001057 const struct iphdr *iph = (const struct iphdr *) skb->data;
David S. Miller36393392012-06-14 22:21:46 -07001058 struct flowi4 fl4;
1059 struct rtable *rt;
Maciej Żenczykowskid888f392018-09-25 20:56:26 -07001060 u32 mark = IP4_REPLY_MARK(net, skb->mark);
Lorenzo Colitti1b3c61d2014-05-13 10:17:34 -07001061
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09001062 __build_flow_key(net, &fl4, NULL, iph, oif,
Maciej Żenczykowskid888f392018-09-25 20:56:26 -07001063 RT_TOS(iph->tos), protocol, mark, 0);
David S. Miller36393392012-06-14 22:21:46 -07001064 rt = __ip_route_output_key(net, &fl4);
1065 if (!IS_ERR(rt)) {
David S. Miller4895c772012-07-17 04:19:00 -07001066 __ip_rt_update_pmtu(rt, &fl4, mtu);
David S. Miller36393392012-06-14 22:21:46 -07001067 ip_rt_put(rt);
1068 }
1069}
1070EXPORT_SYMBOL_GPL(ipv4_update_pmtu);
1071
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001072static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
David S. Miller36393392012-06-14 22:21:46 -07001073{
David S. Miller4895c772012-07-17 04:19:00 -07001074 const struct iphdr *iph = (const struct iphdr *) skb->data;
1075 struct flowi4 fl4;
1076 struct rtable *rt;
David S. Miller36393392012-06-14 22:21:46 -07001077
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09001078 __build_flow_key(sock_net(sk), &fl4, sk, iph, 0, 0, 0, 0, 0);
Lorenzo Colitti1b3c61d2014-05-13 10:17:34 -07001079
1080 if (!fl4.flowi4_mark)
1081 fl4.flowi4_mark = IP4_REPLY_MARK(sock_net(sk), skb->mark);
1082
David S. Miller4895c772012-07-17 04:19:00 -07001083 rt = __ip_route_output_key(sock_net(sk), &fl4);
1084 if (!IS_ERR(rt)) {
1085 __ip_rt_update_pmtu(rt, &fl4, mtu);
1086 ip_rt_put(rt);
1087 }
David S. Miller36393392012-06-14 22:21:46 -07001088}
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001089
1090void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
1091{
1092 const struct iphdr *iph = (const struct iphdr *) skb->data;
1093 struct flowi4 fl4;
1094 struct rtable *rt;
Eric Dumazet7f502362014-06-30 01:26:23 -07001095 struct dst_entry *odst = NULL;
Steffen Klassertb44108d2013-01-22 00:01:28 +00001096 bool new = false;
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09001097 struct net *net = sock_net(sk);
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001098
1099 bh_lock_sock(sk);
Hannes Frederic Sowa482fc602013-11-05 02:24:17 +01001100
1101 if (!ip_sk_accept_pmtu(sk))
1102 goto out;
1103
Eric Dumazet7f502362014-06-30 01:26:23 -07001104 odst = sk_dst_get(sk);
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001105
Eric Dumazet7f502362014-06-30 01:26:23 -07001106 if (sock_owned_by_user(sk) || !odst) {
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001107 __ipv4_sk_update_pmtu(skb, sk, mtu);
1108 goto out;
1109 }
1110
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09001111 __build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0);
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001112
Eric Dumazet7f502362014-06-30 01:26:23 -07001113 rt = (struct rtable *)odst;
Ian Morris51456b22015-04-03 09:17:26 +01001114 if (odst->obsolete && !odst->ops->check(odst, 0)) {
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001115 rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
1116 if (IS_ERR(rt))
1117 goto out;
Steffen Klassertb44108d2013-01-22 00:01:28 +00001118
1119 new = true;
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001120 }
1121
David Miller0f6c4802017-11-28 15:40:46 -05001122 __ip_rt_update_pmtu((struct rtable *) xfrm_dst_path(&rt->dst), &fl4, mtu);
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001123
Eric Dumazet7f502362014-06-30 01:26:23 -07001124 if (!dst_check(&rt->dst, 0)) {
Steffen Klassertb44108d2013-01-22 00:01:28 +00001125 if (new)
1126 dst_release(&rt->dst);
1127
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001128 rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
1129 if (IS_ERR(rt))
1130 goto out;
1131
Steffen Klassertb44108d2013-01-22 00:01:28 +00001132 new = true;
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001133 }
1134
Steffen Klassertb44108d2013-01-22 00:01:28 +00001135 if (new)
Eric Dumazet7f502362014-06-30 01:26:23 -07001136 sk_dst_set(sk, &rt->dst);
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001137
1138out:
1139 bh_unlock_sock(sk);
Eric Dumazet7f502362014-06-30 01:26:23 -07001140 dst_release(odst);
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001141}
David S. Miller36393392012-06-14 22:21:46 -07001142EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu);
David S. Millerf39925d2011-02-09 22:00:16 -08001143
David S. Millerb42597e2012-07-11 21:25:45 -07001144void ipv4_redirect(struct sk_buff *skb, struct net *net,
Maciej Żenczykowski1042caa2018-09-25 20:56:27 -07001145 int oif, u8 protocol)
David S. Millerb42597e2012-07-11 21:25:45 -07001146{
David S. Miller4895c772012-07-17 04:19:00 -07001147 const struct iphdr *iph = (const struct iphdr *) skb->data;
David S. Millerb42597e2012-07-11 21:25:45 -07001148 struct flowi4 fl4;
1149 struct rtable *rt;
1150
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09001151 __build_flow_key(net, &fl4, NULL, iph, oif,
Maciej Żenczykowski1042caa2018-09-25 20:56:27 -07001152 RT_TOS(iph->tos), protocol, 0, 0);
David S. Millerb42597e2012-07-11 21:25:45 -07001153 rt = __ip_route_output_key(net, &fl4);
1154 if (!IS_ERR(rt)) {
David S. Millerceb33202012-07-17 11:31:28 -07001155 __ip_do_redirect(rt, skb, &fl4, false);
David S. Millerb42597e2012-07-11 21:25:45 -07001156 ip_rt_put(rt);
1157 }
1158}
1159EXPORT_SYMBOL_GPL(ipv4_redirect);
1160
1161void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk)
1162{
David S. Miller4895c772012-07-17 04:19:00 -07001163 const struct iphdr *iph = (const struct iphdr *) skb->data;
1164 struct flowi4 fl4;
1165 struct rtable *rt;
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09001166 struct net *net = sock_net(sk);
David S. Millerb42597e2012-07-11 21:25:45 -07001167
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09001168 __build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0);
1169 rt = __ip_route_output_key(net, &fl4);
David S. Miller4895c772012-07-17 04:19:00 -07001170 if (!IS_ERR(rt)) {
David S. Millerceb33202012-07-17 11:31:28 -07001171 __ip_do_redirect(rt, skb, &fl4, false);
David S. Miller4895c772012-07-17 04:19:00 -07001172 ip_rt_put(rt);
1173 }
David S. Millerb42597e2012-07-11 21:25:45 -07001174}
1175EXPORT_SYMBOL_GPL(ipv4_sk_redirect);
1176
David S. Millerefbc368d2011-12-01 13:38:59 -05001177static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
1178{
1179 struct rtable *rt = (struct rtable *) dst;
1180
David S. Millerceb33202012-07-17 11:31:28 -07001181 /* All IPV4 dsts are created with ->obsolete set to the value
1182 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1183 * into this function always.
1184 *
Timo Teräs387aa652013-05-27 20:46:31 +00001185 * When a PMTU/redirect information update invalidates a route,
1186 * this is indicated by setting obsolete to DST_OBSOLETE_KILL or
Julian Wiedmann02afc7a2019-03-20 20:02:56 +01001187 * DST_OBSOLETE_DEAD.
David S. Millerceb33202012-07-17 11:31:28 -07001188 */
Timo Teräs387aa652013-05-27 20:46:31 +00001189 if (dst->obsolete != DST_OBSOLETE_FORCE_CHK || rt_is_expired(rt))
David S. Millerefbc368d2011-12-01 13:38:59 -05001190 return NULL;
Timo Teräsd11a4dc2010-03-18 23:20:20 +00001191 return dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001192}
1193
Eric Dumazet20ff83f2019-04-24 08:04:05 -07001194static void ipv4_send_dest_unreach(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001195{
Stephen Suryaputraed0de452019-04-12 16:19:27 -04001196 struct ip_options opt;
Eric Dumazetc543cb42019-04-13 17:32:21 -07001197 int res;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001198
Stephen Suryaputraed0de452019-04-12 16:19:27 -04001199 /* Recompile ip options since IPCB may not be valid anymore.
Eric Dumazet20ff83f2019-04-24 08:04:05 -07001200 * Also check we have a reasonable ipv4 header.
Stephen Suryaputraed0de452019-04-12 16:19:27 -04001201 */
Eric Dumazet20ff83f2019-04-24 08:04:05 -07001202 if (!pskb_network_may_pull(skb, sizeof(struct iphdr)) ||
1203 ip_hdr(skb)->version != 4 || ip_hdr(skb)->ihl < 5)
Stephen Suryaputraed0de452019-04-12 16:19:27 -04001204 return;
1205
Eric Dumazet20ff83f2019-04-24 08:04:05 -07001206 memset(&opt, 0, sizeof(opt));
1207 if (ip_hdr(skb)->ihl > 5) {
1208 if (!pskb_network_may_pull(skb, ip_hdr(skb)->ihl * 4))
1209 return;
1210 opt.optlen = ip_hdr(skb)->ihl * 4 - sizeof(struct iphdr);
1211
1212 rcu_read_lock();
1213 res = __ip_options_compile(dev_net(skb->dev), &opt, skb, NULL);
1214 rcu_read_unlock();
1215
1216 if (res)
1217 return;
1218 }
Stephen Suryaputraed0de452019-04-12 16:19:27 -04001219 __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0, &opt);
Eric Dumazet20ff83f2019-04-24 08:04:05 -07001220}
1221
1222static void ipv4_link_failure(struct sk_buff *skb)
1223{
1224 struct rtable *rt;
1225
1226 ipv4_send_dest_unreach(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001227
Eric Dumazet511c3f92009-06-02 05:14:27 +00001228 rt = skb_rtable(skb);
David S. Miller59436342012-07-10 06:58:42 -07001229 if (rt)
1230 dst_set_expires(&rt->dst, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001231}
1232
Eric W. Biedermanede20592015-10-07 16:48:47 -05001233static int ip_rt_bug(struct net *net, struct sock *sk, struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001234{
Joe Perches91df42b2012-05-15 14:11:54 +00001235 pr_debug("%s: %pI4 -> %pI4, %s\n",
1236 __func__, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr,
1237 skb->dev ? skb->dev->name : "?");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001238 kfree_skb(skb);
Dave Jonesc378a9c2011-05-21 07:16:42 +00001239 WARN_ON(1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001240 return 0;
1241}
1242
1243/*
1244 We do not cache source address of outgoing interface,
1245 because it is used only by IP RR, TS and SRR options,
1246 so that it out of fast path.
1247
1248 BTW remember: "addr" is allowed to be not aligned
1249 in IP options!
1250 */
1251
David S. Miller8e363602011-05-13 17:29:41 -04001252void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001253{
Al Viroa61ced52006-09-26 21:27:54 -07001254 __be32 src;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001255
David S. Millerc7537962010-11-11 17:07:48 -08001256 if (rt_is_output_route(rt))
David S. Millerc5be24f2011-05-13 18:01:21 -04001257 src = ip_hdr(skb)->saddr;
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001258 else {
David S. Miller8e363602011-05-13 17:29:41 -04001259 struct fib_result res;
Maciej Żenczykowskie351bb62018-09-29 23:44:46 -07001260 struct iphdr *iph = ip_hdr(skb);
1261 struct flowi4 fl4 = {
1262 .daddr = iph->daddr,
1263 .saddr = iph->saddr,
1264 .flowi4_tos = RT_TOS(iph->tos),
1265 .flowi4_oif = rt->dst.dev->ifindex,
1266 .flowi4_iif = skb->dev->ifindex,
1267 .flowi4_mark = skb->mark,
1268 };
David S. Miller5e2b61f2011-03-04 21:47:09 -08001269
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001270 rcu_read_lock();
Andy Gospodarek0eeb0752015-06-23 13:45:37 -04001271 if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res, 0) == 0)
David Aherneba618a2019-04-02 14:11:55 -07001272 src = fib_result_prefsrc(dev_net(rt->dst.dev), &res);
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001273 else
David S. Millerf8126f12012-07-13 05:03:45 -07001274 src = inet_select_addr(rt->dst.dev,
1275 rt_nexthop(rt, iph->daddr),
1276 RT_SCOPE_UNIVERSE);
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001277 rcu_read_unlock();
1278 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001279 memcpy(addr, &src, 4);
1280}
1281
Patrick McHardyc7066f72011-01-14 13:36:42 +01001282#ifdef CONFIG_IP_ROUTE_CLASSID
Linus Torvalds1da177e2005-04-16 15:20:36 -07001283static void set_class_tag(struct rtable *rt, u32 tag)
1284{
Changli Gaod8d1f302010-06-10 23:31:35 -07001285 if (!(rt->dst.tclassid & 0xFFFF))
1286 rt->dst.tclassid |= tag & 0xFFFF;
1287 if (!(rt->dst.tclassid & 0xFFFF0000))
1288 rt->dst.tclassid |= tag & 0xFFFF0000;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001289}
1290#endif
1291
David S. Miller0dbaee32010-12-13 12:52:14 -08001292static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
1293{
Gao Feng7ed14d92017-04-12 12:34:03 +08001294 unsigned int header_size = sizeof(struct tcphdr) + sizeof(struct iphdr);
Eric Dumazet164a5e72017-10-18 17:02:03 -07001295 unsigned int advmss = max_t(unsigned int, ipv4_mtu(dst) - header_size,
Gao Feng7ed14d92017-04-12 12:34:03 +08001296 ip_rt_min_advmss);
David S. Miller0dbaee32010-12-13 12:52:14 -08001297
Gao Feng7ed14d92017-04-12 12:34:03 +08001298 return min(advmss, IPV4_MAX_PMTU - header_size);
David S. Miller0dbaee32010-12-13 12:52:14 -08001299}
1300
Steffen Klassertebb762f2011-11-23 02:12:51 +00001301static unsigned int ipv4_mtu(const struct dst_entry *dst)
David S. Millerd33e4552010-12-14 13:01:14 -08001302{
Steffen Klassert261663b2011-11-23 02:14:50 +00001303 const struct rtable *rt = (const struct rtable *) dst;
David S. Miller59436342012-07-10 06:58:42 -07001304 unsigned int mtu = rt->rt_pmtu;
1305
Alexander Duyck98d75c32012-08-27 06:30:01 +00001306 if (!mtu || time_after_eq(jiffies, rt->dst.expires))
David S. Miller59436342012-07-10 06:58:42 -07001307 mtu = dst_metric_raw(dst, RTAX_MTU);
Steffen Klassert618f9bc2011-11-23 02:13:31 +00001308
Steffen Klassert38d523e2013-01-16 20:55:01 +00001309 if (mtu)
Steffen Klassert618f9bc2011-11-23 02:13:31 +00001310 return mtu;
1311
Eric Dumazetc780a042017-08-16 11:09:12 -07001312 mtu = READ_ONCE(dst->dev->mtu);
David S. Millerd33e4552010-12-14 13:01:14 -08001313
Sabrina Dubrocad52e5a72018-03-14 10:21:14 +01001314 if (unlikely(ip_mtu_locked(dst))) {
David Ahern1550c172019-04-05 16:30:27 -07001315 if (rt->rt_gw_family && mtu > 576)
David S. Millerd33e4552010-12-14 13:01:14 -08001316 mtu = 576;
1317 }
1318
Roopa Prabhu14972cb2016-08-24 20:10:43 -07001319 mtu = min_t(unsigned int, mtu, IP_MAX_MTU);
1320
1321 return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
David S. Millerd33e4552010-12-14 13:01:14 -08001322}
1323
David Aherna5995e72019-04-30 07:45:50 -07001324static void ip_del_fnhe(struct fib_nh_common *nhc, __be32 daddr)
Julian Anastasov94720e32018-05-02 09:41:19 +03001325{
1326 struct fnhe_hash_bucket *hash;
1327 struct fib_nh_exception *fnhe, __rcu **fnhe_p;
1328 u32 hval = fnhe_hashfun(daddr);
1329
1330 spin_lock_bh(&fnhe_lock);
1331
David Aherna5995e72019-04-30 07:45:50 -07001332 hash = rcu_dereference_protected(nhc->nhc_exceptions,
Julian Anastasov94720e32018-05-02 09:41:19 +03001333 lockdep_is_held(&fnhe_lock));
1334 hash += hval;
1335
1336 fnhe_p = &hash->chain;
1337 fnhe = rcu_dereference_protected(*fnhe_p, lockdep_is_held(&fnhe_lock));
1338 while (fnhe) {
1339 if (fnhe->fnhe_daddr == daddr) {
1340 rcu_assign_pointer(*fnhe_p, rcu_dereference_protected(
1341 fnhe->fnhe_next, lockdep_is_held(&fnhe_lock)));
Xin Longee60ad22019-03-08 14:50:54 +08001342 /* set fnhe_daddr to 0 to ensure it won't bind with
1343 * new dsts in rt_bind_exception().
1344 */
1345 fnhe->fnhe_daddr = 0;
Julian Anastasov94720e32018-05-02 09:41:19 +03001346 fnhe_flush_routes(fnhe);
1347 kfree_rcu(fnhe, rcu);
1348 break;
1349 }
1350 fnhe_p = &fnhe->fnhe_next;
1351 fnhe = rcu_dereference_protected(fnhe->fnhe_next,
1352 lockdep_is_held(&fnhe_lock));
1353 }
1354
1355 spin_unlock_bh(&fnhe_lock);
1356}
1357
David Aherna5995e72019-04-30 07:45:50 -07001358static struct fib_nh_exception *find_exception(struct fib_nh_common *nhc,
1359 __be32 daddr)
David S. Miller4895c772012-07-17 04:19:00 -07001360{
David Aherna5995e72019-04-30 07:45:50 -07001361 struct fnhe_hash_bucket *hash = rcu_dereference(nhc->nhc_exceptions);
David S. Miller4895c772012-07-17 04:19:00 -07001362 struct fib_nh_exception *fnhe;
1363 u32 hval;
1364
David S. Millerf2bb4be2012-07-17 12:20:47 -07001365 if (!hash)
1366 return NULL;
1367
David S. Millerd3a25c92012-07-17 13:23:08 -07001368 hval = fnhe_hashfun(daddr);
David S. Miller4895c772012-07-17 04:19:00 -07001369
1370 for (fnhe = rcu_dereference(hash[hval].chain); fnhe;
1371 fnhe = rcu_dereference(fnhe->fnhe_next)) {
Julian Anastasov94720e32018-05-02 09:41:19 +03001372 if (fnhe->fnhe_daddr == daddr) {
1373 if (fnhe->fnhe_expires &&
1374 time_after(jiffies, fnhe->fnhe_expires)) {
David Aherna5995e72019-04-30 07:45:50 -07001375 ip_del_fnhe(nhc, daddr);
Julian Anastasov94720e32018-05-02 09:41:19 +03001376 break;
1377 }
David S. Millerf2bb4be2012-07-17 12:20:47 -07001378 return fnhe;
Julian Anastasov94720e32018-05-02 09:41:19 +03001379 }
David S. Millerf2bb4be2012-07-17 12:20:47 -07001380 }
1381 return NULL;
1382}
David S. Miller4895c772012-07-17 04:19:00 -07001383
David Ahern50d889b2018-05-21 09:08:13 -07001384/* MTU selection:
1385 * 1. mtu on route is locked - use it
1386 * 2. mtu from nexthop exception
1387 * 3. mtu from egress device
1388 */
1389
1390u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr)
1391{
David Aherneba618a2019-04-02 14:11:55 -07001392 struct fib_nh_common *nhc = res->nhc;
1393 struct net_device *dev = nhc->nhc_dev;
David Ahern50d889b2018-05-21 09:08:13 -07001394 struct fib_info *fi = res->fi;
David Ahern50d889b2018-05-21 09:08:13 -07001395 u32 mtu = 0;
1396
1397 if (dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu ||
1398 fi->fib_metrics->metrics[RTAX_LOCK - 1] & (1 << RTAX_MTU))
1399 mtu = fi->fib_mtu;
1400
1401 if (likely(!mtu)) {
1402 struct fib_nh_exception *fnhe;
1403
David Aherna5995e72019-04-30 07:45:50 -07001404 fnhe = find_exception(nhc, daddr);
David Ahern50d889b2018-05-21 09:08:13 -07001405 if (fnhe && !time_after_eq(jiffies, fnhe->fnhe_expires))
1406 mtu = fnhe->fnhe_pmtu;
1407 }
1408
1409 if (likely(!mtu))
1410 mtu = min(READ_ONCE(dev->mtu), IP_MAX_MTU);
1411
David Aherneba618a2019-04-02 14:11:55 -07001412 return mtu - lwtunnel_headroom(nhc->nhc_lwtstate, mtu);
David Ahern50d889b2018-05-21 09:08:13 -07001413}
1414
David S. Millercaacf052012-07-31 15:06:50 -07001415static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
Wei Wanga4c2fd72017-06-17 10:42:42 -07001416 __be32 daddr, const bool do_cache)
David S. Millerf2bb4be2012-07-17 12:20:47 -07001417{
David S. Millercaacf052012-07-31 15:06:50 -07001418 bool ret = false;
1419
David S. Millerc5038a82012-07-31 15:02:02 -07001420 spin_lock_bh(&fnhe_lock);
Julian Anastasovaee06da2012-07-18 10:15:35 +00001421
David S. Millerc5038a82012-07-31 15:02:02 -07001422 if (daddr == fnhe->fnhe_daddr) {
Timo Teräs2ffae992013-06-27 10:27:05 +03001423 struct rtable __rcu **porig;
1424 struct rtable *orig;
Timo Teräs5aad1de2013-05-27 20:46:33 +00001425 int genid = fnhe_genid(dev_net(rt->dst.dev));
Timo Teräs2ffae992013-06-27 10:27:05 +03001426
1427 if (rt_is_input_route(rt))
1428 porig = &fnhe->fnhe_rth_input;
1429 else
1430 porig = &fnhe->fnhe_rth_output;
1431 orig = rcu_dereference(*porig);
Timo Teräs5aad1de2013-05-27 20:46:33 +00001432
1433 if (fnhe->fnhe_genid != genid) {
1434 fnhe->fnhe_genid = genid;
Steffen Klassert13d82bf2012-10-17 21:17:44 +00001435 fnhe->fnhe_gw = 0;
1436 fnhe->fnhe_pmtu = 0;
1437 fnhe->fnhe_expires = 0;
Hangbin Liu0e8411e42018-05-09 18:06:44 +08001438 fnhe->fnhe_mtu_locked = false;
Timo Teräs2ffae992013-06-27 10:27:05 +03001439 fnhe_flush_routes(fnhe);
1440 orig = NULL;
Steffen Klassert13d82bf2012-10-17 21:17:44 +00001441 }
Timo Teräs387aa652013-05-27 20:46:31 +00001442 fill_route_from_fnhe(rt, fnhe);
David Ahern1550c172019-04-05 16:30:27 -07001443 if (!rt->rt_gw4) {
1444 rt->rt_gw4 = daddr;
1445 rt->rt_gw_family = AF_INET;
1446 }
David S. Millerf2bb4be2012-07-17 12:20:47 -07001447
Wei Wanga4c2fd72017-06-17 10:42:42 -07001448 if (do_cache) {
Wei Wang08301062017-06-17 10:42:29 -07001449 dst_hold(&rt->dst);
Timo Teräs2ffae992013-06-27 10:27:05 +03001450 rcu_assign_pointer(*porig, rt);
Wei Wang08301062017-06-17 10:42:29 -07001451 if (orig) {
Wei Wang95c47f92017-06-17 10:42:30 -07001452 dst_dev_put(&orig->dst);
Wei Wang08301062017-06-17 10:42:29 -07001453 dst_release(&orig->dst);
Wei Wang08301062017-06-17 10:42:29 -07001454 }
Timo Teräs2ffae992013-06-27 10:27:05 +03001455 ret = true;
1456 }
David S. Millerc5038a82012-07-31 15:02:02 -07001457
1458 fnhe->fnhe_stamp = jiffies;
David S. Millerc5038a82012-07-31 15:02:02 -07001459 }
1460 spin_unlock_bh(&fnhe_lock);
David S. Millercaacf052012-07-31 15:06:50 -07001461
1462 return ret;
Eric Dumazet54764bb2012-07-31 01:08:23 +00001463}
1464
David Ahern87063a1f2019-04-30 07:45:49 -07001465static bool rt_cache_route(struct fib_nh_common *nhc, struct rtable *rt)
David S. Millerf2bb4be2012-07-17 12:20:47 -07001466{
Eric Dumazetd26b3a72012-07-31 05:45:30 +00001467 struct rtable *orig, *prev, **p;
David S. Millercaacf052012-07-31 15:06:50 -07001468 bool ret = true;
David S. Millerf2bb4be2012-07-17 12:20:47 -07001469
Eric Dumazetd26b3a72012-07-31 05:45:30 +00001470 if (rt_is_input_route(rt)) {
David Ahern0f457a32019-04-30 07:45:48 -07001471 p = (struct rtable **)&nhc->nhc_rth_input;
Eric Dumazetd26b3a72012-07-31 05:45:30 +00001472 } else {
David Ahern0f457a32019-04-30 07:45:48 -07001473 p = (struct rtable **)raw_cpu_ptr(nhc->nhc_pcpu_rth_output);
Eric Dumazetd26b3a72012-07-31 05:45:30 +00001474 }
David S. Millerf2bb4be2012-07-17 12:20:47 -07001475 orig = *p;
1476
Wei Wang08301062017-06-17 10:42:29 -07001477 /* hold dst before doing cmpxchg() to avoid race condition
1478 * on this dst
1479 */
1480 dst_hold(&rt->dst);
David S. Millerf2bb4be2012-07-17 12:20:47 -07001481 prev = cmpxchg(p, orig, rt);
1482 if (prev == orig) {
Wei Wang08301062017-06-17 10:42:29 -07001483 if (orig) {
Wei Wang95c47f92017-06-17 10:42:30 -07001484 dst_dev_put(&orig->dst);
Wei Wang08301062017-06-17 10:42:29 -07001485 dst_release(&orig->dst);
Wei Wang08301062017-06-17 10:42:29 -07001486 }
1487 } else {
1488 dst_release(&rt->dst);
David S. Millercaacf052012-07-31 15:06:50 -07001489 ret = false;
Wei Wang08301062017-06-17 10:42:29 -07001490 }
David S. Millercaacf052012-07-31 15:06:50 -07001491
1492 return ret;
1493}
1494
Eric Dumazet5055c372015-01-14 15:17:06 -08001495struct uncached_list {
1496 spinlock_t lock;
1497 struct list_head head;
1498};
1499
1500static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt_uncached_list);
David S. Millercaacf052012-07-31 15:06:50 -07001501
Xin Long510c3212018-02-14 19:06:02 +08001502void rt_add_uncached_list(struct rtable *rt)
David S. Millercaacf052012-07-31 15:06:50 -07001503{
Eric Dumazet5055c372015-01-14 15:17:06 -08001504 struct uncached_list *ul = raw_cpu_ptr(&rt_uncached_list);
1505
1506 rt->rt_uncached_list = ul;
1507
1508 spin_lock_bh(&ul->lock);
1509 list_add_tail(&rt->rt_uncached, &ul->head);
1510 spin_unlock_bh(&ul->lock);
David S. Millercaacf052012-07-31 15:06:50 -07001511}
1512
Xin Long510c3212018-02-14 19:06:02 +08001513void rt_del_uncached_list(struct rtable *rt)
David S. Millercaacf052012-07-31 15:06:50 -07001514{
Eric Dumazet78df76a2012-08-24 05:40:47 +00001515 if (!list_empty(&rt->rt_uncached)) {
Eric Dumazet5055c372015-01-14 15:17:06 -08001516 struct uncached_list *ul = rt->rt_uncached_list;
1517
1518 spin_lock_bh(&ul->lock);
David S. Millercaacf052012-07-31 15:06:50 -07001519 list_del(&rt->rt_uncached);
Eric Dumazet5055c372015-01-14 15:17:06 -08001520 spin_unlock_bh(&ul->lock);
David S. Millercaacf052012-07-31 15:06:50 -07001521 }
1522}
1523
Xin Long510c3212018-02-14 19:06:02 +08001524static void ipv4_dst_destroy(struct dst_entry *dst)
1525{
Xin Long510c3212018-02-14 19:06:02 +08001526 struct rtable *rt = (struct rtable *)dst;
1527
David Ahern1620a332018-10-04 20:07:54 -07001528 ip_dst_metrics_put(dst);
Xin Long510c3212018-02-14 19:06:02 +08001529 rt_del_uncached_list(rt);
1530}
1531
David S. Millercaacf052012-07-31 15:06:50 -07001532void rt_flush_dev(struct net_device *dev)
1533{
Eric Dumazet5055c372015-01-14 15:17:06 -08001534 struct net *net = dev_net(dev);
1535 struct rtable *rt;
1536 int cpu;
David S. Millercaacf052012-07-31 15:06:50 -07001537
Eric Dumazet5055c372015-01-14 15:17:06 -08001538 for_each_possible_cpu(cpu) {
1539 struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu);
1540
1541 spin_lock_bh(&ul->lock);
1542 list_for_each_entry(rt, &ul->head, rt_uncached) {
David S. Millercaacf052012-07-31 15:06:50 -07001543 if (rt->dst.dev != dev)
1544 continue;
1545 rt->dst.dev = net->loopback_dev;
1546 dev_hold(rt->dst.dev);
1547 dev_put(dev);
1548 }
Eric Dumazet5055c372015-01-14 15:17:06 -08001549 spin_unlock_bh(&ul->lock);
David S. Miller4895c772012-07-17 04:19:00 -07001550 }
1551}
1552
Eric Dumazet4331deb2012-07-25 05:11:23 +00001553static bool rt_cache_valid(const struct rtable *rt)
David S. Millerd2d68ba92012-07-17 12:58:50 -07001554{
Eric Dumazet4331deb2012-07-25 05:11:23 +00001555 return rt &&
1556 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1557 !rt_is_expired(rt);
David S. Millerd2d68ba92012-07-17 12:58:50 -07001558}
1559
David S. Millerf2bb4be2012-07-17 12:20:47 -07001560static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
David S. Miller5e2b61f2011-03-04 21:47:09 -08001561 const struct fib_result *res,
David S. Millerf2bb4be2012-07-17 12:20:47 -07001562 struct fib_nh_exception *fnhe,
Wei Wanga4c2fd72017-06-17 10:42:42 -07001563 struct fib_info *fi, u16 type, u32 itag,
1564 const bool do_cache)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001565{
David S. Millercaacf052012-07-31 15:06:50 -07001566 bool cached = false;
1567
Linus Torvalds1da177e2005-04-16 15:20:36 -07001568 if (fi) {
David Aherneba618a2019-04-02 14:11:55 -07001569 struct fib_nh_common *nhc = FIB_RES_NHC(*res);
David S. Miller4895c772012-07-17 04:19:00 -07001570
David Ahern0f5f7d72019-04-05 16:30:29 -07001571 if (nhc->nhc_gw_family && nhc->nhc_scope == RT_SCOPE_LINK) {
1572 rt->rt_gw_family = nhc->nhc_gw_family;
1573 /* only INET and INET6 are supported */
1574 if (likely(nhc->nhc_gw_family == AF_INET))
1575 rt->rt_gw4 = nhc->nhc_gw.ipv4;
1576 else
1577 rt->rt_gw6 = nhc->nhc_gw.ipv6;
Julian Anastasov155e8332012-10-08 11:41:18 +00001578 }
David Ahern0f5f7d72019-04-05 16:30:29 -07001579
David Aherne1255ed2018-10-04 20:07:53 -07001580 ip_dst_init_metrics(&rt->dst, fi->fib_metrics);
1581
Patrick McHardyc7066f72011-01-14 13:36:42 +01001582#ifdef CONFIG_IP_ROUTE_CLASSID
David Ahern87063a1f2019-04-30 07:45:49 -07001583 {
1584 struct fib_nh *nh;
1585
1586 nh = container_of(nhc, struct fib_nh, nh_common);
1587 rt->dst.tclassid = nh->nh_tclassid;
1588 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001589#endif
David Ahern87063a1f2019-04-30 07:45:49 -07001590 rt->dst.lwtstate = lwtstate_get(nhc->nhc_lwtstate);
David S. Millerc5038a82012-07-31 15:02:02 -07001591 if (unlikely(fnhe))
Wei Wanga4c2fd72017-06-17 10:42:42 -07001592 cached = rt_bind_exception(rt, fnhe, daddr, do_cache);
1593 else if (do_cache)
David Ahern87063a1f2019-04-30 07:45:49 -07001594 cached = rt_cache_route(nhc, rt);
Julian Anastasov155e8332012-10-08 11:41:18 +00001595 if (unlikely(!cached)) {
1596 /* Routes we intend to cache in nexthop exception or
1597 * FIB nexthop have the DST_NOCACHE bit clear.
1598 * However, if we are unsuccessful at storing this
1599 * route into the cache we really need to set it.
1600 */
David Ahern1550c172019-04-05 16:30:27 -07001601 if (!rt->rt_gw4) {
1602 rt->rt_gw_family = AF_INET;
1603 rt->rt_gw4 = daddr;
1604 }
Julian Anastasov155e8332012-10-08 11:41:18 +00001605 rt_add_uncached_list(rt);
1606 }
1607 } else
David S. Millercaacf052012-07-31 15:06:50 -07001608 rt_add_uncached_list(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001609
Patrick McHardyc7066f72011-01-14 13:36:42 +01001610#ifdef CONFIG_IP_ROUTE_CLASSID
Linus Torvalds1da177e2005-04-16 15:20:36 -07001611#ifdef CONFIG_IP_MULTIPLE_TABLES
David S. Miller85b91b02012-07-13 08:21:29 -07001612 set_class_tag(rt, res->tclassid);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001613#endif
1614 set_class_tag(rt, itag);
1615#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001616}
1617
David Ahern9ab179d2016-04-07 11:10:06 -07001618struct rtable *rt_dst_alloc(struct net_device *dev,
1619 unsigned int flags, u16 type,
1620 bool nopolicy, bool noxfrm, bool will_cache)
David S. Miller0c4dcd52011-02-17 15:42:37 -08001621{
David Ahernd08c4f32015-09-02 13:58:34 -07001622 struct rtable *rt;
1623
1624 rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
Wei Wanga4c2fd72017-06-17 10:42:42 -07001625 (will_cache ? 0 : DST_HOST) |
David Ahernd08c4f32015-09-02 13:58:34 -07001626 (nopolicy ? DST_NOPOLICY : 0) |
Wei Wangb2a9c0e2017-06-17 10:42:41 -07001627 (noxfrm ? DST_NOXFRM : 0));
David Ahernd08c4f32015-09-02 13:58:34 -07001628
1629 if (rt) {
1630 rt->rt_genid = rt_genid_ipv4(dev_net(dev));
1631 rt->rt_flags = flags;
1632 rt->rt_type = type;
1633 rt->rt_is_input = 0;
1634 rt->rt_iif = 0;
1635 rt->rt_pmtu = 0;
Sabrina Dubrocad52e5a72018-03-14 10:21:14 +01001636 rt->rt_mtu_locked = 0;
David Ahern1550c172019-04-05 16:30:27 -07001637 rt->rt_gw_family = 0;
1638 rt->rt_gw4 = 0;
David Ahernd08c4f32015-09-02 13:58:34 -07001639 INIT_LIST_HEAD(&rt->rt_uncached);
1640
1641 rt->dst.output = ip_output;
1642 if (flags & RTCF_LOCAL)
1643 rt->dst.input = ip_local_deliver;
1644 }
1645
1646 return rt;
David S. Miller0c4dcd52011-02-17 15:42:37 -08001647}
David Ahern9ab179d2016-04-07 11:10:06 -07001648EXPORT_SYMBOL(rt_dst_alloc);
David S. Miller0c4dcd52011-02-17 15:42:37 -08001649
Stephen Suryaputra5b18f122019-06-26 02:21:16 -04001650struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt)
1651{
1652 struct rtable *new_rt;
1653
1654 new_rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
1655 rt->dst.flags);
1656
1657 if (new_rt) {
1658 new_rt->rt_genid = rt_genid_ipv4(dev_net(dev));
1659 new_rt->rt_flags = rt->rt_flags;
1660 new_rt->rt_type = rt->rt_type;
1661 new_rt->rt_is_input = rt->rt_is_input;
1662 new_rt->rt_iif = rt->rt_iif;
1663 new_rt->rt_pmtu = rt->rt_pmtu;
1664 new_rt->rt_mtu_locked = rt->rt_mtu_locked;
1665 new_rt->rt_gw_family = rt->rt_gw_family;
1666 if (rt->rt_gw_family == AF_INET)
1667 new_rt->rt_gw4 = rt->rt_gw4;
1668 else if (rt->rt_gw_family == AF_INET6)
1669 new_rt->rt_gw6 = rt->rt_gw6;
1670 INIT_LIST_HEAD(&new_rt->rt_uncached);
1671
1672 new_rt->dst.flags |= DST_HOST;
1673 new_rt->dst.input = rt->dst.input;
1674 new_rt->dst.output = rt->dst.output;
1675 new_rt->dst.error = rt->dst.error;
1676 new_rt->dst.lastuse = jiffies;
1677 new_rt->dst.lwtstate = lwtstate_get(rt->dst.lwtstate);
1678 }
1679 return new_rt;
1680}
1681EXPORT_SYMBOL(rt_dst_clone);
1682
Eric Dumazet96d36222010-06-02 19:21:31 +00001683/* called in rcu_read_lock() section */
Paolo Abenibc044e82017-09-28 15:51:37 +02001684int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1685 u8 tos, struct net_device *dev,
1686 struct in_device *in_dev, u32 *itag)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001687{
Eric Dumazetb5f7e752010-06-02 12:05:27 +00001688 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001689
1690 /* Primary sanity checks. */
Ian Morris51456b22015-04-03 09:17:26 +01001691 if (!in_dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001692 return -EINVAL;
1693
Jan Engelhardt1e637c72008-01-21 03:18:08 -08001694 if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
Thomas Grafd0daebc32012-06-12 00:44:01 +00001695 skb->protocol != htons(ETH_P_IP))
Paolo Abenibc044e82017-09-28 15:51:37 +02001696 return -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001697
Alexander Duyck75fea732015-09-28 11:10:38 -07001698 if (ipv4_is_loopback(saddr) && !IN_DEV_ROUTE_LOCALNET(in_dev))
Paolo Abenibc044e82017-09-28 15:51:37 +02001699 return -EINVAL;
Thomas Grafd0daebc32012-06-12 00:44:01 +00001700
Joe Perchesf97c1e02007-12-16 13:45:43 -08001701 if (ipv4_is_zeronet(saddr)) {
Edward Chron1d2f4eb2019-01-31 15:00:40 -08001702 if (!ipv4_is_local_multicast(daddr) &&
1703 ip_hdr(skb)->protocol != IPPROTO_IGMP)
Paolo Abenibc044e82017-09-28 15:51:37 +02001704 return -EINVAL;
Eric Dumazetb5f7e752010-06-02 12:05:27 +00001705 } else {
David S. Miller9e56e382012-06-28 18:54:02 -07001706 err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
Paolo Abenibc044e82017-09-28 15:51:37 +02001707 in_dev, itag);
Eric Dumazetb5f7e752010-06-02 12:05:27 +00001708 if (err < 0)
Paolo Abenibc044e82017-09-28 15:51:37 +02001709 return err;
Eric Dumazetb5f7e752010-06-02 12:05:27 +00001710 }
Paolo Abenibc044e82017-09-28 15:51:37 +02001711 return 0;
1712}
1713
1714/* called in rcu_read_lock() section */
1715static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1716 u8 tos, struct net_device *dev, int our)
1717{
1718 struct in_device *in_dev = __in_dev_get_rcu(dev);
1719 unsigned int flags = RTCF_MULTICAST;
1720 struct rtable *rth;
1721 u32 itag = 0;
1722 int err;
1723
1724 err = ip_mc_validate_source(skb, daddr, saddr, tos, dev, in_dev, &itag);
1725 if (err)
1726 return err;
1727
David Ahernd08c4f32015-09-02 13:58:34 -07001728 if (our)
1729 flags |= RTCF_LOCAL;
1730
1731 rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST,
David S. Millerf2bb4be2012-07-17 12:20:47 -07001732 IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001733 if (!rth)
Paolo Abenibc044e82017-09-28 15:51:37 +02001734 return -ENOBUFS;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001735
Patrick McHardyc7066f72011-01-14 13:36:42 +01001736#ifdef CONFIG_IP_ROUTE_CLASSID
Changli Gaod8d1f302010-06-10 23:31:35 -07001737 rth->dst.tclassid = itag;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001738#endif
David S. Millercf911662011-04-28 14:31:47 -07001739 rth->dst.output = ip_rt_bug;
David S. Miller9917e1e82012-07-17 14:44:26 -07001740 rth->rt_is_input= 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001741
1742#ifdef CONFIG_IP_MROUTE
Joe Perchesf97c1e02007-12-16 13:45:43 -08001743 if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev))
Changli Gaod8d1f302010-06-10 23:31:35 -07001744 rth->dst.input = ip_mr_input;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001745#endif
1746 RT_CACHE_STAT_INC(in_slow_mc);
1747
David S. Miller89aef892012-07-17 11:00:09 -07001748 skb_dst_set(skb, &rth->dst);
1749 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001750}
1751
1752
1753static void ip_handle_martian_source(struct net_device *dev,
1754 struct in_device *in_dev,
1755 struct sk_buff *skb,
Al Viro9e12bb22006-09-26 21:25:20 -07001756 __be32 daddr,
1757 __be32 saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001758{
1759 RT_CACHE_STAT_INC(in_martian_src);
1760#ifdef CONFIG_IP_ROUTE_VERBOSE
1761 if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) {
1762 /*
1763 * RFC1812 recommendation, if source is martian,
1764 * the only hint is MAC header.
1765 */
Joe Perches058bd4d2012-03-11 18:36:11 +00001766 pr_warn("martian source %pI4 from %pI4, on dev %s\n",
Harvey Harrison673d57e2008-10-31 00:53:57 -07001767 &daddr, &saddr, dev->name);
Arnaldo Carvalho de Melo98e399f2007-03-19 15:33:04 -07001768 if (dev->hard_header_len && skb_mac_header_was_set(skb)) {
Joe Perches058bd4d2012-03-11 18:36:11 +00001769 print_hex_dump(KERN_WARNING, "ll header: ",
1770 DUMP_PREFIX_OFFSET, 16, 1,
1771 skb_mac_header(skb),
David S. Millerb2c85102018-11-20 10:15:36 -08001772 dev->hard_header_len, false);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001773 }
1774 }
1775#endif
1776}
1777
Eric Dumazet47360222010-06-03 04:13:21 +00001778/* called in rcu_read_lock() section */
Stephen Hemminger5969f712008-04-10 01:52:09 -07001779static int __mkroute_input(struct sk_buff *skb,
David S. Miller982721f2011-02-16 21:44:24 -08001780 const struct fib_result *res,
Stephen Hemminger5969f712008-04-10 01:52:09 -07001781 struct in_device *in_dev,
David S. Millerc6cffba2012-07-26 11:14:38 +00001782 __be32 daddr, __be32 saddr, u32 tos)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001783{
David Aherneba618a2019-04-02 14:11:55 -07001784 struct fib_nh_common *nhc = FIB_RES_NHC(*res);
1785 struct net_device *dev = nhc->nhc_dev;
Timo Teräs2ffae992013-06-27 10:27:05 +03001786 struct fib_nh_exception *fnhe;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001787 struct rtable *rth;
1788 int err;
1789 struct in_device *out_dev;
David S. Millerd2d68ba92012-07-17 12:58:50 -07001790 bool do_cache;
Li RongQingfbdc0ad2014-05-22 16:36:55 +08001791 u32 itag = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001792
1793 /* get a working reference to the output device */
David Aherneba618a2019-04-02 14:11:55 -07001794 out_dev = __in_dev_get_rcu(dev);
Ian Morris51456b22015-04-03 09:17:26 +01001795 if (!out_dev) {
Joe Perchese87cc472012-05-13 21:56:26 +00001796 net_crit_ratelimited("Bug in ip_route_input_slow(). Please report.\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001797 return -EINVAL;
1798 }
1799
Michael Smith5c04c812011-04-07 04:51:50 +00001800 err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res),
David S. Miller9e56e382012-06-28 18:54:02 -07001801 in_dev->dev, in_dev, &itag);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001802 if (err < 0) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001803 ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001804 saddr);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001805
Linus Torvalds1da177e2005-04-16 15:20:36 -07001806 goto cleanup;
1807 }
1808
Julian Anastasove81da0e2012-10-08 11:41:15 +00001809 do_cache = res->fi && !itag;
1810 if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) &&
David Aherneba618a2019-04-02 14:11:55 -07001811 skb->protocol == htons(ETH_P_IP)) {
David Ahernbdf00462019-04-05 16:30:26 -07001812 __be32 gw;
David Aherneba618a2019-04-02 14:11:55 -07001813
David Ahernbdf00462019-04-05 16:30:26 -07001814 gw = nhc->nhc_gw_family == AF_INET ? nhc->nhc_gw.ipv4 : 0;
David Aherneba618a2019-04-02 14:11:55 -07001815 if (IN_DEV_SHARED_MEDIA(out_dev) ||
1816 inet_addr_onlink(out_dev, saddr, gw))
1817 IPCB(skb)->flags |= IPSKB_DOREDIRECT;
1818 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001819
1820 if (skb->protocol != htons(ETH_P_IP)) {
1821 /* Not IP (i.e. ARP). Do not create route, if it is
1822 * invalid for proxy arp. DNAT routes are always valid.
Jesper Dangaard Brouer65324142010-01-05 05:50:47 +00001823 *
1824 * Proxy arp feature have been extended to allow, ARP
1825 * replies back to the same interface, to support
1826 * Private VLAN switch technologies. See arp.c.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001827 */
Jesper Dangaard Brouer65324142010-01-05 05:50:47 +00001828 if (out_dev == in_dev &&
1829 IN_DEV_PROXY_ARP_PVLAN(in_dev) == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001830 err = -EINVAL;
1831 goto cleanup;
1832 }
1833 }
1834
David Aherna5995e72019-04-30 07:45:50 -07001835 fnhe = find_exception(nhc, daddr);
Julian Anastasove81da0e2012-10-08 11:41:15 +00001836 if (do_cache) {
Julian Anastasov94720e32018-05-02 09:41:19 +03001837 if (fnhe)
Timo Teräs2ffae992013-06-27 10:27:05 +03001838 rth = rcu_dereference(fnhe->fnhe_rth_input);
Julian Anastasov94720e32018-05-02 09:41:19 +03001839 else
David Ahern0f457a32019-04-30 07:45:48 -07001840 rth = rcu_dereference(nhc->nhc_rth_input);
Julian Anastasove81da0e2012-10-08 11:41:15 +00001841 if (rt_cache_valid(rth)) {
1842 skb_dst_set_noref(skb, &rth->dst);
1843 goto out;
David S. Millerd2d68ba92012-07-17 12:58:50 -07001844 }
1845 }
David S. Millerf2bb4be2012-07-17 12:20:47 -07001846
David Ahernd08c4f32015-09-02 13:58:34 -07001847 rth = rt_dst_alloc(out_dev->dev, 0, res->type,
David S. Miller5c1e6aa2011-04-28 14:13:38 -07001848 IN_DEV_CONF_GET(in_dev, NOPOLICY),
David S. Millerd2d68ba92012-07-17 12:58:50 -07001849 IN_DEV_CONF_GET(out_dev, NOXFRM), do_cache);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001850 if (!rth) {
1851 err = -ENOBUFS;
1852 goto cleanup;
1853 }
1854
David S. Miller9917e1e82012-07-17 14:44:26 -07001855 rth->rt_is_input = 1;
Duan Jionga6254862014-02-17 15:23:43 +08001856 RT_CACHE_STAT_INC(in_slow_tot);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001857
Changli Gaod8d1f302010-06-10 23:31:35 -07001858 rth->dst.input = ip_forward;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001859
Wei Wanga4c2fd72017-06-17 10:42:42 -07001860 rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag,
1861 do_cache);
David Ahern99428952018-02-13 20:32:04 -08001862 lwtunnel_set_redirect(&rth->dst);
David S. Millerc6cffba2012-07-26 11:14:38 +00001863 skb_dst_set(skb, &rth->dst);
David S. Millerd2d68ba92012-07-17 12:58:50 -07001864out:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001865 err = 0;
1866 cleanup:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001867 return err;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001868}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001869
Peter Nørlund79a13152015-09-30 10:12:22 +02001870#ifdef CONFIG_IP_ROUTE_MULTIPATH
Peter Nørlund79a13152015-09-30 10:12:22 +02001871/* To make ICMP packets follow the right flow, the multipath hash is
Nikolay Aleksandrovbf4e0a32017-03-16 15:28:00 +02001872 * calculated from the inner IP addresses.
Peter Nørlund79a13152015-09-30 10:12:22 +02001873 */
Nikolay Aleksandrovbf4e0a32017-03-16 15:28:00 +02001874static void ip_multipath_l3_keys(const struct sk_buff *skb,
1875 struct flow_keys *hash_keys)
Peter Nørlund79a13152015-09-30 10:12:22 +02001876{
1877 const struct iphdr *outer_iph = ip_hdr(skb);
David Ahern6f74b6c2018-03-02 08:32:13 -08001878 const struct iphdr *key_iph = outer_iph;
Nikolay Aleksandrovbf4e0a32017-03-16 15:28:00 +02001879 const struct iphdr *inner_iph;
Peter Nørlund79a13152015-09-30 10:12:22 +02001880 const struct icmphdr *icmph;
1881 struct iphdr _inner_iph;
Nikolay Aleksandrovbf4e0a32017-03-16 15:28:00 +02001882 struct icmphdr _icmph;
1883
Nikolay Aleksandrovbf4e0a32017-03-16 15:28:00 +02001884 if (likely(outer_iph->protocol != IPPROTO_ICMP))
David Ahern6f74b6c2018-03-02 08:32:13 -08001885 goto out;
Peter Nørlund79a13152015-09-30 10:12:22 +02001886
1887 if (unlikely((outer_iph->frag_off & htons(IP_OFFSET)) != 0))
David Ahern6f74b6c2018-03-02 08:32:13 -08001888 goto out;
Peter Nørlund79a13152015-09-30 10:12:22 +02001889
1890 icmph = skb_header_pointer(skb, outer_iph->ihl * 4, sizeof(_icmph),
1891 &_icmph);
1892 if (!icmph)
David Ahern6f74b6c2018-03-02 08:32:13 -08001893 goto out;
Peter Nørlund79a13152015-09-30 10:12:22 +02001894
1895 if (icmph->type != ICMP_DEST_UNREACH &&
1896 icmph->type != ICMP_REDIRECT &&
1897 icmph->type != ICMP_TIME_EXCEEDED &&
Nikolay Aleksandrovbf4e0a32017-03-16 15:28:00 +02001898 icmph->type != ICMP_PARAMETERPROB)
David Ahern6f74b6c2018-03-02 08:32:13 -08001899 goto out;
Peter Nørlund79a13152015-09-30 10:12:22 +02001900
1901 inner_iph = skb_header_pointer(skb,
1902 outer_iph->ihl * 4 + sizeof(_icmph),
1903 sizeof(_inner_iph), &_inner_iph);
1904 if (!inner_iph)
David Ahern6f74b6c2018-03-02 08:32:13 -08001905 goto out;
1906
1907 key_iph = inner_iph;
1908out:
1909 hash_keys->addrs.v4addrs.src = key_iph->saddr;
1910 hash_keys->addrs.v4addrs.dst = key_iph->daddr;
Peter Nørlund79a13152015-09-30 10:12:22 +02001911}
1912
Nikolay Aleksandrovbf4e0a32017-03-16 15:28:00 +02001913/* if skb is set it will be used and fl4 can be NULL */
David Ahern7efc0b62018-03-02 08:32:12 -08001914int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
Roopa Prabhue37b1e92018-02-28 22:42:41 -05001915 const struct sk_buff *skb, struct flow_keys *flkeys)
Nikolay Aleksandrovbf4e0a32017-03-16 15:28:00 +02001916{
Ido Schimmel2a8e4992019-03-01 13:38:43 +00001917 u32 multipath_hash = fl4 ? fl4->flowi4_multipath_hash : 0;
Nikolay Aleksandrovbf4e0a32017-03-16 15:28:00 +02001918 struct flow_keys hash_keys;
1919 u32 mhash;
1920
1921 switch (net->ipv4.sysctl_fib_multipath_hash_policy) {
1922 case 0:
1923 memset(&hash_keys, 0, sizeof(hash_keys));
1924 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
1925 if (skb) {
1926 ip_multipath_l3_keys(skb, &hash_keys);
1927 } else {
1928 hash_keys.addrs.v4addrs.src = fl4->saddr;
1929 hash_keys.addrs.v4addrs.dst = fl4->daddr;
1930 }
1931 break;
1932 case 1:
1933 /* skb is currently provided only when forwarding */
1934 if (skb) {
1935 unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
1936 struct flow_keys keys;
1937
1938 /* short-circuit if we already have L4 hash present */
1939 if (skb->l4_hash)
1940 return skb_get_hash_raw(skb) >> 1;
David Ahernec7127a2018-03-02 08:32:14 -08001941
Nikolay Aleksandrovbf4e0a32017-03-16 15:28:00 +02001942 memset(&hash_keys, 0, sizeof(hash_keys));
David Ahern1fe4b112018-02-21 11:00:54 -08001943
David Ahernec7127a2018-03-02 08:32:14 -08001944 if (!flkeys) {
Roopa Prabhue37b1e92018-02-28 22:42:41 -05001945 skb_flow_dissect_flow_keys(skb, &keys, flag);
David Ahernec7127a2018-03-02 08:32:14 -08001946 flkeys = &keys;
Roopa Prabhue37b1e92018-02-28 22:42:41 -05001947 }
David Ahernec7127a2018-03-02 08:32:14 -08001948
1949 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
1950 hash_keys.addrs.v4addrs.src = flkeys->addrs.v4addrs.src;
1951 hash_keys.addrs.v4addrs.dst = flkeys->addrs.v4addrs.dst;
1952 hash_keys.ports.src = flkeys->ports.src;
1953 hash_keys.ports.dst = flkeys->ports.dst;
1954 hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
Nikolay Aleksandrovbf4e0a32017-03-16 15:28:00 +02001955 } else {
1956 memset(&hash_keys, 0, sizeof(hash_keys));
1957 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
1958 hash_keys.addrs.v4addrs.src = fl4->saddr;
1959 hash_keys.addrs.v4addrs.dst = fl4->daddr;
1960 hash_keys.ports.src = fl4->fl4_sport;
1961 hash_keys.ports.dst = fl4->fl4_dport;
1962 hash_keys.basic.ip_proto = fl4->flowi4_proto;
1963 }
1964 break;
1965 }
1966 mhash = flow_hash_from_keys(&hash_keys);
1967
wenxu24ba1442019-02-24 11:36:20 +08001968 if (multipath_hash)
1969 mhash = jhash_2words(mhash, multipath_hash, 0);
1970
Nikolay Aleksandrovbf4e0a32017-03-16 15:28:00 +02001971 return mhash >> 1;
1972}
Peter Nørlund79a13152015-09-30 10:12:22 +02001973#endif /* CONFIG_IP_ROUTE_MULTIPATH */
1974
Stephen Hemminger5969f712008-04-10 01:52:09 -07001975static int ip_mkroute_input(struct sk_buff *skb,
1976 struct fib_result *res,
Stephen Hemminger5969f712008-04-10 01:52:09 -07001977 struct in_device *in_dev,
Roopa Prabhue37b1e92018-02-28 22:42:41 -05001978 __be32 daddr, __be32 saddr, u32 tos,
1979 struct flow_keys *hkeys)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001980{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001981#ifdef CONFIG_IP_ROUTE_MULTIPATH
Peter Nørlund0e884c72015-09-30 10:12:21 +02001982 if (res->fi && res->fi->fib_nhs > 1) {
David Ahern7efc0b62018-03-02 08:32:12 -08001983 int h = fib_multipath_hash(res->fi->fib_net, NULL, skb, hkeys);
Peter Nørlund0e884c72015-09-30 10:12:21 +02001984
Peter Nørlund0e884c72015-09-30 10:12:21 +02001985 fib_select_multipath(res, h);
1986 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001987#endif
1988
1989 /* create a routing cache entry */
David S. Millerc6cffba2012-07-26 11:14:38 +00001990 return __mkroute_input(skb, res, in_dev, daddr, saddr, tos);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001991}
1992
Linus Torvalds1da177e2005-04-16 15:20:36 -07001993/*
1994 * NOTE. We drop all the packets that has local source
1995 * addresses, because every properly looped back packet
1996 * must have correct destination already attached by output routine.
1997 *
1998 * Such approach solves two big problems:
1999 * 1. Not simplex devices are handled properly.
2000 * 2. IP spoofing attempts are filtered with 100% of guarantee.
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00002001 * called with rcu_read_lock()
Linus Torvalds1da177e2005-04-16 15:20:36 -07002002 */
2003
Al Viro9e12bb22006-09-26 21:25:20 -07002004static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
David Ahern5510cdf2017-05-25 10:42:34 -07002005 u8 tos, struct net_device *dev,
2006 struct fib_result *res)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002007{
Eric Dumazet96d36222010-06-02 19:21:31 +00002008 struct in_device *in_dev = __in_dev_get_rcu(dev);
Roopa Prabhue37b1e92018-02-28 22:42:41 -05002009 struct flow_keys *flkeys = NULL, _flkeys;
2010 struct net *net = dev_net(dev);
Thomas Graf1b7179d2015-07-21 10:43:59 +02002011 struct ip_tunnel_info *tun_info;
Roopa Prabhue37b1e92018-02-28 22:42:41 -05002012 int err = -EINVAL;
Eric Dumazet95c96172012-04-15 05:58:06 +00002013 unsigned int flags = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002014 u32 itag = 0;
Eric Dumazet95c96172012-04-15 05:58:06 +00002015 struct rtable *rth;
Roopa Prabhue37b1e92018-02-28 22:42:41 -05002016 struct flowi4 fl4;
Xin Long0a904782019-06-02 19:10:24 +08002017 bool do_cache = true;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002018
2019 /* IP on this device is disabled. */
2020
2021 if (!in_dev)
2022 goto out;
2023
2024 /* Check for the most weird martians, which can be not detected
2025 by fib_lookup.
2026 */
2027
Jiri Benc61adedf2015-08-20 13:56:25 +02002028 tun_info = skb_tunnel_info(skb);
Jiri Benc46fa0622015-08-28 20:48:19 +02002029 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
Thomas Graf1b7179d2015-07-21 10:43:59 +02002030 fl4.flowi4_tun_key.tun_id = tun_info->key.tun_id;
2031 else
2032 fl4.flowi4_tun_key.tun_id = 0;
Thomas Graff38a9eb2015-07-21 10:43:56 +02002033 skb_dst_drop(skb);
2034
Thomas Grafd0daebc32012-06-12 00:44:01 +00002035 if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002036 goto martian_source;
2037
David Ahern5510cdf2017-05-25 10:42:34 -07002038 res->fi = NULL;
2039 res->table = NULL;
Andy Walls27a954b2010-10-17 15:11:22 +00002040 if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002041 goto brd_input;
2042
2043 /* Accept zero addresses only to limited broadcast;
2044 * I even do not know to fix it or not. Waiting for complains :-)
2045 */
Joe Perchesf97c1e02007-12-16 13:45:43 -08002046 if (ipv4_is_zeronet(saddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002047 goto martian_source;
2048
Thomas Grafd0daebc32012-06-12 00:44:01 +00002049 if (ipv4_is_zeronet(daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002050 goto martian_destination;
2051
Eric Dumazet9eb43e72012-08-03 21:27:25 +00002052 /* Following code try to avoid calling IN_DEV_NET_ROUTE_LOCALNET(),
2053 * and call it once if daddr or/and saddr are loopback addresses
2054 */
2055 if (ipv4_is_loopback(daddr)) {
2056 if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
Thomas Grafd0daebc32012-06-12 00:44:01 +00002057 goto martian_destination;
Eric Dumazet9eb43e72012-08-03 21:27:25 +00002058 } else if (ipv4_is_loopback(saddr)) {
2059 if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
Thomas Grafd0daebc32012-06-12 00:44:01 +00002060 goto martian_source;
2061 }
2062
Linus Torvalds1da177e2005-04-16 15:20:36 -07002063 /*
2064 * Now we are ready to route packet.
2065 */
David S. Miller68a5e3d2011-03-11 20:07:33 -05002066 fl4.flowi4_oif = 0;
David Aherne0d56fd2016-09-10 12:09:57 -07002067 fl4.flowi4_iif = dev->ifindex;
David S. Miller68a5e3d2011-03-11 20:07:33 -05002068 fl4.flowi4_mark = skb->mark;
2069 fl4.flowi4_tos = tos;
2070 fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
David Ahernb84f7872015-09-29 19:07:07 -07002071 fl4.flowi4_flags = 0;
David S. Miller68a5e3d2011-03-11 20:07:33 -05002072 fl4.daddr = daddr;
2073 fl4.saddr = saddr;
Julian Anastasov8bcfd092017-02-26 15:50:52 +02002074 fl4.flowi4_uid = sock_net_uid(net, NULL);
Roopa Prabhue37b1e92018-02-28 22:42:41 -05002075
David Ahern5a847a62018-05-16 13:36:40 -07002076 if (fib4_rules_early_flow_dissect(net, skb, &fl4, &_flkeys)) {
Roopa Prabhue37b1e92018-02-28 22:42:41 -05002077 flkeys = &_flkeys;
David Ahern5a847a62018-05-16 13:36:40 -07002078 } else {
2079 fl4.flowi4_proto = 0;
2080 fl4.fl4_sport = 0;
2081 fl4.fl4_dport = 0;
2082 }
Roopa Prabhue37b1e92018-02-28 22:42:41 -05002083
David Ahern5510cdf2017-05-25 10:42:34 -07002084 err = fib_lookup(net, &fl4, res, 0);
Duan Jiongcd0f0b92014-02-14 18:26:22 +08002085 if (err != 0) {
2086 if (!IN_DEV_FORWARD(in_dev))
2087 err = -EHOSTUNREACH;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002088 goto no_route;
Duan Jiongcd0f0b92014-02-14 18:26:22 +08002089 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002090
Xin Long5cbf7772018-07-27 16:37:28 +08002091 if (res->type == RTN_BROADCAST) {
2092 if (IN_DEV_BFORWARD(in_dev))
2093 goto make_route;
Xin Long0a904782019-06-02 19:10:24 +08002094 /* not do cache if bc_forwarding is enabled */
2095 if (IPV4_DEVCONF_ALL(net, BC_FORWARDING))
2096 do_cache = false;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002097 goto brd_input;
Xin Long5cbf7772018-07-27 16:37:28 +08002098 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002099
David Ahern5510cdf2017-05-25 10:42:34 -07002100 if (res->type == RTN_LOCAL) {
Michael Smith5c04c812011-04-07 04:51:50 +00002101 err = fib_validate_source(skb, saddr, daddr, tos,
Cong Wang0d5edc62014-04-15 16:25:35 -07002102 0, dev, in_dev, &itag);
Eric Dumazetb5f7e752010-06-02 12:05:27 +00002103 if (err < 0)
David Ahern0d753962015-09-28 11:10:44 -07002104 goto martian_source;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002105 goto local_input;
2106 }
2107
Duan Jiongcd0f0b92014-02-14 18:26:22 +08002108 if (!IN_DEV_FORWARD(in_dev)) {
2109 err = -EHOSTUNREACH;
David S. Miller251da412012-06-26 16:27:09 -07002110 goto no_route;
Duan Jiongcd0f0b92014-02-14 18:26:22 +08002111 }
David Ahern5510cdf2017-05-25 10:42:34 -07002112 if (res->type != RTN_UNICAST)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002113 goto martian_destination;
2114
Xin Long5cbf7772018-07-27 16:37:28 +08002115make_route:
Roopa Prabhue37b1e92018-02-28 22:42:41 -05002116 err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos, flkeys);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002117out: return err;
2118
2119brd_input:
2120 if (skb->protocol != htons(ETH_P_IP))
2121 goto e_inval;
2122
David S. Miller41347dc2012-06-28 04:05:27 -07002123 if (!ipv4_is_zeronet(saddr)) {
David S. Miller9e56e382012-06-28 18:54:02 -07002124 err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
2125 in_dev, &itag);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002126 if (err < 0)
David Ahern0d753962015-09-28 11:10:44 -07002127 goto martian_source;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002128 }
2129 flags |= RTCF_BROADCAST;
David Ahern5510cdf2017-05-25 10:42:34 -07002130 res->type = RTN_BROADCAST;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002131 RT_CACHE_STAT_INC(in_brd);
2132
2133local_input:
Xin Long0a904782019-06-02 19:10:24 +08002134 do_cache &= res->fi && !itag;
2135 if (do_cache) {
2136 struct fib_nh_common *nhc = FIB_RES_NHC(*res);
David Aherneba618a2019-04-02 14:11:55 -07002137
Xin Long0a904782019-06-02 19:10:24 +08002138 rth = rcu_dereference(nhc->nhc_rth_input);
2139 if (rt_cache_valid(rth)) {
2140 skb_dst_set_noref(skb, &rth->dst);
2141 err = 0;
2142 goto out;
David S. Millerd2d68ba92012-07-17 12:58:50 -07002143 }
2144 }
2145
David Ahernf5a0aab2016-12-29 15:29:03 -08002146 rth = rt_dst_alloc(l3mdev_master_dev_rcu(dev) ? : net->loopback_dev,
David Ahern5510cdf2017-05-25 10:42:34 -07002147 flags | RTCF_LOCAL, res->type,
David S. Millerd2d68ba92012-07-17 12:58:50 -07002148 IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002149 if (!rth)
2150 goto e_nobufs;
2151
Changli Gaod8d1f302010-06-10 23:31:35 -07002152 rth->dst.output= ip_rt_bug;
David S. Millercf911662011-04-28 14:31:47 -07002153#ifdef CONFIG_IP_ROUTE_CLASSID
2154 rth->dst.tclassid = itag;
2155#endif
David S. Miller9917e1e82012-07-17 14:44:26 -07002156 rth->rt_is_input = 1;
Roopa Prabhu571e7222015-07-21 10:43:47 +02002157
Duan Jionga6254862014-02-17 15:23:43 +08002158 RT_CACHE_STAT_INC(in_slow_tot);
David Ahern5510cdf2017-05-25 10:42:34 -07002159 if (res->type == RTN_UNREACHABLE) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002160 rth->dst.input= ip_error;
2161 rth->dst.error= -err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002162 rth->rt_flags &= ~RTCF_LOCAL;
2163 }
Thomas Grafefd85702016-11-30 17:10:09 +01002164
Alexei Starovoitovdcdfdf52013-11-19 19:12:34 -08002165 if (do_cache) {
David Aherneba618a2019-04-02 14:11:55 -07002166 struct fib_nh_common *nhc = FIB_RES_NHC(*res);
Thomas Grafefd85702016-11-30 17:10:09 +01002167
David Aherneba618a2019-04-02 14:11:55 -07002168 rth->dst.lwtstate = lwtstate_get(nhc->nhc_lwtstate);
Thomas Grafefd85702016-11-30 17:10:09 +01002169 if (lwtunnel_input_redirect(rth->dst.lwtstate)) {
2170 WARN_ON(rth->dst.input == lwtunnel_input);
2171 rth->dst.lwtstate->orig_input = rth->dst.input;
2172 rth->dst.input = lwtunnel_input;
2173 }
2174
David Ahern87063a1f2019-04-30 07:45:49 -07002175 if (unlikely(!rt_cache_route(nhc, rth)))
Alexei Starovoitovdcdfdf52013-11-19 19:12:34 -08002176 rt_add_uncached_list(rth);
Alexei Starovoitovdcdfdf52013-11-19 19:12:34 -08002177 }
David S. Miller89aef892012-07-17 11:00:09 -07002178 skb_dst_set(skb, &rth->dst);
David S. Millerb23dd4f2011-03-02 14:31:35 -08002179 err = 0;
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00002180 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002181
2182no_route:
2183 RT_CACHE_STAT_INC(in_no_route);
David Ahern5510cdf2017-05-25 10:42:34 -07002184 res->type = RTN_UNREACHABLE;
2185 res->fi = NULL;
2186 res->table = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002187 goto local_input;
2188
2189 /*
2190 * Do not cache martian addresses: they should be logged (RFC1812)
2191 */
2192martian_destination:
2193 RT_CACHE_STAT_INC(in_martian_dst);
2194#ifdef CONFIG_IP_ROUTE_VERBOSE
Joe Perchese87cc472012-05-13 21:56:26 +00002195 if (IN_DEV_LOG_MARTIANS(in_dev))
2196 net_warn_ratelimited("martian destination %pI4 from %pI4, dev %s\n",
2197 &daddr, &saddr, dev->name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002198#endif
Dietmar Eggemann2c2910a2005-06-28 13:06:23 -07002199
Linus Torvalds1da177e2005-04-16 15:20:36 -07002200e_inval:
2201 err = -EINVAL;
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00002202 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002203
2204e_nobufs:
2205 err = -ENOBUFS;
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00002206 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002207
2208martian_source:
2209 ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00002210 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002211}
2212
David S. Millerc6cffba2012-07-26 11:14:38 +00002213int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2214 u8 tos, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002215{
David Ahern5510cdf2017-05-25 10:42:34 -07002216 struct fib_result res;
2217 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002218
Julian Anastasov6e280992017-02-26 17:14:35 +02002219 tos &= IPTOS_RT_MASK;
Eric Dumazet96d36222010-06-02 19:21:31 +00002220 rcu_read_lock();
David Ahern5510cdf2017-05-25 10:42:34 -07002221 err = ip_route_input_rcu(skb, daddr, saddr, tos, dev, &res);
2222 rcu_read_unlock();
Eric Dumazet96d36222010-06-02 19:21:31 +00002223
David Ahern5510cdf2017-05-25 10:42:34 -07002224 return err;
2225}
2226EXPORT_SYMBOL(ip_route_input_noref);
2227
2228/* called with rcu_read_lock held */
2229int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2230 u8 tos, struct net_device *dev, struct fib_result *res)
2231{
Linus Torvalds1da177e2005-04-16 15:20:36 -07002232 /* Multicast recognition logic is moved from route cache to here.
2233 The problem was that too many Ethernet cards have broken/missing
2234 hardware multicast filters :-( As result the host on multicasting
2235 network acquires a lot of useless route cache entries, sort of
2236 SDR messages from all the world. Now we try to get rid of them.
2237 Really, provided software IP multicast filter is organized
2238 reasonably (at least, hashed), it does not result in a slowdown
2239 comparing with route cache reject entries.
2240 Note, that multicast routers are not affected, because
2241 route cache entry is created eventually.
2242 */
Joe Perchesf97c1e02007-12-16 13:45:43 -08002243 if (ipv4_is_multicast(daddr)) {
Eric Dumazet96d36222010-06-02 19:21:31 +00002244 struct in_device *in_dev = __in_dev_get_rcu(dev);
David Aherne58e4152016-10-31 15:54:00 -07002245 int our = 0;
David Ahern5510cdf2017-05-25 10:42:34 -07002246 int err = -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002247
Paolo Abeni22c74762019-03-06 10:42:53 +01002248 if (!in_dev)
2249 return err;
2250 our = ip_check_mc_rcu(in_dev, daddr, saddr,
2251 ip_hdr(skb)->protocol);
David Aherne58e4152016-10-31 15:54:00 -07002252
2253 /* check l3 master if no match yet */
Paolo Abeni22c74762019-03-06 10:42:53 +01002254 if (!our && netif_is_l3_slave(dev)) {
David Aherne58e4152016-10-31 15:54:00 -07002255 struct in_device *l3_in_dev;
2256
2257 l3_in_dev = __in_dev_get_rcu(skb->dev);
2258 if (l3_in_dev)
2259 our = ip_check_mc_rcu(l3_in_dev, daddr, saddr,
2260 ip_hdr(skb)->protocol);
2261 }
2262
David Aherne58e4152016-10-31 15:54:00 -07002263 if (our
Linus Torvalds1da177e2005-04-16 15:20:36 -07002264#ifdef CONFIG_IP_MROUTE
David Aherne58e4152016-10-31 15:54:00 -07002265 ||
2266 (!ipv4_is_local_multicast(daddr) &&
2267 IN_DEV_MFORWARD(in_dev))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002268#endif
David Aherne58e4152016-10-31 15:54:00 -07002269 ) {
David Ahern5510cdf2017-05-25 10:42:34 -07002270 err = ip_route_input_mc(skb, daddr, saddr,
David Aherne58e4152016-10-31 15:54:00 -07002271 tos, dev, our);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002272 }
David Ahern5510cdf2017-05-25 10:42:34 -07002273 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002274 }
David Ahern5510cdf2017-05-25 10:42:34 -07002275
2276 return ip_route_input_slow(skb, daddr, saddr, tos, dev, res);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002277}
2278
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00002279/* called with rcu_read_lock() */
David S. Miller982721f2011-02-16 21:44:24 -08002280static struct rtable *__mkroute_output(const struct fib_result *res,
David Miller1a00fee2012-07-01 02:02:56 +00002281 const struct flowi4 *fl4, int orig_oif,
Julian Anastasovf61759e2011-12-02 11:39:42 +00002282 struct net_device *dev_out,
David S. Miller5ada5522011-02-17 15:29:00 -08002283 unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002284{
David S. Miller982721f2011-02-16 21:44:24 -08002285 struct fib_info *fi = res->fi;
David S. Millerf2bb4be2012-07-17 12:20:47 -07002286 struct fib_nh_exception *fnhe;
David S. Miller5ada5522011-02-17 15:29:00 -08002287 struct in_device *in_dev;
David S. Miller982721f2011-02-16 21:44:24 -08002288 u16 type = res->type;
David S. Miller5ada5522011-02-17 15:29:00 -08002289 struct rtable *rth;
Julian Anastasovc92b9652012-10-08 11:41:19 +00002290 bool do_cache;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002291
Thomas Grafd0daebc32012-06-12 00:44:01 +00002292 in_dev = __in_dev_get_rcu(dev_out);
2293 if (!in_dev)
David S. Miller5ada5522011-02-17 15:29:00 -08002294 return ERR_PTR(-EINVAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002295
Thomas Grafd0daebc32012-06-12 00:44:01 +00002296 if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
David Ahern5f02ce242016-09-10 12:09:54 -07002297 if (ipv4_is_loopback(fl4->saddr) &&
2298 !(dev_out->flags & IFF_LOOPBACK) &&
2299 !netif_is_l3_master(dev_out))
Thomas Grafd0daebc32012-06-12 00:44:01 +00002300 return ERR_PTR(-EINVAL);
2301
David S. Miller68a5e3d2011-03-11 20:07:33 -05002302 if (ipv4_is_lbcast(fl4->daddr))
David S. Miller982721f2011-02-16 21:44:24 -08002303 type = RTN_BROADCAST;
David S. Miller68a5e3d2011-03-11 20:07:33 -05002304 else if (ipv4_is_multicast(fl4->daddr))
David S. Miller982721f2011-02-16 21:44:24 -08002305 type = RTN_MULTICAST;
David S. Miller68a5e3d2011-03-11 20:07:33 -05002306 else if (ipv4_is_zeronet(fl4->daddr))
David S. Miller5ada5522011-02-17 15:29:00 -08002307 return ERR_PTR(-EINVAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002308
2309 if (dev_out->flags & IFF_LOOPBACK)
2310 flags |= RTCF_LOCAL;
2311
Julian Anastasov63617422012-11-22 23:04:14 +02002312 do_cache = true;
David S. Miller982721f2011-02-16 21:44:24 -08002313 if (type == RTN_BROADCAST) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002314 flags |= RTCF_BROADCAST | RTCF_LOCAL;
David S. Miller982721f2011-02-16 21:44:24 -08002315 fi = NULL;
2316 } else if (type == RTN_MULTICAST) {
Eric Dumazetdd28d1a2010-09-29 11:53:50 +00002317 flags |= RTCF_MULTICAST | RTCF_LOCAL;
David S. Miller813b3b52011-04-28 14:48:42 -07002318 if (!ip_check_mc_rcu(in_dev, fl4->daddr, fl4->saddr,
2319 fl4->flowi4_proto))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002320 flags &= ~RTCF_LOCAL;
Julian Anastasov63617422012-11-22 23:04:14 +02002321 else
2322 do_cache = false;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002323 /* If multicast route do not exist use
Eric Dumazetdd28d1a2010-09-29 11:53:50 +00002324 * default one, but do not gateway in this case.
2325 * Yes, it is hack.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002326 */
David S. Miller982721f2011-02-16 21:44:24 -08002327 if (fi && res->prefixlen < 4)
2328 fi = NULL;
Chris Friesend6d5e992016-04-08 15:21:30 -06002329 } else if ((type == RTN_LOCAL) && (orig_oif != 0) &&
2330 (orig_oif != dev_out->ifindex)) {
2331 /* For local routes that require a particular output interface
2332 * we do not want to cache the result. Caching the result
2333 * causes incorrect behaviour when there are multiple source
2334 * addresses on the interface, the end result being that if the
2335 * intended recipient is waiting on that interface for the
2336 * packet he won't receive it because it will be delivered on
2337 * the loopback interface and the IP_PKTINFO ipi_ifindex will
2338 * be set to the loopback interface as well.
2339 */
Julian Anastasov94720e32018-05-02 09:41:19 +03002340 do_cache = false;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002341 }
2342
David S. Millerf2bb4be2012-07-17 12:20:47 -07002343 fnhe = NULL;
Julian Anastasov63617422012-11-22 23:04:14 +02002344 do_cache &= fi != NULL;
Julian Anastasov94720e32018-05-02 09:41:19 +03002345 if (fi) {
David Aherneba618a2019-04-02 14:11:55 -07002346 struct fib_nh_common *nhc = FIB_RES_NHC(*res);
David S. Millerc5038a82012-07-31 15:02:02 -07002347 struct rtable __rcu **prth;
Eric Dumazetd26b3a72012-07-31 05:45:30 +00002348
David Aherna5995e72019-04-30 07:45:50 -07002349 fnhe = find_exception(nhc, fl4->daddr);
Julian Anastasov94720e32018-05-02 09:41:19 +03002350 if (!do_cache)
2351 goto add;
Xin Longdeed49d2016-02-18 21:21:19 +08002352 if (fnhe) {
Timo Teräs2ffae992013-06-27 10:27:05 +03002353 prth = &fnhe->fnhe_rth_output;
Julian Anastasov94720e32018-05-02 09:41:19 +03002354 } else {
2355 if (unlikely(fl4->flowi4_flags &
2356 FLOWI_FLAG_KNOWN_NH &&
David Ahernbdf00462019-04-05 16:30:26 -07002357 !(nhc->nhc_gw_family &&
David Aherneba618a2019-04-02 14:11:55 -07002358 nhc->nhc_scope == RT_SCOPE_LINK))) {
Julian Anastasov94720e32018-05-02 09:41:19 +03002359 do_cache = false;
2360 goto add;
Julian Anastasovc92b9652012-10-08 11:41:19 +00002361 }
David Ahern0f457a32019-04-30 07:45:48 -07002362 prth = raw_cpu_ptr(nhc->nhc_pcpu_rth_output);
Julian Anastasovc92b9652012-10-08 11:41:19 +00002363 }
David S. Millerc5038a82012-07-31 15:02:02 -07002364 rth = rcu_dereference(*prth);
Wei Wang9df16ef2017-06-17 10:42:31 -07002365 if (rt_cache_valid(rth) && dst_hold_safe(&rth->dst))
David S. Millerc5038a82012-07-31 15:02:02 -07002366 return rth;
David S. Millerf2bb4be2012-07-17 12:20:47 -07002367 }
Julian Anastasovc92b9652012-10-08 11:41:19 +00002368
2369add:
David Ahernd08c4f32015-09-02 13:58:34 -07002370 rth = rt_dst_alloc(dev_out, flags, type,
David S. Miller5c1e6aa2011-04-28 14:13:38 -07002371 IN_DEV_CONF_GET(in_dev, NOPOLICY),
David S. Millerf2bb4be2012-07-17 12:20:47 -07002372 IN_DEV_CONF_GET(in_dev, NOXFRM),
Julian Anastasovc92b9652012-10-08 11:41:19 +00002373 do_cache);
Dimitris Michailidis8391d072010-10-07 14:48:38 +00002374 if (!rth)
David S. Miller5ada5522011-02-17 15:29:00 -08002375 return ERR_PTR(-ENOBUFS);
Dimitris Michailidis8391d072010-10-07 14:48:38 +00002376
David Ahern9438c872017-08-11 17:02:02 -07002377 rth->rt_iif = orig_oif;
David Ahernb7503e02015-09-02 13:58:35 -07002378
Linus Torvalds1da177e2005-04-16 15:20:36 -07002379 RT_CACHE_STAT_INC(out_slow_tot);
2380
Linus Torvalds1da177e2005-04-16 15:20:36 -07002381 if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002382 if (flags & RTCF_LOCAL &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07002383 !(dev_out->flags & IFF_LOOPBACK)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002384 rth->dst.output = ip_mc_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002385 RT_CACHE_STAT_INC(out_slow_mc);
2386 }
2387#ifdef CONFIG_IP_MROUTE
David S. Miller982721f2011-02-16 21:44:24 -08002388 if (type == RTN_MULTICAST) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002389 if (IN_DEV_MFORWARD(in_dev) &&
David S. Miller813b3b52011-04-28 14:48:42 -07002390 !ipv4_is_local_multicast(fl4->daddr)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002391 rth->dst.input = ip_mr_input;
2392 rth->dst.output = ip_mc_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002393 }
2394 }
2395#endif
2396 }
2397
Wei Wanga4c2fd72017-06-17 10:42:42 -07002398 rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0, do_cache);
David Ahern99428952018-02-13 20:32:04 -08002399 lwtunnel_set_redirect(&rth->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002400
David S. Miller5ada5522011-02-17 15:29:00 -08002401 return rth;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002402}
2403
Linus Torvalds1da177e2005-04-16 15:20:36 -07002404/*
2405 * Major route resolver routine.
2406 */
2407
David Ahern3abd1ade2017-05-25 10:42:33 -07002408struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
2409 const struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002410{
Julian Anastasovf61759e2011-12-02 11:39:42 +00002411 __u8 tos = RT_FL_TOS(fl4);
Eric Dumazetd0ea2b12018-04-07 13:42:42 -07002412 struct fib_result res = {
2413 .type = RTN_UNSPEC,
2414 .fi = NULL,
2415 .table = NULL,
2416 .tclassid = 0,
2417 };
David S. Miller5ada5522011-02-17 15:29:00 -08002418 struct rtable *rth;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002419
Pavel Emelyanov1fb94892012-08-08 21:53:36 +00002420 fl4->flowi4_iif = LOOPBACK_IFINDEX;
David S. Miller813b3b52011-04-28 14:48:42 -07002421 fl4->flowi4_tos = tos & IPTOS_RT_MASK;
2422 fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
2423 RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
David S. Miller44713b62011-03-04 21:24:47 -08002424
David S. Miller010c2702011-02-17 15:37:09 -08002425 rcu_read_lock();
David Ahern3abd1ade2017-05-25 10:42:33 -07002426 rth = ip_route_output_key_hash_rcu(net, fl4, &res, skb);
2427 rcu_read_unlock();
2428
2429 return rth;
2430}
2431EXPORT_SYMBOL_GPL(ip_route_output_key_hash);
2432
2433struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4,
2434 struct fib_result *res,
2435 const struct sk_buff *skb)
2436{
2437 struct net_device *dev_out = NULL;
2438 int orig_oif = fl4->flowi4_oif;
2439 unsigned int flags = 0;
2440 struct rtable *rth;
2441 int err = -ENETUNREACH;
2442
David S. Miller813b3b52011-04-28 14:48:42 -07002443 if (fl4->saddr) {
David S. Millerb23dd4f2011-03-02 14:31:35 -08002444 rth = ERR_PTR(-EINVAL);
David S. Miller813b3b52011-04-28 14:48:42 -07002445 if (ipv4_is_multicast(fl4->saddr) ||
2446 ipv4_is_lbcast(fl4->saddr) ||
2447 ipv4_is_zeronet(fl4->saddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002448 goto out;
2449
Linus Torvalds1da177e2005-04-16 15:20:36 -07002450 /* I removed check for oif == dev_out->oif here.
2451 It was wrong for two reasons:
Denis V. Lunev1ab35272008-01-22 22:04:30 -08002452 1. ip_dev_find(net, saddr) can return wrong iface, if saddr
2453 is assigned to multiple interfaces.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002454 2. Moreover, we are allowed to send packets with saddr
2455 of another iface. --ANK
2456 */
2457
David S. Miller813b3b52011-04-28 14:48:42 -07002458 if (fl4->flowi4_oif == 0 &&
2459 (ipv4_is_multicast(fl4->daddr) ||
2460 ipv4_is_lbcast(fl4->daddr))) {
Julian Anastasova210d012008-10-01 07:28:28 -07002461 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
David S. Miller813b3b52011-04-28 14:48:42 -07002462 dev_out = __ip_dev_find(net, fl4->saddr, false);
Ian Morris51456b22015-04-03 09:17:26 +01002463 if (!dev_out)
Julian Anastasova210d012008-10-01 07:28:28 -07002464 goto out;
2465
Linus Torvalds1da177e2005-04-16 15:20:36 -07002466 /* Special hack: user can direct multicasts
2467 and limited broadcast via necessary interface
2468 without fiddling with IP_MULTICAST_IF or IP_PKTINFO.
2469 This hack is not just for fun, it allows
2470 vic,vat and friends to work.
2471 They bind socket to loopback, set ttl to zero
2472 and expect that it will work.
2473 From the viewpoint of routing cache they are broken,
2474 because we are not allowed to build multicast path
2475 with loopback source addr (look, routing cache
2476 cannot know, that ttl is zero, so that packet
2477 will not leave this host and route is valid).
2478 Luckily, this hack is good workaround.
2479 */
2480
David S. Miller813b3b52011-04-28 14:48:42 -07002481 fl4->flowi4_oif = dev_out->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002482 goto make_route;
2483 }
Julian Anastasova210d012008-10-01 07:28:28 -07002484
David S. Miller813b3b52011-04-28 14:48:42 -07002485 if (!(fl4->flowi4_flags & FLOWI_FLAG_ANYSRC)) {
Julian Anastasova210d012008-10-01 07:28:28 -07002486 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
David S. Miller813b3b52011-04-28 14:48:42 -07002487 if (!__ip_dev_find(net, fl4->saddr, false))
Julian Anastasova210d012008-10-01 07:28:28 -07002488 goto out;
Julian Anastasova210d012008-10-01 07:28:28 -07002489 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002490 }
2491
2492
David S. Miller813b3b52011-04-28 14:48:42 -07002493 if (fl4->flowi4_oif) {
2494 dev_out = dev_get_by_index_rcu(net, fl4->flowi4_oif);
David S. Millerb23dd4f2011-03-02 14:31:35 -08002495 rth = ERR_PTR(-ENODEV);
Ian Morris51456b22015-04-03 09:17:26 +01002496 if (!dev_out)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002497 goto out;
Herbert Xue5ed6392005-10-03 14:35:55 -07002498
2499 /* RACE: Check return value of inet_select_addr instead. */
Eric Dumazetfc75fc82010-12-22 04:39:39 +00002500 if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) {
David S. Millerb23dd4f2011-03-02 14:31:35 -08002501 rth = ERR_PTR(-ENETUNREACH);
Eric Dumazetfc75fc82010-12-22 04:39:39 +00002502 goto out;
2503 }
David S. Miller813b3b52011-04-28 14:48:42 -07002504 if (ipv4_is_local_multicast(fl4->daddr) ||
Andrew Lunn6a211652015-05-01 16:39:54 +02002505 ipv4_is_lbcast(fl4->daddr) ||
2506 fl4->flowi4_proto == IPPROTO_IGMP) {
David S. Miller813b3b52011-04-28 14:48:42 -07002507 if (!fl4->saddr)
2508 fl4->saddr = inet_select_addr(dev_out, 0,
2509 RT_SCOPE_LINK);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002510 goto make_route;
2511 }
Jiri Benc0a7e2262013-10-04 17:04:48 +02002512 if (!fl4->saddr) {
David S. Miller813b3b52011-04-28 14:48:42 -07002513 if (ipv4_is_multicast(fl4->daddr))
2514 fl4->saddr = inet_select_addr(dev_out, 0,
2515 fl4->flowi4_scope);
2516 else if (!fl4->daddr)
2517 fl4->saddr = inet_select_addr(dev_out, 0,
2518 RT_SCOPE_HOST);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002519 }
2520 }
2521
David S. Miller813b3b52011-04-28 14:48:42 -07002522 if (!fl4->daddr) {
2523 fl4->daddr = fl4->saddr;
2524 if (!fl4->daddr)
2525 fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK);
Denis V. Lunevb40afd02008-01-22 22:06:19 -08002526 dev_out = net->loopback_dev;
Pavel Emelyanov1fb94892012-08-08 21:53:36 +00002527 fl4->flowi4_oif = LOOPBACK_IFINDEX;
David Ahern3abd1ade2017-05-25 10:42:33 -07002528 res->type = RTN_LOCAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002529 flags |= RTCF_LOCAL;
2530 goto make_route;
2531 }
2532
David Ahern3abd1ade2017-05-25 10:42:33 -07002533 err = fib_lookup(net, fl4, res, 0);
Nikola Forró0315e382015-09-17 16:01:32 +02002534 if (err) {
David Ahern3abd1ade2017-05-25 10:42:33 -07002535 res->fi = NULL;
2536 res->table = NULL;
David Ahern6104e112016-10-12 13:20:11 -07002537 if (fl4->flowi4_oif &&
David Aherne58e4152016-10-31 15:54:00 -07002538 (ipv4_is_multicast(fl4->daddr) ||
2539 !netif_index_is_l3_master(net, fl4->flowi4_oif))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002540 /* Apparently, routing tables are wrong. Assume,
2541 that the destination is on link.
2542
2543 WHY? DW.
2544 Because we are allowed to send to iface
2545 even if it has NO routes and NO assigned
2546 addresses. When oif is specified, routing
2547 tables are looked up with only one purpose:
2548 to catch if destination is gatewayed, rather than
2549 direct. Moreover, if MSG_DONTROUTE is set,
2550 we send packet, ignoring both routing tables
2551 and ifaddr state. --ANK
2552
2553
2554 We could make it even if oif is unknown,
2555 likely IPv6, but we do not.
2556 */
2557
David S. Miller813b3b52011-04-28 14:48:42 -07002558 if (fl4->saddr == 0)
2559 fl4->saddr = inet_select_addr(dev_out, 0,
2560 RT_SCOPE_LINK);
David Ahern3abd1ade2017-05-25 10:42:33 -07002561 res->type = RTN_UNICAST;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002562 goto make_route;
2563 }
Nikola Forró0315e382015-09-17 16:01:32 +02002564 rth = ERR_PTR(err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002565 goto out;
2566 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002567
David Ahern3abd1ade2017-05-25 10:42:33 -07002568 if (res->type == RTN_LOCAL) {
David S. Miller813b3b52011-04-28 14:48:42 -07002569 if (!fl4->saddr) {
David Ahern3abd1ade2017-05-25 10:42:33 -07002570 if (res->fi->fib_prefsrc)
2571 fl4->saddr = res->fi->fib_prefsrc;
Joel Sing9fc3bbb2011-01-03 20:24:20 +00002572 else
David S. Miller813b3b52011-04-28 14:48:42 -07002573 fl4->saddr = fl4->daddr;
Joel Sing9fc3bbb2011-01-03 20:24:20 +00002574 }
David Ahern5f02ce242016-09-10 12:09:54 -07002575
2576 /* L3 master device is the loopback for that domain */
David Ahern3abd1ade2017-05-25 10:42:33 -07002577 dev_out = l3mdev_master_dev_rcu(FIB_RES_DEV(*res)) ? :
Robert Shearmanb7c84872017-04-21 21:34:59 +01002578 net->loopback_dev;
David Ahern839da4d2017-08-10 13:49:10 -07002579
2580 /* make sure orig_oif points to fib result device even
2581 * though packet rx/tx happens over loopback or l3mdev
2582 */
2583 orig_oif = FIB_RES_OIF(*res);
2584
David S. Miller813b3b52011-04-28 14:48:42 -07002585 fl4->flowi4_oif = dev_out->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002586 flags |= RTCF_LOCAL;
2587 goto make_route;
2588 }
2589
David Ahern3abd1ade2017-05-25 10:42:33 -07002590 fib_select_path(net, res, fl4, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002591
David Ahern3abd1ade2017-05-25 10:42:33 -07002592 dev_out = FIB_RES_DEV(*res);
David S. Miller813b3b52011-04-28 14:48:42 -07002593 fl4->flowi4_oif = dev_out->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002594
2595
2596make_route:
David Ahern3abd1ade2017-05-25 10:42:33 -07002597 rth = __mkroute_output(res, fl4, orig_oif, dev_out, flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002598
David S. Miller010c2702011-02-17 15:37:09 -08002599out:
David S. Millerb23dd4f2011-03-02 14:31:35 -08002600 return rth;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002601}
Arnaldo Carvalho de Melod8c97a92005-08-09 20:12:12 -07002602
Jianzhao Wangae2688d2010-09-08 14:35:43 -07002603static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie)
2604{
2605 return NULL;
2606}
2607
Steffen Klassertebb762f2011-11-23 02:12:51 +00002608static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst)
Roland Dreierec831ea2011-01-31 13:16:00 -08002609{
Steffen Klassert618f9bc2011-11-23 02:13:31 +00002610 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
2611
2612 return mtu ? : dst->dev->mtu;
Roland Dreierec831ea2011-01-31 13:16:00 -08002613}
2614
David S. Miller6700c272012-07-17 03:29:28 -07002615static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
2616 struct sk_buff *skb, u32 mtu)
David S. Miller14e50e52007-05-24 18:17:54 -07002617{
2618}
2619
David S. Miller6700c272012-07-17 03:29:28 -07002620static void ipv4_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
2621 struct sk_buff *skb)
David S. Millerb587ee32012-07-12 00:39:24 -07002622{
2623}
2624
Held Bernhard0972ddb2011-04-24 22:07:32 +00002625static u32 *ipv4_rt_blackhole_cow_metrics(struct dst_entry *dst,
2626 unsigned long old)
2627{
2628 return NULL;
2629}
2630
David S. Miller14e50e52007-05-24 18:17:54 -07002631static struct dst_ops ipv4_dst_blackhole_ops = {
2632 .family = AF_INET,
Jianzhao Wangae2688d2010-09-08 14:35:43 -07002633 .check = ipv4_blackhole_dst_check,
Steffen Klassertebb762f2011-11-23 02:12:51 +00002634 .mtu = ipv4_blackhole_mtu,
Eric Dumazet214f45c2011-02-18 11:39:01 -08002635 .default_advmss = ipv4_default_advmss,
David S. Miller14e50e52007-05-24 18:17:54 -07002636 .update_pmtu = ipv4_rt_blackhole_update_pmtu,
David S. Millerb587ee32012-07-12 00:39:24 -07002637 .redirect = ipv4_rt_blackhole_redirect,
Held Bernhard0972ddb2011-04-24 22:07:32 +00002638 .cow_metrics = ipv4_rt_blackhole_cow_metrics,
David S. Millerd3aaeb32011-07-18 00:40:17 -07002639 .neigh_lookup = ipv4_neigh_lookup,
David S. Miller14e50e52007-05-24 18:17:54 -07002640};
2641
David S. Miller2774c132011-03-01 14:59:04 -08002642struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig)
David S. Miller14e50e52007-05-24 18:17:54 -07002643{
David S. Miller2774c132011-03-01 14:59:04 -08002644 struct rtable *ort = (struct rtable *) dst_orig;
David S. Millerf5b0a872012-07-19 12:31:33 -07002645 struct rtable *rt;
David S. Miller14e50e52007-05-24 18:17:54 -07002646
Steffen Klassert6c0e7282017-10-09 08:43:55 +02002647 rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_DEAD, 0);
David S. Miller14e50e52007-05-24 18:17:54 -07002648 if (rt) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002649 struct dst_entry *new = &rt->dst;
David S. Miller14e50e52007-05-24 18:17:54 -07002650
David S. Miller14e50e52007-05-24 18:17:54 -07002651 new->__use = 1;
Herbert Xu352e5122007-11-13 21:34:06 -08002652 new->input = dst_discard;
Eric W. Biedermanede20592015-10-07 16:48:47 -05002653 new->output = dst_discard_out;
David S. Miller14e50e52007-05-24 18:17:54 -07002654
Wei Wang1dbe32522017-06-17 10:42:26 -07002655 new->dev = net->loopback_dev;
David S. Miller14e50e52007-05-24 18:17:54 -07002656 if (new->dev)
2657 dev_hold(new->dev);
2658
David S. Miller9917e1e82012-07-17 14:44:26 -07002659 rt->rt_is_input = ort->rt_is_input;
David S. Miller5e2b61f2011-03-04 21:47:09 -08002660 rt->rt_iif = ort->rt_iif;
David S. Miller59436342012-07-10 06:58:42 -07002661 rt->rt_pmtu = ort->rt_pmtu;
Sabrina Dubrocad52e5a72018-03-14 10:21:14 +01002662 rt->rt_mtu_locked = ort->rt_mtu_locked;
David S. Miller14e50e52007-05-24 18:17:54 -07002663
fan.duca4c3fc2013-07-30 08:33:53 +08002664 rt->rt_genid = rt_genid_ipv4(net);
David S. Miller14e50e52007-05-24 18:17:54 -07002665 rt->rt_flags = ort->rt_flags;
2666 rt->rt_type = ort->rt_type;
David Ahern1550c172019-04-05 16:30:27 -07002667 rt->rt_gw_family = ort->rt_gw_family;
2668 if (rt->rt_gw_family == AF_INET)
2669 rt->rt_gw4 = ort->rt_gw4;
David Ahern0f5f7d72019-04-05 16:30:29 -07002670 else if (rt->rt_gw_family == AF_INET6)
2671 rt->rt_gw6 = ort->rt_gw6;
David S. Miller14e50e52007-05-24 18:17:54 -07002672
David S. Millercaacf052012-07-31 15:06:50 -07002673 INIT_LIST_HEAD(&rt->rt_uncached);
David S. Miller14e50e52007-05-24 18:17:54 -07002674 }
2675
David S. Miller2774c132011-03-01 14:59:04 -08002676 dst_release(dst_orig);
2677
2678 return rt ? &rt->dst : ERR_PTR(-ENOMEM);
David S. Miller14e50e52007-05-24 18:17:54 -07002679}
2680
David S. Miller9d6ec932011-03-12 01:12:47 -05002681struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
Eric Dumazet6f9c9612015-09-25 07:39:10 -07002682 const struct sock *sk)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002683{
David S. Miller9d6ec932011-03-12 01:12:47 -05002684 struct rtable *rt = __ip_route_output_key(net, flp4);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002685
David S. Millerb23dd4f2011-03-02 14:31:35 -08002686 if (IS_ERR(rt))
2687 return rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002688
David S. Miller56157872011-05-02 14:37:45 -07002689 if (flp4->flowi4_proto)
Steffen Klassertf92ee612014-09-16 10:08:40 +02002690 rt = (struct rtable *)xfrm_lookup_route(net, &rt->dst,
2691 flowi4_to_flowi(flp4),
2692 sk, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002693
David S. Millerb23dd4f2011-03-02 14:31:35 -08002694 return rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002695}
Arnaldo Carvalho de Melod8c97a92005-08-09 20:12:12 -07002696EXPORT_SYMBOL_GPL(ip_route_output_flow);
2697
David Ahern3765d352017-05-25 10:42:36 -07002698/* called with rcu_read_lock held */
Roopa Prabhu404eb772018-05-22 14:03:27 -07002699static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
2700 struct rtable *rt, u32 table_id, struct flowi4 *fl4,
2701 struct sk_buff *skb, u32 portid, u32 seq)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002702{
Linus Torvalds1da177e2005-04-16 15:20:36 -07002703 struct rtmsg *r;
Thomas Grafbe403ea2006-08-17 18:15:17 -07002704 struct nlmsghdr *nlh;
Steffen Klassert2bc8ca42011-10-11 01:12:02 +00002705 unsigned long expires = 0;
David S. Millerf1850712012-07-10 07:26:01 -07002706 u32 error;
Julian Anastasov521f5492012-07-20 12:02:08 +03002707 u32 metrics[RTAX_MAX];
Thomas Grafbe403ea2006-08-17 18:15:17 -07002708
David Ahernd3166e02017-05-25 10:42:35 -07002709 nlh = nlmsg_put(skb, portid, seq, RTM_NEWROUTE, sizeof(*r), 0);
Ian Morris51456b22015-04-03 09:17:26 +01002710 if (!nlh)
Patrick McHardy26932562007-01-31 23:16:40 -08002711 return -EMSGSIZE;
Thomas Grafbe403ea2006-08-17 18:15:17 -07002712
2713 r = nlmsg_data(nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002714 r->rtm_family = AF_INET;
2715 r->rtm_dst_len = 32;
2716 r->rtm_src_len = 0;
David Millerd6c0a4f2012-07-01 02:02:59 +00002717 r->rtm_tos = fl4->flowi4_tos;
David Ahern8a430ed2017-01-11 15:42:17 -08002718 r->rtm_table = table_id < 256 ? table_id : RT_TABLE_COMPAT;
David Ahernc36ba662015-09-02 13:58:36 -07002719 if (nla_put_u32(skb, RTA_TABLE, table_id))
David S. Millerf3756b72012-04-01 20:39:02 -04002720 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002721 r->rtm_type = rt->rt_type;
2722 r->rtm_scope = RT_SCOPE_UNIVERSE;
2723 r->rtm_protocol = RTPROT_UNSPEC;
2724 r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
2725 if (rt->rt_flags & RTCF_NOTIFY)
2726 r->rtm_flags |= RTM_F_NOTIFY;
Hannes Frederic Sowadf4d9252015-01-23 12:01:26 +01002727 if (IPCB(skb)->flags & IPSKB_DOREDIRECT)
2728 r->rtm_flags |= RTCF_DOREDIRECT;
Thomas Grafbe403ea2006-08-17 18:15:17 -07002729
Jiri Benc930345e2015-03-29 16:59:25 +02002730 if (nla_put_in_addr(skb, RTA_DST, dst))
David S. Millerf3756b72012-04-01 20:39:02 -04002731 goto nla_put_failure;
David Miller1a00fee2012-07-01 02:02:56 +00002732 if (src) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002733 r->rtm_src_len = 32;
Jiri Benc930345e2015-03-29 16:59:25 +02002734 if (nla_put_in_addr(skb, RTA_SRC, src))
David S. Millerf3756b72012-04-01 20:39:02 -04002735 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002736 }
David S. Millerf3756b72012-04-01 20:39:02 -04002737 if (rt->dst.dev &&
2738 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2739 goto nla_put_failure;
Patrick McHardyc7066f72011-01-14 13:36:42 +01002740#ifdef CONFIG_IP_ROUTE_CLASSID
David S. Millerf3756b72012-04-01 20:39:02 -04002741 if (rt->dst.tclassid &&
2742 nla_put_u32(skb, RTA_FLOW, rt->dst.tclassid))
2743 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002744#endif
David S. Miller41347dc2012-06-28 04:05:27 -07002745 if (!rt_is_input_route(rt) &&
David Millerd6c0a4f2012-07-01 02:02:59 +00002746 fl4->saddr != src) {
Jiri Benc930345e2015-03-29 16:59:25 +02002747 if (nla_put_in_addr(skb, RTA_PREFSRC, fl4->saddr))
David S. Millerf3756b72012-04-01 20:39:02 -04002748 goto nla_put_failure;
2749 }
David Ahern1550c172019-04-05 16:30:27 -07002750 if (rt->rt_gw_family == AF_INET &&
David Ahern0f5f7d72019-04-05 16:30:29 -07002751 nla_put_in_addr(skb, RTA_GATEWAY, rt->rt_gw4)) {
David S. Millerf3756b72012-04-01 20:39:02 -04002752 goto nla_put_failure;
David Ahern0f5f7d72019-04-05 16:30:29 -07002753 } else if (rt->rt_gw_family == AF_INET6) {
2754 int alen = sizeof(struct in6_addr);
2755 struct nlattr *nla;
2756 struct rtvia *via;
2757
2758 nla = nla_reserve(skb, RTA_VIA, alen + 2);
2759 if (!nla)
2760 goto nla_put_failure;
2761
2762 via = nla_data(nla);
2763 via->rtvia_family = AF_INET6;
2764 memcpy(via->rtvia_addr, &rt->rt_gw6, alen);
2765 }
Thomas Grafbe403ea2006-08-17 18:15:17 -07002766
Steffen Klassertee9a8f72012-10-08 00:56:54 +00002767 expires = rt->dst.expires;
2768 if (expires) {
2769 unsigned long now = jiffies;
2770
2771 if (time_before(now, expires))
2772 expires -= now;
2773 else
2774 expires = 0;
2775 }
2776
Julian Anastasov521f5492012-07-20 12:02:08 +03002777 memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
Steffen Klassertee9a8f72012-10-08 00:56:54 +00002778 if (rt->rt_pmtu && expires)
Julian Anastasov521f5492012-07-20 12:02:08 +03002779 metrics[RTAX_MTU - 1] = rt->rt_pmtu;
Sabrina Dubrocad52e5a72018-03-14 10:21:14 +01002780 if (rt->rt_mtu_locked && expires)
2781 metrics[RTAX_LOCK - 1] |= BIT(RTAX_MTU);
Julian Anastasov521f5492012-07-20 12:02:08 +03002782 if (rtnetlink_put_metrics(skb, metrics) < 0)
Thomas Grafbe403ea2006-08-17 18:15:17 -07002783 goto nla_put_failure;
2784
David Millerb4869882012-07-01 02:03:01 +00002785 if (fl4->flowi4_mark &&
stephen hemminger68aaed52012-10-10 08:27:25 +00002786 nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark))
David S. Millerf3756b72012-04-01 20:39:02 -04002787 goto nla_put_failure;
Eric Dumazet963bfee2010-07-20 22:03:14 +00002788
Lorenzo Colitti622ec2c2016-11-04 02:23:42 +09002789 if (!uid_eq(fl4->flowi4_uid, INVALID_UID) &&
2790 nla_put_u32(skb, RTA_UID,
2791 from_kuid_munged(current_user_ns(), fl4->flowi4_uid)))
2792 goto nla_put_failure;
2793
Changli Gaod8d1f302010-06-10 23:31:35 -07002794 error = rt->dst.error;
Thomas Grafbe403ea2006-08-17 18:15:17 -07002795
David S. Millerc7537962010-11-11 17:07:48 -08002796 if (rt_is_input_route(rt)) {
Nicolas Dichtel8caaf7b2012-12-04 01:03:07 +00002797#ifdef CONFIG_IP_MROUTE
2798 if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) &&
2799 IPV4_DEVCONF_ALL(net, MC_FORWARDING)) {
2800 int err = ipmr_get_route(net, skb,
2801 fl4->saddr, fl4->daddr,
David Ahern9f09eae2017-01-06 17:39:06 -08002802 r, portid);
Nikolay Aleksandrov2cf75072016-09-25 23:08:31 +02002803
Nicolas Dichtel8caaf7b2012-12-04 01:03:07 +00002804 if (err <= 0) {
David Ahern0c8d8032017-01-05 19:32:46 -08002805 if (err == 0)
2806 return 0;
2807 goto nla_put_failure;
Nicolas Dichtel8caaf7b2012-12-04 01:03:07 +00002808 }
2809 } else
2810#endif
Roopa Prabhu404eb772018-05-22 14:03:27 -07002811 if (nla_put_u32(skb, RTA_IIF, fl4->flowi4_iif))
Nicolas Dichtel8caaf7b2012-12-04 01:03:07 +00002812 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002813 }
2814
David S. Millerf1850712012-07-10 07:26:01 -07002815 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, error) < 0)
Thomas Grafe3703b32006-11-27 09:27:07 -08002816 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002817
Johannes Berg053c0952015-01-16 22:09:00 +01002818 nlmsg_end(skb, nlh);
2819 return 0;
Thomas Grafbe403ea2006-08-17 18:15:17 -07002820
2821nla_put_failure:
Patrick McHardy26932562007-01-31 23:16:40 -08002822 nlmsg_cancel(skb, nlh);
2823 return -EMSGSIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002824}
2825
Roopa Prabhu404eb772018-05-22 14:03:27 -07002826static struct sk_buff *inet_rtm_getroute_build_skb(__be32 src, __be32 dst,
2827 u8 ip_proto, __be16 sport,
2828 __be16 dport)
2829{
2830 struct sk_buff *skb;
2831 struct iphdr *iph;
2832
2833 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2834 if (!skb)
2835 return NULL;
2836
2837 /* Reserve room for dummy headers, this skb can pass
2838 * through good chunk of routing engine.
2839 */
2840 skb_reset_mac_header(skb);
2841 skb_reset_network_header(skb);
2842 skb->protocol = htons(ETH_P_IP);
2843 iph = skb_put(skb, sizeof(struct iphdr));
2844 iph->protocol = ip_proto;
2845 iph->saddr = src;
2846 iph->daddr = dst;
2847 iph->version = 0x4;
2848 iph->frag_off = 0;
2849 iph->ihl = 0x5;
2850 skb_set_transport_header(skb, skb->len);
2851
2852 switch (iph->protocol) {
2853 case IPPROTO_UDP: {
2854 struct udphdr *udph;
2855
2856 udph = skb_put_zero(skb, sizeof(struct udphdr));
2857 udph->source = sport;
2858 udph->dest = dport;
2859 udph->len = sizeof(struct udphdr);
2860 udph->check = 0;
2861 break;
2862 }
2863 case IPPROTO_TCP: {
2864 struct tcphdr *tcph;
2865
2866 tcph = skb_put_zero(skb, sizeof(struct tcphdr));
2867 tcph->source = sport;
2868 tcph->dest = dport;
2869 tcph->doff = sizeof(struct tcphdr) / 4;
2870 tcph->rst = 1;
2871 tcph->check = ~tcp_v4_check(sizeof(struct tcphdr),
2872 src, dst, 0);
2873 break;
2874 }
2875 case IPPROTO_ICMP: {
2876 struct icmphdr *icmph;
2877
2878 icmph = skb_put_zero(skb, sizeof(struct icmphdr));
2879 icmph->type = ICMP_ECHO;
2880 icmph->code = 0;
2881 }
2882 }
2883
2884 return skb;
2885}
2886
Jakub Kicinskia00302b62019-01-18 10:46:19 -08002887static int inet_rtm_valid_getroute_req(struct sk_buff *skb,
2888 const struct nlmsghdr *nlh,
2889 struct nlattr **tb,
2890 struct netlink_ext_ack *extack)
2891{
2892 struct rtmsg *rtm;
2893 int i, err;
2894
2895 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
2896 NL_SET_ERR_MSG(extack,
2897 "ipv4: Invalid header for route get request");
2898 return -EINVAL;
2899 }
2900
2901 if (!netlink_strict_get_check(skb))
Johannes Berg8cb08172019-04-26 14:07:28 +02002902 return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
2903 rtm_ipv4_policy, extack);
Jakub Kicinskia00302b62019-01-18 10:46:19 -08002904
2905 rtm = nlmsg_data(nlh);
2906 if ((rtm->rtm_src_len && rtm->rtm_src_len != 32) ||
2907 (rtm->rtm_dst_len && rtm->rtm_dst_len != 32) ||
2908 rtm->rtm_table || rtm->rtm_protocol ||
2909 rtm->rtm_scope || rtm->rtm_type) {
2910 NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for route get request");
2911 return -EINVAL;
2912 }
2913
2914 if (rtm->rtm_flags & ~(RTM_F_NOTIFY |
2915 RTM_F_LOOKUP_TABLE |
2916 RTM_F_FIB_MATCH)) {
2917 NL_SET_ERR_MSG(extack, "ipv4: Unsupported rtm_flags for route get request");
2918 return -EINVAL;
2919 }
2920
Johannes Berg8cb08172019-04-26 14:07:28 +02002921 err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
2922 rtm_ipv4_policy, extack);
Jakub Kicinskia00302b62019-01-18 10:46:19 -08002923 if (err)
2924 return err;
2925
2926 if ((tb[RTA_SRC] && !rtm->rtm_src_len) ||
2927 (tb[RTA_DST] && !rtm->rtm_dst_len)) {
2928 NL_SET_ERR_MSG(extack, "ipv4: rtm_src_len and rtm_dst_len must be 32 for IPv4");
2929 return -EINVAL;
2930 }
2931
2932 for (i = 0; i <= RTA_MAX; i++) {
2933 if (!tb[i])
2934 continue;
2935
2936 switch (i) {
2937 case RTA_IIF:
2938 case RTA_OIF:
2939 case RTA_SRC:
2940 case RTA_DST:
2941 case RTA_IP_PROTO:
2942 case RTA_SPORT:
2943 case RTA_DPORT:
2944 case RTA_MARK:
2945 case RTA_UID:
2946 break;
2947 default:
2948 NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in route get request");
2949 return -EINVAL;
2950 }
2951 }
2952
2953 return 0;
2954}
2955
David Ahernc21ef3e2017-04-16 09:48:24 -07002956static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
2957 struct netlink_ext_ack *extack)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002958{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09002959 struct net *net = sock_net(in_skb->sk);
Thomas Grafd889ce32006-08-17 18:15:44 -07002960 struct nlattr *tb[RTA_MAX+1];
Roopa Prabhu404eb772018-05-22 14:03:27 -07002961 u32 table_id = RT_TABLE_MAIN;
2962 __be16 sport = 0, dport = 0;
David Ahern3765d352017-05-25 10:42:36 -07002963 struct fib_result res = {};
Roopa Prabhu404eb772018-05-22 14:03:27 -07002964 u8 ip_proto = IPPROTO_UDP;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002965 struct rtable *rt = NULL;
Roopa Prabhu404eb772018-05-22 14:03:27 -07002966 struct sk_buff *skb;
2967 struct rtmsg *rtm;
Maciej Żenczykowskie8e3fbe2018-09-29 23:44:47 -07002968 struct flowi4 fl4 = {};
Al Viro9e12bb22006-09-26 21:25:20 -07002969 __be32 dst = 0;
2970 __be32 src = 0;
Roopa Prabhu404eb772018-05-22 14:03:27 -07002971 kuid_t uid;
Al Viro9e12bb22006-09-26 21:25:20 -07002972 u32 iif;
Thomas Grafd889ce32006-08-17 18:15:44 -07002973 int err;
Eric Dumazet963bfee2010-07-20 22:03:14 +00002974 int mark;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002975
Jakub Kicinskia00302b62019-01-18 10:46:19 -08002976 err = inet_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
Thomas Grafd889ce32006-08-17 18:15:44 -07002977 if (err < 0)
Roopa Prabhu404eb772018-05-22 14:03:27 -07002978 return err;
Thomas Grafd889ce32006-08-17 18:15:44 -07002979
2980 rtm = nlmsg_data(nlh);
Jiri Benc67b61f62015-03-29 16:59:26 +02002981 src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0;
2982 dst = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0;
Thomas Grafd889ce32006-08-17 18:15:44 -07002983 iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
Eric Dumazet963bfee2010-07-20 22:03:14 +00002984 mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0;
Lorenzo Colitti622ec2c2016-11-04 02:23:42 +09002985 if (tb[RTA_UID])
2986 uid = make_kuid(current_user_ns(), nla_get_u32(tb[RTA_UID]));
2987 else
2988 uid = (iif ? INVALID_UID : current_uid());
Linus Torvalds1da177e2005-04-16 15:20:36 -07002989
Roopa Prabhu404eb772018-05-22 14:03:27 -07002990 if (tb[RTA_IP_PROTO]) {
2991 err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO],
Hangbin Liu5e1a99e2019-02-27 16:15:29 +08002992 &ip_proto, AF_INET, extack);
Roopa Prabhu404eb772018-05-22 14:03:27 -07002993 if (err)
2994 return err;
2995 }
Florian Laryschbbadb9a2017-04-07 14:42:20 +02002996
Roopa Prabhu404eb772018-05-22 14:03:27 -07002997 if (tb[RTA_SPORT])
2998 sport = nla_get_be16(tb[RTA_SPORT]);
2999
3000 if (tb[RTA_DPORT])
3001 dport = nla_get_be16(tb[RTA_DPORT]);
3002
3003 skb = inet_rtm_getroute_build_skb(src, dst, ip_proto, sport, dport);
3004 if (!skb)
3005 return -ENOBUFS;
Florian Laryschbbadb9a2017-04-07 14:42:20 +02003006
David Millerd6c0a4f2012-07-01 02:02:59 +00003007 fl4.daddr = dst;
3008 fl4.saddr = src;
3009 fl4.flowi4_tos = rtm->rtm_tos;
3010 fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0;
3011 fl4.flowi4_mark = mark;
Lorenzo Colitti622ec2c2016-11-04 02:23:42 +09003012 fl4.flowi4_uid = uid;
Roopa Prabhu404eb772018-05-22 14:03:27 -07003013 if (sport)
3014 fl4.fl4_sport = sport;
3015 if (dport)
3016 fl4.fl4_dport = dport;
3017 fl4.flowi4_proto = ip_proto;
David Millerd6c0a4f2012-07-01 02:02:59 +00003018
David Ahern3765d352017-05-25 10:42:36 -07003019 rcu_read_lock();
3020
Linus Torvalds1da177e2005-04-16 15:20:36 -07003021 if (iif) {
Thomas Grafd889ce32006-08-17 18:15:44 -07003022 struct net_device *dev;
3023
David Ahern3765d352017-05-25 10:42:36 -07003024 dev = dev_get_by_index_rcu(net, iif);
Ian Morris51456b22015-04-03 09:17:26 +01003025 if (!dev) {
Thomas Grafd889ce32006-08-17 18:15:44 -07003026 err = -ENODEV;
Roopa Prabhu404eb772018-05-22 14:03:27 -07003027 goto errout_rcu;
Thomas Grafd889ce32006-08-17 18:15:44 -07003028 }
3029
Roopa Prabhu404eb772018-05-22 14:03:27 -07003030 fl4.flowi4_iif = iif; /* for rt_fill_info */
Linus Torvalds1da177e2005-04-16 15:20:36 -07003031 skb->dev = dev;
Eric Dumazet963bfee2010-07-20 22:03:14 +00003032 skb->mark = mark;
David Ahern3765d352017-05-25 10:42:36 -07003033 err = ip_route_input_rcu(skb, dst, src, rtm->rtm_tos,
3034 dev, &res);
Thomas Grafd889ce32006-08-17 18:15:44 -07003035
Eric Dumazet511c3f92009-06-02 05:14:27 +00003036 rt = skb_rtable(skb);
Changli Gaod8d1f302010-06-10 23:31:35 -07003037 if (err == 0 && rt->dst.error)
3038 err = -rt->dst.error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003039 } else {
Lorenzo Colitti6503a302018-01-11 18:36:26 +09003040 fl4.flowi4_iif = LOOPBACK_IFINDEX;
Ido Schimmel21f94772018-12-20 17:03:27 +00003041 skb->dev = net->loopback_dev;
David Ahern3765d352017-05-25 10:42:36 -07003042 rt = ip_route_output_key_hash_rcu(net, &fl4, &res, skb);
David S. Millerb23dd4f2011-03-02 14:31:35 -08003043 err = 0;
3044 if (IS_ERR(rt))
3045 err = PTR_ERR(rt);
Florian Westphal2c87d632017-08-14 00:52:58 +02003046 else
3047 skb_dst_set(skb, &rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003048 }
Thomas Grafd889ce32006-08-17 18:15:44 -07003049
Linus Torvalds1da177e2005-04-16 15:20:36 -07003050 if (err)
Roopa Prabhu404eb772018-05-22 14:03:27 -07003051 goto errout_rcu;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003052
Linus Torvalds1da177e2005-04-16 15:20:36 -07003053 if (rtm->rtm_flags & RTM_F_NOTIFY)
3054 rt->rt_flags |= RTCF_NOTIFY;
3055
David Ahernc36ba662015-09-02 13:58:36 -07003056 if (rtm->rtm_flags & RTM_F_LOOKUP_TABLE)
David Ahern68e813a2018-02-14 14:24:28 -08003057 table_id = res.table ? res.table->tb_id : 0;
David Ahernc36ba662015-09-02 13:58:36 -07003058
Roopa Prabhu404eb772018-05-22 14:03:27 -07003059 /* reset skb for netlink reply msg */
3060 skb_trim(skb, 0);
3061 skb_reset_network_header(skb);
3062 skb_reset_transport_header(skb);
3063 skb_reset_mac_header(skb);
3064
Roopa Prabhubc3aae22017-08-16 12:38:52 -07003065 if (rtm->rtm_flags & RTM_F_FIB_MATCH) {
3066 if (!res.fi) {
3067 err = fib_props[res.type].error;
3068 if (!err)
3069 err = -EHOSTUNREACH;
Roopa Prabhu404eb772018-05-22 14:03:27 -07003070 goto errout_rcu;
Roopa Prabhubc3aae22017-08-16 12:38:52 -07003071 }
Roopa Prabhub6179812017-05-25 10:42:39 -07003072 err = fib_dump_info(skb, NETLINK_CB(in_skb).portid,
3073 nlh->nlmsg_seq, RTM_NEWROUTE, table_id,
3074 rt->rt_type, res.prefix, res.prefixlen,
3075 fl4.flowi4_tos, res.fi, 0);
Roopa Prabhubc3aae22017-08-16 12:38:52 -07003076 } else {
Roopa Prabhu404eb772018-05-22 14:03:27 -07003077 err = rt_fill_info(net, dst, src, rt, table_id, &fl4, skb,
Roopa Prabhuba52d612017-05-31 22:53:25 -07003078 NETLINK_CB(in_skb).portid, nlh->nlmsg_seq);
Roopa Prabhubc3aae22017-08-16 12:38:52 -07003079 }
David S. Miller7b46a642015-01-18 23:36:08 -05003080 if (err < 0)
Roopa Prabhu404eb772018-05-22 14:03:27 -07003081 goto errout_rcu;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003082
David Ahern3765d352017-05-25 10:42:36 -07003083 rcu_read_unlock();
3084
Eric W. Biederman15e47302012-09-07 20:12:54 +00003085 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003086
Thomas Grafd889ce32006-08-17 18:15:44 -07003087errout_free:
Roopa Prabhu404eb772018-05-22 14:03:27 -07003088 return err;
3089errout_rcu:
David Ahern3765d352017-05-25 10:42:36 -07003090 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07003091 kfree_skb(skb);
Roopa Prabhu404eb772018-05-22 14:03:27 -07003092 goto errout_free;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003093}
3094
Linus Torvalds1da177e2005-04-16 15:20:36 -07003095void ip_rt_multicast_event(struct in_device *in_dev)
3096{
Nicolas Dichtel4ccfe6d2012-09-07 00:45:29 +00003097 rt_cache_flush(dev_net(in_dev->dev));
Linus Torvalds1da177e2005-04-16 15:20:36 -07003098}
3099
3100#ifdef CONFIG_SYSCTL
Gao feng082c7ca2013-02-19 00:43:12 +00003101static int ip_rt_gc_interval __read_mostly = 60 * HZ;
3102static int ip_rt_gc_min_interval __read_mostly = HZ / 2;
3103static int ip_rt_gc_elasticity __read_mostly = 8;
Arnd Bergmann773daa32018-02-28 14:32:48 +01003104static int ip_min_valid_pmtu __read_mostly = IPV4_MIN_MTU;
Gao feng082c7ca2013-02-19 00:43:12 +00003105
Joe Perchesfe2c6332013-06-11 23:04:25 -07003106static int ipv4_sysctl_rtcache_flush(struct ctl_table *__ctl, int write,
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07003107 void __user *buffer,
Linus Torvalds1da177e2005-04-16 15:20:36 -07003108 size_t *lenp, loff_t *ppos)
3109{
Timo Teräs5aad1de2013-05-27 20:46:33 +00003110 struct net *net = (struct net *)__ctl->extra1;
3111
Linus Torvalds1da177e2005-04-16 15:20:36 -07003112 if (write) {
Timo Teräs5aad1de2013-05-27 20:46:33 +00003113 rt_cache_flush(net);
3114 fnhe_genid_bump(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003115 return 0;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09003116 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07003117
3118 return -EINVAL;
3119}
3120
Joe Perchesfe2c6332013-06-11 23:04:25 -07003121static struct ctl_table ipv4_route_table[] = {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09003122 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003123 .procname = "gc_thresh",
3124 .data = &ipv4_dst_ops.gc_thresh,
3125 .maxlen = sizeof(int),
3126 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08003127 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07003128 },
3129 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003130 .procname = "max_size",
3131 .data = &ip_rt_max_size,
3132 .maxlen = sizeof(int),
3133 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08003134 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07003135 },
3136 {
3137 /* Deprecated. Use gc_min_interval_ms */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09003138
Linus Torvalds1da177e2005-04-16 15:20:36 -07003139 .procname = "gc_min_interval",
3140 .data = &ip_rt_gc_min_interval,
3141 .maxlen = sizeof(int),
3142 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08003143 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07003144 },
3145 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003146 .procname = "gc_min_interval_ms",
3147 .data = &ip_rt_gc_min_interval,
3148 .maxlen = sizeof(int),
3149 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08003150 .proc_handler = proc_dointvec_ms_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07003151 },
3152 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003153 .procname = "gc_timeout",
3154 .data = &ip_rt_gc_timeout,
3155 .maxlen = sizeof(int),
3156 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08003157 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07003158 },
3159 {
Eric Dumazet9f28a2f2011-12-21 15:47:16 -05003160 .procname = "gc_interval",
3161 .data = &ip_rt_gc_interval,
3162 .maxlen = sizeof(int),
3163 .mode = 0644,
3164 .proc_handler = proc_dointvec_jiffies,
3165 },
3166 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003167 .procname = "redirect_load",
3168 .data = &ip_rt_redirect_load,
3169 .maxlen = sizeof(int),
3170 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08003171 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07003172 },
3173 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003174 .procname = "redirect_number",
3175 .data = &ip_rt_redirect_number,
3176 .maxlen = sizeof(int),
3177 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08003178 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07003179 },
3180 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003181 .procname = "redirect_silence",
3182 .data = &ip_rt_redirect_silence,
3183 .maxlen = sizeof(int),
3184 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08003185 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07003186 },
3187 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003188 .procname = "error_cost",
3189 .data = &ip_rt_error_cost,
3190 .maxlen = sizeof(int),
3191 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08003192 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07003193 },
3194 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003195 .procname = "error_burst",
3196 .data = &ip_rt_error_burst,
3197 .maxlen = sizeof(int),
3198 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08003199 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07003200 },
3201 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003202 .procname = "gc_elasticity",
3203 .data = &ip_rt_gc_elasticity,
3204 .maxlen = sizeof(int),
3205 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08003206 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07003207 },
3208 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003209 .procname = "mtu_expires",
3210 .data = &ip_rt_mtu_expires,
3211 .maxlen = sizeof(int),
3212 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08003213 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07003214 },
3215 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003216 .procname = "min_pmtu",
3217 .data = &ip_rt_min_pmtu,
3218 .maxlen = sizeof(int),
3219 .mode = 0644,
Sabrina Dubrocac7272c22018-02-26 16:13:43 +01003220 .proc_handler = proc_dointvec_minmax,
3221 .extra1 = &ip_min_valid_pmtu,
Linus Torvalds1da177e2005-04-16 15:20:36 -07003222 },
3223 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003224 .procname = "min_adv_mss",
3225 .data = &ip_rt_min_advmss,
3226 .maxlen = sizeof(int),
3227 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08003228 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07003229 },
Eric W. Biedermanf8572d82009-11-05 13:32:03 -08003230 { }
Linus Torvalds1da177e2005-04-16 15:20:36 -07003231};
Denis V. Lunev39a23e72008-07-05 19:02:33 -07003232
Denis V. Lunev39a23e72008-07-05 19:02:33 -07003233static struct ctl_table ipv4_route_flush_table[] = {
3234 {
Denis V. Lunev39a23e72008-07-05 19:02:33 -07003235 .procname = "flush",
3236 .maxlen = sizeof(int),
3237 .mode = 0200,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08003238 .proc_handler = ipv4_sysctl_rtcache_flush,
Denis V. Lunev39a23e72008-07-05 19:02:33 -07003239 },
Eric W. Biedermanf8572d82009-11-05 13:32:03 -08003240 { },
Denis V. Lunev39a23e72008-07-05 19:02:33 -07003241};
3242
3243static __net_init int sysctl_route_net_init(struct net *net)
3244{
3245 struct ctl_table *tbl;
3246
3247 tbl = ipv4_route_flush_table;
Octavian Purdila09ad9bc2009-11-25 15:14:13 -08003248 if (!net_eq(net, &init_net)) {
Denis V. Lunev39a23e72008-07-05 19:02:33 -07003249 tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL);
Ian Morris51456b22015-04-03 09:17:26 +01003250 if (!tbl)
Denis V. Lunev39a23e72008-07-05 19:02:33 -07003251 goto err_dup;
Eric W. Biederman464dc802012-11-16 03:02:59 +00003252
3253 /* Don't export sysctls to unprivileged users */
3254 if (net->user_ns != &init_user_ns)
3255 tbl[0].procname = NULL;
Denis V. Lunev39a23e72008-07-05 19:02:33 -07003256 }
3257 tbl[0].extra1 = net;
3258
Eric W. Biedermanec8f23c2012-04-19 13:44:49 +00003259 net->ipv4.route_hdr = register_net_sysctl(net, "net/ipv4/route", tbl);
Ian Morris51456b22015-04-03 09:17:26 +01003260 if (!net->ipv4.route_hdr)
Denis V. Lunev39a23e72008-07-05 19:02:33 -07003261 goto err_reg;
3262 return 0;
3263
3264err_reg:
3265 if (tbl != ipv4_route_flush_table)
3266 kfree(tbl);
3267err_dup:
3268 return -ENOMEM;
3269}
3270
3271static __net_exit void sysctl_route_net_exit(struct net *net)
3272{
3273 struct ctl_table *tbl;
3274
3275 tbl = net->ipv4.route_hdr->ctl_table_arg;
3276 unregister_net_sysctl_table(net->ipv4.route_hdr);
3277 BUG_ON(tbl == ipv4_route_flush_table);
3278 kfree(tbl);
3279}
3280
3281static __net_initdata struct pernet_operations sysctl_route_ops = {
3282 .init = sysctl_route_net_init,
3283 .exit = sysctl_route_net_exit,
3284};
Linus Torvalds1da177e2005-04-16 15:20:36 -07003285#endif
3286
Neil Horman3ee94372010-05-08 01:57:52 -07003287static __net_init int rt_genid_init(struct net *net)
Denis V. Lunev9f5e97e2008-07-05 19:02:59 -07003288{
fan.duca4c3fc2013-07-30 08:33:53 +08003289 atomic_set(&net->ipv4.rt_genid, 0);
Timo Teräs5aad1de2013-05-27 20:46:33 +00003290 atomic_set(&net->fnhe_genid, 0);
Jason A. Donenfeld7aed9f72017-06-07 23:01:20 -04003291 atomic_set(&net->ipv4.dev_addr_genid, get_random_int());
Denis V. Lunev9f5e97e2008-07-05 19:02:59 -07003292 return 0;
3293}
3294
Neil Horman3ee94372010-05-08 01:57:52 -07003295static __net_initdata struct pernet_operations rt_genid_ops = {
3296 .init = rt_genid_init,
Denis V. Lunev9f5e97e2008-07-05 19:02:59 -07003297};
3298
David S. Millerc3426b42012-06-09 16:27:05 -07003299static int __net_init ipv4_inetpeer_init(struct net *net)
3300{
3301 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3302
3303 if (!bp)
3304 return -ENOMEM;
3305 inet_peer_base_init(bp);
3306 net->ipv4.peers = bp;
3307 return 0;
3308}
3309
3310static void __net_exit ipv4_inetpeer_exit(struct net *net)
3311{
3312 struct inet_peer_base *bp = net->ipv4.peers;
3313
3314 net->ipv4.peers = NULL;
David S. Miller56a6b242012-06-09 16:32:41 -07003315 inetpeer_invalidate_tree(bp);
David S. Millerc3426b42012-06-09 16:27:05 -07003316 kfree(bp);
3317}
3318
3319static __net_initdata struct pernet_operations ipv4_inetpeer_ops = {
3320 .init = ipv4_inetpeer_init,
3321 .exit = ipv4_inetpeer_exit,
3322};
Denis V. Lunev9f5e97e2008-07-05 19:02:59 -07003323
Patrick McHardyc7066f72011-01-14 13:36:42 +01003324#ifdef CONFIG_IP_ROUTE_CLASSID
Tejun Heo7d720c32010-02-16 15:20:26 +00003325struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
Patrick McHardyc7066f72011-01-14 13:36:42 +01003326#endif /* CONFIG_IP_ROUTE_CLASSID */
Linus Torvalds1da177e2005-04-16 15:20:36 -07003327
Linus Torvalds1da177e2005-04-16 15:20:36 -07003328int __init ip_rt_init(void)
3329{
Eric Dumazet5055c372015-01-14 15:17:06 -08003330 int cpu;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003331
Kees Cook6da2ec52018-06-12 13:55:00 -07003332 ip_idents = kmalloc_array(IP_IDENTS_SZ, sizeof(*ip_idents),
3333 GFP_KERNEL);
Eric Dumazet73f156a2014-06-02 05:26:03 -07003334 if (!ip_idents)
3335 panic("IP: failed to allocate ip_idents\n");
3336
3337 prandom_bytes(ip_idents, IP_IDENTS_SZ * sizeof(*ip_idents));
3338
Eric Dumazet355b590c2015-05-01 10:37:49 -07003339 ip_tstamps = kcalloc(IP_IDENTS_SZ, sizeof(*ip_tstamps), GFP_KERNEL);
3340 if (!ip_tstamps)
3341 panic("IP: failed to allocate ip_tstamps\n");
3342
Eric Dumazet5055c372015-01-14 15:17:06 -08003343 for_each_possible_cpu(cpu) {
3344 struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu);
3345
3346 INIT_LIST_HEAD(&ul->head);
3347 spin_lock_init(&ul->lock);
3348 }
Patrick McHardyc7066f72011-01-14 13:36:42 +01003349#ifdef CONFIG_IP_ROUTE_CLASSID
Ingo Molnar0dcec8c2009-02-25 14:07:33 +01003350 ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct));
Linus Torvalds1da177e2005-04-16 15:20:36 -07003351 if (!ip_rt_acct)
3352 panic("IP: failed to allocate ip_rt_acct\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07003353#endif
3354
Alexey Dobriyane5d679f332006-08-26 19:25:52 -07003355 ipv4_dst_ops.kmem_cachep =
3356 kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0,
Paul Mundt20c2df82007-07-20 10:11:58 +09003357 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003358
David S. Miller14e50e52007-05-24 18:17:54 -07003359 ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep;
3360
Eric Dumazetfc66f952010-10-08 06:37:34 +00003361 if (dst_entries_init(&ipv4_dst_ops) < 0)
3362 panic("IP: failed to allocate ipv4_dst_ops counter\n");
3363
3364 if (dst_entries_init(&ipv4_dst_blackhole_ops) < 0)
3365 panic("IP: failed to allocate ipv4_dst_blackhole_ops counter\n");
3366
David S. Miller89aef892012-07-17 11:00:09 -07003367 ipv4_dst_ops.gc_thresh = ~0;
3368 ip_rt_max_size = INT_MAX;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003369
Linus Torvalds1da177e2005-04-16 15:20:36 -07003370 devinet_init();
3371 ip_fib_init();
3372
Denis V. Lunev73b38712008-02-28 20:51:18 -08003373 if (ip_rt_proc_init())
Joe Perches058bd4d2012-03-11 18:36:11 +00003374 pr_err("Unable to create route proc files\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07003375#ifdef CONFIG_XFRM
3376 xfrm_init();
Steffen Klassert703fb942012-11-13 08:52:24 +01003377 xfrm4_init();
Linus Torvalds1da177e2005-04-16 15:20:36 -07003378#endif
Florian Westphal394f51a2017-08-15 16:34:44 +02003379 rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL,
3380 RTNL_FLAG_DOIT_UNLOCKED);
Thomas Graf63f34442007-03-22 11:55:17 -07003381
Denis V. Lunev39a23e72008-07-05 19:02:33 -07003382#ifdef CONFIG_SYSCTL
3383 register_pernet_subsys(&sysctl_route_ops);
3384#endif
Neil Horman3ee94372010-05-08 01:57:52 -07003385 register_pernet_subsys(&rt_genid_ops);
David S. Millerc3426b42012-06-09 16:27:05 -07003386 register_pernet_subsys(&ipv4_inetpeer_ops);
Tim Hansen1bcdca32017-10-04 15:59:49 -04003387 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003388}
3389
Al Viroa1bc6eb2008-07-30 06:32:52 -04003390#ifdef CONFIG_SYSCTL
Al Viroeeb61f72008-07-27 08:59:33 +01003391/*
3392 * We really need to sanitize the damn ipv4 init order, then all
3393 * this nonsense will go away.
3394 */
3395void __init ip_static_sysctl_init(void)
3396{
Eric W. Biederman4e5ca782012-04-19 13:32:39 +00003397 register_net_sysctl(&init_net, "net/ipv4/route", ipv4_route_table);
Al Viroeeb61f72008-07-27 08:59:33 +01003398}
Al Viroa1bc6eb2008-07-30 06:32:52 -04003399#endif