// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		ROUTE - implementation of the IP router.
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *		Linus Torvalds, <Linus.Torvalds@helsinki.fi>
 *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Fixes:
 *		Alan Cox	:	Verify area fixes.
 *		Alan Cox	:	cli() protects routing changes
 *		Rui Oliveira	:	ICMP routing table updates
 *		(rco@di.uminho.pt)	Routing table insertion and update
 *		Linus Torvalds	:	Rewrote bits to be sensible
 *		Alan Cox	:	Added BSD route gw semantics
 *		Alan Cox	:	Super /proc >4K
 *		Alan Cox	:	MTU in route table
 *		Alan Cox	:	MSS actually. Also added the window
 *					clamper.
 *		Sam Lantinga	:	Fixed route matching in rt_del()
 *		Alan Cox	:	Routing cache support.
 *		Alan Cox	:	Removed compatibility cruft.
 *		Alan Cox	:	RTF_REJECT support.
 *		Alan Cox	:	TCP irtt support.
 *		Jonathan Naylor	:	Added Metric support.
 *	Miquel van Smoorenburg	:	BSD API fixes.
 *	Miquel van Smoorenburg	:	Metrics.
 *		Alan Cox	:	Use __u32 properly
 *		Alan Cox	:	Aligned routing errors more closely with BSD;
 *					our system is still very different.
 *		Alan Cox	:	Faster /proc handling
 *	Alexey Kuznetsov	:	Massive rework to support tree based routing,
 *					routing caches and better behaviour.
 *
 *		Olaf Erb	:	irtt wasn't being copied right.
 *		Bjorn Ekwall	:	Kerneld route support.
 *		Alan Cox	:	Multicast fixed (I hope)
 *		Pavel Krauz	:	Limited broadcast fixed
 *		Mike McLagan	:	Routing by source
 *	Alexey Kuznetsov	:	End of old history. Split to fib.c and
 *					route.c and rewritten from scratch.
 *		Andi Kleen	:	Load-limit warning messages.
 *	Vitaly E. Lavrov	:	Transparent proxy revived after year coma.
 *	Vitaly E. Lavrov	:	Race condition in ip_route_input_slow.
 *	Tobias Ringstrom	:	Uninitialized res.type in ip_route_output_slow.
 *	Vladimir V. Ivanov	:	IP rule info (flowid) is really useful.
 *		Marc Boucher	:	routing by fwmark
 *	Robert Olsson		:	Added rt_cache statistics
 *	Arnaldo C. Melo		:	Convert proc stuff to seq_file
 *	Eric Dumazet		:	hashed spinlocks and rt_check_expire() fixes.
 *	Ilia Sotnikov		:	Ignore TOS on PMTUD and Redirect
 *	Ilia Sotnikov		:	Removed TOS from hash calculations
 */

#define pr_fmt(fmt) "IPv4: " fmt

#include <linux/module.h>
#include <linux/uaccess.h>
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/pkt_sched.h>
#include <linux/mroute.h>
#include <linux/netfilter_ipv4.h>
#include <linux/random.h>
#include <linux/rcupdate.h>
#include <linux/times.h>
#include <linux/slab.h>
#include <linux/jhash.h>
#include <net/dst.h>
#include <net/dst_metadata.h>
#include <net/net_namespace.h>
#include <net/protocol.h>
#include <net/ip.h>
#include <net/route.h>
#include <net/inetpeer.h>
#include <net/sock.h>
#include <net/ip_fib.h>
#include <net/nexthop.h>
#include <net/arp.h>
#include <net/tcp.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/lwtunnel.h>
#include <net/netevent.h>
#include <net/rtnetlink.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
#include <net/secure_seq.h>
#include <net/ip_tunnels.h>
#include <net/l3mdev.h>

#include "fib_lookup.h"

#define RT_FL_TOS(oldflp4) \
	((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))

#define RT_GC_TIMEOUT (300*HZ)

static int ip_rt_max_size;
static int ip_rt_redirect_number __read_mostly	= 9;
static int ip_rt_redirect_load __read_mostly	= HZ / 50;
static int ip_rt_redirect_silence __read_mostly	= ((HZ / 50) << (9 + 1));
static int ip_rt_error_cost __read_mostly	= HZ;
static int ip_rt_error_burst __read_mostly	= 5 * HZ;
static int ip_rt_mtu_expires __read_mostly	= 10 * 60 * HZ;
static u32 ip_rt_min_pmtu __read_mostly		= 512 + 20 + 20;
static int ip_rt_min_advmss __read_mostly	= 256;

static int ip_rt_gc_timeout __read_mostly	= RT_GC_TIMEOUT;
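
/* Illustrative reading of the defaults above (derived from the
 * expressions themselves): the time limits are in jiffies via HZ, so
 * ip_rt_mtu_expires is ten minutes and ip_rt_redirect_load is HZ/50
 * (20 ms at HZ=1000).  The ip_rt_min_pmtu default of 512 + 20 + 20
 * works out to 552 bytes, i.e. 512 bytes of payload plus a 20-byte IP
 * header and a 20-byte TCP header.
 */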

/*
 * Interface to generic destination cache.
 */

static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int	 ipv4_default_advmss(const struct dst_entry *dst);
static unsigned int	 ipv4_mtu(const struct dst_entry *dst);
static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
static void		 ipv4_link_failure(struct sk_buff *skb);
static void		 ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
					   struct sk_buff *skb, u32 mtu);
static void		 ip_do_redirect(struct dst_entry *dst, struct sock *sk,
					struct sk_buff *skb);
static void		 ipv4_dst_destroy(struct dst_entry *dst);

static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
{
	WARN_ON(1);
	return NULL;
}

static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
					   struct sk_buff *skb,
					   const void *daddr);
static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr);

static struct dst_ops ipv4_dst_ops = {
	.family =		AF_INET,
	.check =		ipv4_dst_check,
	.default_advmss =	ipv4_default_advmss,
	.mtu =			ipv4_mtu,
	.cow_metrics =		ipv4_cow_metrics,
	.destroy =		ipv4_dst_destroy,
	.negative_advice =	ipv4_negative_advice,
	.link_failure =		ipv4_link_failure,
	.update_pmtu =		ip_rt_update_pmtu,
	.redirect =		ip_do_redirect,
	.local_out =		__ip_local_out,
	.neigh_lookup =		ipv4_neigh_lookup,
	.confirm_neigh =	ipv4_confirm_neigh,
};

#define ECN_OR_COST(class)	TC_PRIO_##class

const __u8 ip_tos2prio[16] = {
	TC_PRIO_BESTEFFORT,
	ECN_OR_COST(BESTEFFORT),
	TC_PRIO_BESTEFFORT,
	ECN_OR_COST(BESTEFFORT),
	TC_PRIO_BULK,
	ECN_OR_COST(BULK),
	TC_PRIO_BULK,
	ECN_OR_COST(BULK),
	TC_PRIO_INTERACTIVE,
	ECN_OR_COST(INTERACTIVE),
	TC_PRIO_INTERACTIVE,
	ECN_OR_COST(INTERACTIVE),
	TC_PRIO_INTERACTIVE_BULK,
	ECN_OR_COST(INTERACTIVE_BULK),
	TC_PRIO_INTERACTIVE_BULK,
	ECN_OR_COST(INTERACTIVE_BULK)
};
EXPORT_SYMBOL(ip_tos2prio);

static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
#define RT_CACHE_STAT_INC(field) raw_cpu_inc(rt_cache_stat.field)
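
/* Usage sketch (illustrative, not part of the original file): the
 * table above maps the four TOS bits of an IP header to a queueing
 * priority, so a typical lookup is
 *
 *	prio = ip_tos2prio[IPTOS_TOS(iph->tos) >> 1];
 *
 * and RT_CACHE_STAT_INC() bumps a per-cpu counter without locking,
 * e.g. RT_CACHE_STAT_INC(in_slow_tot).
 */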

#ifdef CONFIG_PROC_FS
static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
{
	if (*pos)
		return NULL;
	return SEQ_START_TOKEN;
}

static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;
	return NULL;
}

static void rt_cache_seq_stop(struct seq_file *seq, void *v)
{
}

static int rt_cache_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_printf(seq, "%-127s\n",
			   "Iface\tDestination\tGateway \tFlags\t\tRefCnt\tUse\t"
			   "Metric\tSource\t\tMTU\tWindow\tIRTT\tTOS\tHHRef\t"
			   "HHUptod\tSpecDst");
	return 0;
}

static const struct seq_operations rt_cache_seq_ops = {
	.start  = rt_cache_seq_start,
	.next   = rt_cache_seq_next,
	.stop   = rt_cache_seq_stop,
	.show   = rt_cache_seq_show,
};

static int rt_cache_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &rt_cache_seq_ops);
}

static const struct file_operations rt_cache_seq_fops = {
	.open	 = rt_cache_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release,
};


static void *rt_cpu_seq_start(struct seq_file *seq, loff_t *pos)
{
	int cpu;

	if (*pos == 0)
		return SEQ_START_TOKEN;

	for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
		if (!cpu_possible(cpu))
			continue;
		*pos = cpu+1;
		return &per_cpu(rt_cache_stat, cpu);
	}
	return NULL;
}

static void *rt_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	int cpu;

	for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
		if (!cpu_possible(cpu))
			continue;
		*pos = cpu+1;
		return &per_cpu(rt_cache_stat, cpu);
	}
	return NULL;

}

static void rt_cpu_seq_stop(struct seq_file *seq, void *v)
{

}

static int rt_cpu_seq_show(struct seq_file *seq, void *v)
{
	struct rt_cache_stat *st = v;

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "entries  in_hit   in_slow_tot in_slow_mc in_no_route in_brd in_martian_dst in_martian_src  out_hit  out_slow_tot out_slow_mc  gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n");
		return 0;
	}

	seq_printf(seq,"%08x  %08x %08x %08x %08x %08x %08x %08x "
		   " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n",
		   dst_entries_get_slow(&ipv4_dst_ops),
		   0, /* st->in_hit */
		   st->in_slow_tot,
		   st->in_slow_mc,
		   st->in_no_route,
		   st->in_brd,
		   st->in_martian_dst,
		   st->in_martian_src,

		   0, /* st->out_hit */
		   st->out_slow_tot,
		   st->out_slow_mc,

		   0, /* st->gc_total */
		   0, /* st->gc_ignored */
		   0, /* st->gc_goal_miss */
		   0, /* st->gc_dst_overflow */
		   0, /* st->in_hlist_search */
		   0  /* st->out_hlist_search */
		);
	return 0;
}

static const struct seq_operations rt_cpu_seq_ops = {
	.start  = rt_cpu_seq_start,
	.next   = rt_cpu_seq_next,
	.stop   = rt_cpu_seq_stop,
	.show   = rt_cpu_seq_show,
};


static int rt_cpu_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &rt_cpu_seq_ops);
}

static const struct file_operations rt_cpu_seq_fops = {
	.open	 = rt_cpu_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release,
};

#ifdef CONFIG_IP_ROUTE_CLASSID
static int rt_acct_proc_show(struct seq_file *m, void *v)
{
	struct ip_rt_acct *dst, *src;
	unsigned int i, j;

	dst = kcalloc(256, sizeof(struct ip_rt_acct), GFP_KERNEL);
	if (!dst)
		return -ENOMEM;

	for_each_possible_cpu(i) {
		src = (struct ip_rt_acct *)per_cpu_ptr(ip_rt_acct, i);
		for (j = 0; j < 256; j++) {
			dst[j].o_bytes   += src[j].o_bytes;
			dst[j].o_packets += src[j].o_packets;
			dst[j].i_bytes   += src[j].i_bytes;
			dst[j].i_packets += src[j].i_packets;
		}
	}

	seq_write(m, dst, 256 * sizeof(struct ip_rt_acct));
	kfree(dst);
	return 0;
}
#endif

static int __net_init ip_rt_do_proc_init(struct net *net)
{
	struct proc_dir_entry *pde;

	pde = proc_create("rt_cache", 0444, net->proc_net,
			  &rt_cache_seq_fops);
	if (!pde)
		goto err1;

	pde = proc_create("rt_cache", 0444,
			  net->proc_net_stat, &rt_cpu_seq_fops);
	if (!pde)
		goto err2;

#ifdef CONFIG_IP_ROUTE_CLASSID
	pde = proc_create_single("rt_acct", 0, net->proc_net,
				 rt_acct_proc_show);
	if (!pde)
		goto err3;
#endif
	return 0;

#ifdef CONFIG_IP_ROUTE_CLASSID
err3:
	remove_proc_entry("rt_cache", net->proc_net_stat);
#endif
err2:
	remove_proc_entry("rt_cache", net->proc_net);
err1:
	return -ENOMEM;
}

static void __net_exit ip_rt_do_proc_exit(struct net *net)
{
	remove_proc_entry("rt_cache", net->proc_net_stat);
	remove_proc_entry("rt_cache", net->proc_net);
#ifdef CONFIG_IP_ROUTE_CLASSID
	remove_proc_entry("rt_acct", net->proc_net);
#endif
}

static struct pernet_operations ip_rt_proc_ops __net_initdata = {
	.init = ip_rt_do_proc_init,
	.exit = ip_rt_do_proc_exit,
};

static int __init ip_rt_proc_init(void)
{
	return register_pernet_subsys(&ip_rt_proc_ops);
}

#else
static inline int ip_rt_proc_init(void)
{
	return 0;
}
#endif /* CONFIG_PROC_FS */

static inline bool rt_is_expired(const struct rtable *rth)
{
	return rth->rt_genid != rt_genid_ipv4(dev_net(rth->dst.dev));
}

void rt_cache_flush(struct net *net)
{
	rt_genid_bump_ipv4(net);
}
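
/* Invalidation sketch (illustrative): rather than walking every cached
 * rtable, a flush just bumps the per-netns IPv4 generation counter.
 * Each rtable remembers the genid it was created under, so staleness
 * is detected lazily at lookup time:
 *
 *	rt_cache_flush(net);		// rt_genid_bump_ipv4(net)
 *	...
 *	if (rt_is_expired(rth))		// genid mismatch: drop and rebuild
 *		...
 */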

static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
					   struct sk_buff *skb,
					   const void *daddr)
{
	const struct rtable *rt = container_of(dst, struct rtable, dst);
	struct net_device *dev = dst->dev;
	struct neighbour *n;

	rcu_read_lock_bh();

	if (likely(rt->rt_gw_family == AF_INET)) {
		n = ip_neigh_gw4(dev, rt->rt_gw4);
	} else if (rt->rt_gw_family == AF_INET6) {
		n = ip_neigh_gw6(dev, &rt->rt_gw6);
	} else {
		__be32 pkey;

		pkey = skb ? ip_hdr(skb)->daddr : *((__be32 *) daddr);
		n = ip_neigh_gw4(dev, pkey);
	}

	if (!IS_ERR(n) && !refcount_inc_not_zero(&n->refcnt))
		n = NULL;

	rcu_read_unlock_bh();

	return n;
}

static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr)
{
	const struct rtable *rt = container_of(dst, struct rtable, dst);
	struct net_device *dev = dst->dev;
	const __be32 *pkey = daddr;

	if (rt->rt_gw_family == AF_INET) {
		pkey = (const __be32 *)&rt->rt_gw4;
	} else if (rt->rt_gw_family == AF_INET6) {
		return __ipv6_confirm_neigh_stub(dev, &rt->rt_gw6);
	} else if (!daddr ||
		   (rt->rt_flags &
		    (RTCF_MULTICAST | RTCF_BROADCAST | RTCF_LOCAL))) {
		return;
	}
	__ipv4_confirm_neigh(dev, *(__force u32 *)pkey);
}

#define IP_IDENTS_SZ 2048u

static atomic_t *ip_idents __read_mostly;
static u32 *ip_tstamps __read_mostly;

/* In order to protect privacy, we add a perturbation to identifiers
 * if one generator is seldom used. This makes it hard for an attacker
 * to infer how many packets were sent between two points in time.
 */
u32 ip_idents_reserve(u32 hash, int segs)
{
	u32 *p_tstamp = ip_tstamps + hash % IP_IDENTS_SZ;
	atomic_t *p_id = ip_idents + hash % IP_IDENTS_SZ;
	u32 old = READ_ONCE(*p_tstamp);
	u32 now = (u32)jiffies;
	u32 new, delta = 0;

	if (old != now && cmpxchg(p_tstamp, old, now) == old)
		delta = prandom_u32_max(now - old);

	/* Do not use atomic_add_return() as it makes UBSAN unhappy */
	do {
		old = (u32)atomic_read(p_id);
		new = old + delta + segs;
	} while (atomic_cmpxchg(p_id, old, new) != old);

	return new - segs;
}
EXPORT_SYMBOL(ip_idents_reserve);

void __ip_select_ident(struct net *net, struct iphdr *iph, int segs)
{
	u32 hash, id;

	/* Note the following code is not safe, but this is okay. */
	if (unlikely(siphash_key_is_zero(&net->ipv4.ip_id_key)))
		get_random_bytes(&net->ipv4.ip_id_key,
				 sizeof(net->ipv4.ip_id_key));

	hash = siphash_3u32((__force u32)iph->daddr,
			    (__force u32)iph->saddr,
			    iph->protocol,
			    &net->ipv4.ip_id_key);
	id = ip_idents_reserve(hash, segs);
	iph->id = htons(id);
}
EXPORT_SYMBOL(__ip_select_ident);
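
/* Example (illustrative): reserving IDs for a GSO packet that will be
 * segmented into three on-the-wire packets.  ip_idents_reserve() hands
 * back the first of three consecutive IDs for this (daddr, saddr,
 * protocol) bucket:
 *
 *	id = ip_idents_reserve(hash, 3);	// returns new - 3
 *	iph->id = htons(id);			// segments use id, id+1, id+2
 */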

static void __build_flow_key(const struct net *net, struct flowi4 *fl4,
			     const struct sock *sk,
			     const struct iphdr *iph,
			     int oif, u8 tos,
			     u8 prot, u32 mark, int flow_flags)
{
	if (sk) {
		const struct inet_sock *inet = inet_sk(sk);

		oif = sk->sk_bound_dev_if;
		mark = sk->sk_mark;
		tos = RT_CONN_FLAGS(sk);
		prot = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol;
	}
	flowi4_init_output(fl4, oif, mark, tos,
			   RT_SCOPE_UNIVERSE, prot,
			   flow_flags,
			   iph->daddr, iph->saddr, 0, 0,
			   sock_net_uid(net, sk));
}

static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb,
			       const struct sock *sk)
{
	const struct net *net = dev_net(skb->dev);
	const struct iphdr *iph = ip_hdr(skb);
	int oif = skb->dev->ifindex;
	u8 tos = RT_TOS(iph->tos);
	u8 prot = iph->protocol;
	u32 mark = skb->mark;

	__build_flow_key(net, fl4, sk, iph, oif, tos, prot, mark, 0);
}

static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
{
	const struct inet_sock *inet = inet_sk(sk);
	const struct ip_options_rcu *inet_opt;
	__be32 daddr = inet->inet_daddr;

	rcu_read_lock();
	inet_opt = rcu_dereference(inet->inet_opt);
	if (inet_opt && inet_opt->opt.srr)
		daddr = inet_opt->opt.faddr;
	flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
			   inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
			   inet_sk_flowi_flags(sk),
			   daddr, inet->inet_saddr, 0, 0, sk->sk_uid);
	rcu_read_unlock();
}

static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk,
				 const struct sk_buff *skb)
{
	if (skb)
		build_skb_flow_key(fl4, skb, sk);
	else
		build_sk_flow_key(fl4, sk);
}
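
/* Dispatch sketch (illustrative): callers handling a packet have an
 * skb and rebuild the flow key from its IP header; callers acting on a
 * socket event only have the sock.  Either way, the resulting flowi4
 * is what gets fed back into the FIB, e.g. as in ip_rt_update_pmtu()
 * below:
 *
 *	struct flowi4 fl4;
 *
 *	ip_rt_build_flow_key(&fl4, sk, skb);	// skb may be NULL
 *	__ip_rt_update_pmtu(rt, &fl4, mtu);	// re-runs the FIB lookup
 */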

static DEFINE_SPINLOCK(fnhe_lock);

static void fnhe_flush_routes(struct fib_nh_exception *fnhe)
{
	struct rtable *rt;

	rt = rcu_dereference(fnhe->fnhe_rth_input);
	if (rt) {
		RCU_INIT_POINTER(fnhe->fnhe_rth_input, NULL);
		dst_dev_put(&rt->dst);
		dst_release(&rt->dst);
	}
	rt = rcu_dereference(fnhe->fnhe_rth_output);
	if (rt) {
		RCU_INIT_POINTER(fnhe->fnhe_rth_output, NULL);
		dst_dev_put(&rt->dst);
		dst_release(&rt->dst);
	}
}

static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
{
	struct fib_nh_exception *fnhe, *oldest;

	oldest = rcu_dereference(hash->chain);
	for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe;
	     fnhe = rcu_dereference(fnhe->fnhe_next)) {
		if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp))
			oldest = fnhe;
	}
	fnhe_flush_routes(oldest);
	return oldest;
}

static inline u32 fnhe_hashfun(__be32 daddr)
{
	static u32 fnhe_hashrnd __read_mostly;
	u32 hval;

	net_get_random_once(&fnhe_hashrnd, sizeof(fnhe_hashrnd));
	hval = jhash_1word((__force u32) daddr, fnhe_hashrnd);
	return hash_32(hval, FNHE_HASH_SHIFT);
}
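
/* Hashing note (illustrative): the destination address is mixed with a
 * boot-time random seed before being folded down to FNHE_HASH_SHIFT
 * bits, so remote hosts cannot predict which bucket an exception lands
 * in and deliberately overflow a single chain.  Bucket selection, as
 * done in update_or_create_fnhe() below:
 *
 *	hval = fnhe_hashfun(daddr);
 *	bucket = rcu_dereference(nhc->nhc_exceptions) + hval;
 */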

static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe)
{
	rt->rt_pmtu = fnhe->fnhe_pmtu;
	rt->rt_mtu_locked = fnhe->fnhe_mtu_locked;
	rt->dst.expires = fnhe->fnhe_expires;

	if (fnhe->fnhe_gw) {
		rt->rt_flags |= RTCF_REDIRECTED;
		rt->rt_uses_gateway = 1;
		rt->rt_gw_family = AF_INET;
		rt->rt_gw4 = fnhe->fnhe_gw;
	}
}

static void update_or_create_fnhe(struct fib_nh_common *nhc, __be32 daddr,
				  __be32 gw, u32 pmtu, bool lock,
				  unsigned long expires)
{
	struct fnhe_hash_bucket *hash;
	struct fib_nh_exception *fnhe;
	struct rtable *rt;
	u32 genid, hval;
	unsigned int i;
	int depth;

	genid = fnhe_genid(dev_net(nhc->nhc_dev));
	hval = fnhe_hashfun(daddr);

	spin_lock_bh(&fnhe_lock);

	hash = rcu_dereference(nhc->nhc_exceptions);
	if (!hash) {
		hash = kcalloc(FNHE_HASH_SIZE, sizeof(*hash), GFP_ATOMIC);
		if (!hash)
			goto out_unlock;
		rcu_assign_pointer(nhc->nhc_exceptions, hash);
	}

	hash += hval;

	depth = 0;
	for (fnhe = rcu_dereference(hash->chain); fnhe;
	     fnhe = rcu_dereference(fnhe->fnhe_next)) {
		if (fnhe->fnhe_daddr == daddr)
			break;
		depth++;
	}

	if (fnhe) {
		if (fnhe->fnhe_genid != genid)
			fnhe->fnhe_genid = genid;
		if (gw)
			fnhe->fnhe_gw = gw;
		if (pmtu) {
			fnhe->fnhe_pmtu = pmtu;
			fnhe->fnhe_mtu_locked = lock;
		}
		fnhe->fnhe_expires = max(1UL, expires);
		/* Update all cached dsts too */
		rt = rcu_dereference(fnhe->fnhe_rth_input);
		if (rt)
			fill_route_from_fnhe(rt, fnhe);
		rt = rcu_dereference(fnhe->fnhe_rth_output);
		if (rt)
			fill_route_from_fnhe(rt, fnhe);
	} else {
		if (depth > FNHE_RECLAIM_DEPTH)
			fnhe = fnhe_oldest(hash);
		else {
			fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC);
			if (!fnhe)
				goto out_unlock;

			fnhe->fnhe_next = hash->chain;
			rcu_assign_pointer(hash->chain, fnhe);
		}
		fnhe->fnhe_genid = genid;
		fnhe->fnhe_daddr = daddr;
		fnhe->fnhe_gw = gw;
		fnhe->fnhe_pmtu = pmtu;
		fnhe->fnhe_mtu_locked = lock;
		fnhe->fnhe_expires = max(1UL, expires);

		/* Exception created; mark the cached routes for the nexthop
		 * stale, so anyone caching it rechecks if this exception
		 * applies to them.
		 */
		rt = rcu_dereference(nhc->nhc_rth_input);
		if (rt)
			rt->dst.obsolete = DST_OBSOLETE_KILL;

		for_each_possible_cpu(i) {
			struct rtable __rcu **prt;
			prt = per_cpu_ptr(nhc->nhc_pcpu_rth_output, i);
			rt = rcu_dereference(*prt);
			if (rt)
				rt->dst.obsolete = DST_OBSOLETE_KILL;
		}
	}

	fnhe->fnhe_stamp = jiffies;

out_unlock:
	spin_unlock_bh(&fnhe_lock);
}
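
/* Call pattern (illustrative): the two producers of exceptions are the
 * redirect and PMTU paths below.  A redirect records a new gateway, a
 * PMTU update records a clamped MTU; both attach the result to the
 * nexthop rather than to the (shared) FIB entry:
 *
 *	update_or_create_fnhe(nhc, daddr, new_gw, 0, false,
 *			      jiffies + ip_rt_gc_timeout);	// redirect
 *	update_or_create_fnhe(nhc, daddr, 0, mtu, lock,
 *			      jiffies + ip_rt_mtu_expires);	// PMTU
 */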

static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4,
			     bool kill_route)
{
	__be32 new_gw = icmp_hdr(skb)->un.gateway;
	__be32 old_gw = ip_hdr(skb)->saddr;
	struct net_device *dev = skb->dev;
	struct in_device *in_dev;
	struct fib_result res;
	struct neighbour *n;
	struct net *net;

	switch (icmp_hdr(skb)->code & 7) {
	case ICMP_REDIR_NET:
	case ICMP_REDIR_NETTOS:
	case ICMP_REDIR_HOST:
	case ICMP_REDIR_HOSTTOS:
		break;

	default:
		return;
	}

	if (rt->rt_gw_family != AF_INET || rt->rt_gw4 != old_gw)
		return;

	in_dev = __in_dev_get_rcu(dev);
	if (!in_dev)
		return;

	net = dev_net(dev);
	if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) ||
	    ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw) ||
	    ipv4_is_zeronet(new_gw))
		goto reject_redirect;

	if (!IN_DEV_SHARED_MEDIA(in_dev)) {
		if (!inet_addr_onlink(in_dev, new_gw, old_gw))
			goto reject_redirect;
		if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev))
			goto reject_redirect;
	} else {
		if (inet_addr_type(net, new_gw) != RTN_UNICAST)
			goto reject_redirect;
	}

	n = __ipv4_neigh_lookup(rt->dst.dev, new_gw);
	if (!n)
		n = neigh_create(&arp_tbl, &new_gw, rt->dst.dev);
	if (!IS_ERR(n)) {
		if (!(n->nud_state & NUD_VALID)) {
			neigh_event_send(n, NULL);
		} else {
			if (fib_lookup(net, fl4, &res, 0) == 0) {
				struct fib_nh_common *nhc = FIB_RES_NHC(res);

				update_or_create_fnhe(nhc, fl4->daddr, new_gw,
						0, false,
						jiffies + ip_rt_gc_timeout);
			}
			if (kill_route)
				rt->dst.obsolete = DST_OBSOLETE_KILL;
			call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n);
		}
		neigh_release(n);
	}
	return;

reject_redirect:
#ifdef CONFIG_IP_ROUTE_VERBOSE
	if (IN_DEV_LOG_MARTIANS(in_dev)) {
		const struct iphdr *iph = (const struct iphdr *) skb->data;
		__be32 daddr = iph->daddr;
		__be32 saddr = iph->saddr;

		net_info_ratelimited("Redirect from %pI4 on %s about %pI4 ignored\n"
				     "  Advised path = %pI4 -> %pI4\n",
				     &old_gw, dev->name, &new_gw,
				     &saddr, &daddr);
	}
#endif
	;
}

static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
{
	struct rtable *rt;
	struct flowi4 fl4;
	const struct iphdr *iph = (const struct iphdr *) skb->data;
	struct net *net = dev_net(skb->dev);
	int oif = skb->dev->ifindex;
	u8 tos = RT_TOS(iph->tos);
	u8 prot = iph->protocol;
	u32 mark = skb->mark;

	rt = (struct rtable *) dst;

	__build_flow_key(net, &fl4, sk, iph, oif, tos, prot, mark, 0);
	__ip_do_redirect(rt, skb, &fl4, true);
}

static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
{
	struct rtable *rt = (struct rtable *)dst;
	struct dst_entry *ret = dst;

	if (rt) {
		if (dst->obsolete > 0) {
			ip_rt_put(rt);
			ret = NULL;
		} else if ((rt->rt_flags & RTCF_REDIRECTED) ||
			   rt->dst.expires) {
			ip_rt_put(rt);
			ret = NULL;
		}
	}
	return ret;
}

/*
 * Algorithm:
 * 1. The first ip_rt_redirect_number redirects are sent
 *    with exponential backoff, then we stop sending them at all,
 *    assuming that the host ignores our redirects.
 * 2. If we did not see packets requiring redirects
 *    during ip_rt_redirect_silence, we assume that the host
 *    forgot the redirected route and start to send redirects again.
 *
 * This algorithm is much cheaper and more intelligent than dumb load limiting
 * in icmp.c.
 *
 * NOTE. Do not forget to inhibit load limiting for redirects (redundant)
 * and "frag. need" (breaks PMTU discovery) in icmp.c.
 */
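
/* Worked example (illustrative, assumes HZ=1000 and the defaults
 * above): after each redirect sent, the next one requires a gap of
 * ip_rt_redirect_load << rate_tokens, i.e. 40 ms, 80 ms, 160 ms, and
 * so on, doubling each time.  Once ip_rt_redirect_number (9) redirects
 * have been sent to a peer we go silent.  If no offending packet is
 * seen for ip_rt_redirect_silence ((HZ / 50) << 10 = 20480 jiffies,
 * roughly 20 seconds), both counters reset and redirects resume.
 */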

void ip_rt_send_redirect(struct sk_buff *skb)
{
	struct rtable *rt = skb_rtable(skb);
	struct in_device *in_dev;
	struct inet_peer *peer;
	struct net *net;
	int log_martians;
	int vif;

	rcu_read_lock();
	in_dev = __in_dev_get_rcu(rt->dst.dev);
	if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) {
		rcu_read_unlock();
		return;
	}
	log_martians = IN_DEV_LOG_MARTIANS(in_dev);
	vif = l3mdev_master_ifindex_rcu(rt->dst.dev);
	rcu_read_unlock();

	net = dev_net(rt->dst.dev);
	peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, vif, 1);
	if (!peer) {
		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST,
			  rt_nexthop(rt, ip_hdr(skb)->daddr));
		return;
	}

	/* No redirected packets during ip_rt_redirect_silence;
	 * reset the algorithm.
	 */
	if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence)) {
		peer->rate_tokens = 0;
		peer->n_redirects = 0;
	}

	/* Too many ignored redirects; do not send anything.
	 * Set peer->rate_last to the time of the last seen redirected packet.
	 */
	if (peer->n_redirects >= ip_rt_redirect_number) {
		peer->rate_last = jiffies;
		goto out_put_peer;
	}

	/* Check for load limit; set rate_last to the latest sent
	 * redirect.
	 */
	if (peer->rate_tokens == 0 ||
	    time_after(jiffies,
		       (peer->rate_last +
			(ip_rt_redirect_load << peer->rate_tokens)))) {
		__be32 gw = rt_nexthop(rt, ip_hdr(skb)->daddr);

		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw);
		peer->rate_last = jiffies;
		++peer->rate_tokens;
		++peer->n_redirects;
#ifdef CONFIG_IP_ROUTE_VERBOSE
		if (log_martians &&
		    peer->rate_tokens == ip_rt_redirect_number)
			net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n",
					     &ip_hdr(skb)->saddr, inet_iif(skb),
					     &ip_hdr(skb)->daddr, &gw);
#endif
	}
out_put_peer:
	inet_putpeer(peer);
}

static int ip_error(struct sk_buff *skb)
{
	struct rtable *rt = skb_rtable(skb);
	struct net_device *dev = skb->dev;
	struct in_device *in_dev;
	struct inet_peer *peer;
	unsigned long now;
	struct net *net;
	bool send;
	int code;

	if (netif_is_l3_master(skb->dev)) {
		dev = __dev_get_by_index(dev_net(skb->dev), IPCB(skb)->iif);
		if (!dev)
			goto out;
	}

	in_dev = __in_dev_get_rcu(dev);

	/* IP on this device is disabled. */
	if (!in_dev)
		goto out;

	net = dev_net(rt->dst.dev);
	if (!IN_DEV_FORWARD(in_dev)) {
		switch (rt->dst.error) {
		case EHOSTUNREACH:
			__IP_INC_STATS(net, IPSTATS_MIB_INADDRERRORS);
			break;

		case ENETUNREACH:
			__IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES);
			break;
		}
		goto out;
	}

	switch (rt->dst.error) {
	case EINVAL:
	default:
		goto out;
	case EHOSTUNREACH:
		code = ICMP_HOST_UNREACH;
		break;
	case ENETUNREACH:
		code = ICMP_NET_UNREACH;
		__IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES);
		break;
	case EACCES:
		code = ICMP_PKT_FILTERED;
		break;
	}

	peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr,
			       l3mdev_master_ifindex(skb->dev), 1);

	send = true;
	if (peer) {
		now = jiffies;
		peer->rate_tokens += now - peer->rate_last;
		if (peer->rate_tokens > ip_rt_error_burst)
			peer->rate_tokens = ip_rt_error_burst;
		peer->rate_last = now;
		if (peer->rate_tokens >= ip_rt_error_cost)
			peer->rate_tokens -= ip_rt_error_cost;
		else
			send = false;
		inet_putpeer(peer);
	}
	if (send)
		icmp_send(skb, ICMP_DEST_UNREACH, code, 0);

out:	kfree_skb(skb);
	return 0;
}

static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
{
	struct dst_entry *dst = &rt->dst;
	u32 old_mtu = ipv4_mtu(dst);
	struct fib_result res;
	bool lock = false;

	if (ip_mtu_locked(dst))
		return;

	if (old_mtu < mtu)
		return;

	if (mtu < ip_rt_min_pmtu) {
		lock = true;
		mtu = min(old_mtu, ip_rt_min_pmtu);
	}

	if (rt->rt_pmtu == mtu && !lock &&
	    time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2))
		return;

	rcu_read_lock();
	if (fib_lookup(dev_net(dst->dev), fl4, &res, 0) == 0) {
		struct fib_nh_common *nhc = FIB_RES_NHC(res);

		update_or_create_fnhe(nhc, fl4->daddr, 0, mtu, lock,
				      jiffies + ip_rt_mtu_expires);
	}
	rcu_read_unlock();
}
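
/* Clamping example (illustrative): if a forged ICMP "frag needed"
 * advertises mtu = 68 while the route currently has old_mtu = 1500,
 * the advertised value is below ip_rt_min_pmtu (552 by default), so
 * the exception is recorded with mtu = min(1500, 552) = 552 and
 * lock = true, preventing an attacker from driving the path MTU
 * arbitrarily low.
 */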

static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
			      struct sk_buff *skb, u32 mtu)
{
	struct rtable *rt = (struct rtable *) dst;
	struct flowi4 fl4;

	ip_rt_build_flow_key(&fl4, sk, skb);
	__ip_rt_update_pmtu(rt, &fl4, mtu);
}

void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
		      int oif, u8 protocol)
{
	const struct iphdr *iph = (const struct iphdr *) skb->data;
	struct flowi4 fl4;
	struct rtable *rt;
	u32 mark = IP4_REPLY_MARK(net, skb->mark);

	__build_flow_key(net, &fl4, NULL, iph, oif,
			 RT_TOS(iph->tos), protocol, mark, 0);
	rt = __ip_route_output_key(net, &fl4);
	if (!IS_ERR(rt)) {
		__ip_rt_update_pmtu(rt, &fl4, mtu);
		ip_rt_put(rt);
	}
}
EXPORT_SYMBOL_GPL(ipv4_update_pmtu);

static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
{
	const struct iphdr *iph = (const struct iphdr *) skb->data;
	struct flowi4 fl4;
	struct rtable *rt;

	__build_flow_key(sock_net(sk), &fl4, sk, iph, 0, 0, 0, 0, 0);

	if (!fl4.flowi4_mark)
		fl4.flowi4_mark = IP4_REPLY_MARK(sock_net(sk), skb->mark);

	rt = __ip_route_output_key(sock_net(sk), &fl4);
	if (!IS_ERR(rt)) {
		__ip_rt_update_pmtu(rt, &fl4, mtu);
		ip_rt_put(rt);
	}
}

void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
{
	const struct iphdr *iph = (const struct iphdr *) skb->data;
	struct flowi4 fl4;
	struct rtable *rt;
	struct dst_entry *odst = NULL;
	bool new = false;
	struct net *net = sock_net(sk);

	bh_lock_sock(sk);

	if (!ip_sk_accept_pmtu(sk))
		goto out;

	odst = sk_dst_get(sk);

	if (sock_owned_by_user(sk) || !odst) {
		__ipv4_sk_update_pmtu(skb, sk, mtu);
		goto out;
	}

	__build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0);

	rt = (struct rtable *)odst;
	if (odst->obsolete && !odst->ops->check(odst, 0)) {
		rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
		if (IS_ERR(rt))
			goto out;

		new = true;
	}

	__ip_rt_update_pmtu((struct rtable *) xfrm_dst_path(&rt->dst), &fl4, mtu);

	if (!dst_check(&rt->dst, 0)) {
		if (new)
			dst_release(&rt->dst);

		rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
		if (IS_ERR(rt))
			goto out;

		new = true;
	}

	if (new)
		sk_dst_set(sk, &rt->dst);

out:
	bh_unlock_sock(sk);
	dst_release(odst);
}
EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu);
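
/* Caller sketch (illustrative): a protocol reacting to an incoming
 * ICMP_DEST_UNREACH/ICMP_FRAG_NEEDED typically pulls the advertised
 * MTU out of the ICMP payload and calls one of the two exported entry
 * points, e.g.
 *
 *	ipv4_sk_update_pmtu(skb, sk, mtu);	// when a socket is known
 *	ipv4_update_pmtu(skb, net, mtu, oif,
 *			 IPPROTO_UDP);		// connectionless case
 */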
David S. Millerf39925d2011-02-09 22:00:16 -08001145
David S. Millerb42597e2012-07-11 21:25:45 -07001146void ipv4_redirect(struct sk_buff *skb, struct net *net,
Maciej Żenczykowski1042caa2018-09-25 20:56:27 -07001147 int oif, u8 protocol)
David S. Millerb42597e2012-07-11 21:25:45 -07001148{
David S. Miller4895c772012-07-17 04:19:00 -07001149 const struct iphdr *iph = (const struct iphdr *) skb->data;
David S. Millerb42597e2012-07-11 21:25:45 -07001150 struct flowi4 fl4;
1151 struct rtable *rt;
1152
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09001153 __build_flow_key(net, &fl4, NULL, iph, oif,
Maciej Żenczykowski1042caa2018-09-25 20:56:27 -07001154 RT_TOS(iph->tos), protocol, 0, 0);
David S. Millerb42597e2012-07-11 21:25:45 -07001155 rt = __ip_route_output_key(net, &fl4);
1156 if (!IS_ERR(rt)) {
David S. Millerceb33202012-07-17 11:31:28 -07001157 __ip_do_redirect(rt, skb, &fl4, false);
David S. Millerb42597e2012-07-11 21:25:45 -07001158 ip_rt_put(rt);
1159 }
1160}
1161EXPORT_SYMBOL_GPL(ipv4_redirect);
1162
1163void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk)
1164{
David S. Miller4895c772012-07-17 04:19:00 -07001165 const struct iphdr *iph = (const struct iphdr *) skb->data;
1166 struct flowi4 fl4;
1167 struct rtable *rt;
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09001168 struct net *net = sock_net(sk);
David S. Millerb42597e2012-07-11 21:25:45 -07001169
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09001170 __build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0);
1171 rt = __ip_route_output_key(net, &fl4);
David S. Miller4895c772012-07-17 04:19:00 -07001172 if (!IS_ERR(rt)) {
David S. Millerceb33202012-07-17 11:31:28 -07001173 __ip_do_redirect(rt, skb, &fl4, false);
David S. Miller4895c772012-07-17 04:19:00 -07001174 ip_rt_put(rt);
1175 }
David S. Millerb42597e2012-07-11 21:25:45 -07001176}
1177EXPORT_SYMBOL_GPL(ipv4_sk_redirect);
1178
David S. Millerefbc368d2011-12-01 13:38:59 -05001179static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
1180{
1181 struct rtable *rt = (struct rtable *) dst;
1182
David S. Millerceb33202012-07-17 11:31:28 -07001183 /* All IPV4 dsts are created with ->obsolete set to the value
1184	 * DST_OBSOLETE_FORCE_CHK, which always forces validation calls
1185	 * down into this function.
1186 *
Timo Teräs387aa652013-05-27 20:46:31 +00001187 * When a PMTU/redirect information update invalidates a route,
1188 * this is indicated by setting obsolete to DST_OBSOLETE_KILL or
Julian Wiedmann02afc7a2019-03-20 20:02:56 +01001189 * DST_OBSOLETE_DEAD.
David S. Millerceb33202012-07-17 11:31:28 -07001190 */
Timo Teräs387aa652013-05-27 20:46:31 +00001191 if (dst->obsolete != DST_OBSOLETE_FORCE_CHK || rt_is_expired(rt))
David S. Millerefbc368d2011-12-01 13:38:59 -05001192 return NULL;
Timo Teräsd11a4dc2010-03-18 23:20:20 +00001193 return dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001194}
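/* Sketch of the contract assumed here: holders of a cached dst must
 * revalidate it before use; __sk_dst_check(sk, 0) ends up calling
 * dst->ops->check(), so a typical caller does
 *
 *	if (!__sk_dst_check(sk, 0))
 *		rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
 *
 * and a NULL return above therefore forces a fresh route lookup.
 */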
1195
Eric Dumazet20ff83f2019-04-24 08:04:05 -07001196static void ipv4_send_dest_unreach(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001197{
Stephen Suryaputraed0de452019-04-12 16:19:27 -04001198 struct ip_options opt;
Eric Dumazetc543cb42019-04-13 17:32:21 -07001199 int res;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001200
Stephen Suryaputraed0de452019-04-12 16:19:27 -04001201 /* Recompile ip options since IPCB may not be valid anymore.
Eric Dumazet20ff83f2019-04-24 08:04:05 -07001202 * Also check we have a reasonable ipv4 header.
Stephen Suryaputraed0de452019-04-12 16:19:27 -04001203 */
Eric Dumazet20ff83f2019-04-24 08:04:05 -07001204 if (!pskb_network_may_pull(skb, sizeof(struct iphdr)) ||
1205 ip_hdr(skb)->version != 4 || ip_hdr(skb)->ihl < 5)
Stephen Suryaputraed0de452019-04-12 16:19:27 -04001206 return;
1207
Eric Dumazet20ff83f2019-04-24 08:04:05 -07001208 memset(&opt, 0, sizeof(opt));
1209 if (ip_hdr(skb)->ihl > 5) {
1210 if (!pskb_network_may_pull(skb, ip_hdr(skb)->ihl * 4))
1211 return;
1212 opt.optlen = ip_hdr(skb)->ihl * 4 - sizeof(struct iphdr);
1213
1214 rcu_read_lock();
1215 res = __ip_options_compile(dev_net(skb->dev), &opt, skb, NULL);
1216 rcu_read_unlock();
1217
1218 if (res)
1219 return;
1220 }
Stephen Suryaputraed0de452019-04-12 16:19:27 -04001221 __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0, &opt);
Eric Dumazet20ff83f2019-04-24 08:04:05 -07001222}
1223
1224static void ipv4_link_failure(struct sk_buff *skb)
1225{
1226 struct rtable *rt;
1227
1228 ipv4_send_dest_unreach(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001229
Eric Dumazet511c3f92009-06-02 05:14:27 +00001230 rt = skb_rtable(skb);
David S. Miller59436342012-07-10 06:58:42 -07001231 if (rt)
1232 dst_set_expires(&rt->dst, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001233}
1234
Eric W. Biedermanede20592015-10-07 16:48:47 -05001235static int ip_rt_bug(struct net *net, struct sock *sk, struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001236{
Joe Perches91df42b2012-05-15 14:11:54 +00001237 pr_debug("%s: %pI4 -> %pI4, %s\n",
1238 __func__, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr,
1239 skb->dev ? skb->dev->name : "?");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001240 kfree_skb(skb);
Dave Jonesc378a9c2011-05-21 07:16:42 +00001241 WARN_ON(1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001242 return 0;
1243}
1244
1245/*
1246	   We do not cache the source address of the outgoing interface,
1247	   because it is used only by the IP RR, TS and SRR options,
1248	   so it is out of the fast path.
1249
1250	   BTW remember: "addr" may be unaligned
1251 in IP options!
1252 */
1253
David S. Miller8e363602011-05-13 17:29:41 -04001254void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001255{
Al Viroa61ced52006-09-26 21:27:54 -07001256 __be32 src;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001257
David S. Millerc7537962010-11-11 17:07:48 -08001258 if (rt_is_output_route(rt))
David S. Millerc5be24f2011-05-13 18:01:21 -04001259 src = ip_hdr(skb)->saddr;
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001260 else {
David S. Miller8e363602011-05-13 17:29:41 -04001261 struct fib_result res;
Maciej Żenczykowskie351bb62018-09-29 23:44:46 -07001262 struct iphdr *iph = ip_hdr(skb);
1263 struct flowi4 fl4 = {
1264 .daddr = iph->daddr,
1265 .saddr = iph->saddr,
1266 .flowi4_tos = RT_TOS(iph->tos),
1267 .flowi4_oif = rt->dst.dev->ifindex,
1268 .flowi4_iif = skb->dev->ifindex,
1269 .flowi4_mark = skb->mark,
1270 };
David S. Miller5e2b61f2011-03-04 21:47:09 -08001271
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001272 rcu_read_lock();
Andy Gospodarek0eeb0752015-06-23 13:45:37 -04001273 if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res, 0) == 0)
David Aherneba618a2019-04-02 14:11:55 -07001274 src = fib_result_prefsrc(dev_net(rt->dst.dev), &res);
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001275 else
David S. Millerf8126f12012-07-13 05:03:45 -07001276 src = inet_select_addr(rt->dst.dev,
1277 rt_nexthop(rt, iph->daddr),
1278 RT_SCOPE_UNIVERSE);
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001279 rcu_read_unlock();
1280 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001281 memcpy(addr, &src, 4);
1282}
1283
Patrick McHardyc7066f72011-01-14 13:36:42 +01001284#ifdef CONFIG_IP_ROUTE_CLASSID
Linus Torvalds1da177e2005-04-16 15:20:36 -07001285static void set_class_tag(struct rtable *rt, u32 tag)
1286{
Changli Gaod8d1f302010-06-10 23:31:35 -07001287 if (!(rt->dst.tclassid & 0xFFFF))
1288 rt->dst.tclassid |= tag & 0xFFFF;
1289 if (!(rt->dst.tclassid & 0xFFFF0000))
1290 rt->dst.tclassid |= tag & 0xFFFF0000;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001291}
1292#endif
1293
David S. Miller0dbaee32010-12-13 12:52:14 -08001294static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
1295{
Gao Feng7ed14d92017-04-12 12:34:03 +08001296 unsigned int header_size = sizeof(struct tcphdr) + sizeof(struct iphdr);
Eric Dumazet164a5e72017-10-18 17:02:03 -07001297 unsigned int advmss = max_t(unsigned int, ipv4_mtu(dst) - header_size,
Gao Feng7ed14d92017-04-12 12:34:03 +08001298 ip_rt_min_advmss);
David S. Miller0dbaee32010-12-13 12:52:14 -08001299
Gao Feng7ed14d92017-04-12 12:34:03 +08001300 return min(advmss, IPV4_MAX_PMTU - header_size);
David S. Miller0dbaee32010-12-13 12:52:14 -08001301}
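/* Worked example: for a 1500 byte path MTU, header_size is
 * 20 (TCP) + 20 (IP) = 40, so advmss = max(1500 - 40, ip_rt_min_advmss)
 * = 1460 (with the default ip_rt_min_advmss of 256), comfortably below
 * IPV4_MAX_PMTU - 40, so 1460 is advertised.
 */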
1302
Steffen Klassertebb762f2011-11-23 02:12:51 +00001303static unsigned int ipv4_mtu(const struct dst_entry *dst)
David S. Millerd33e4552010-12-14 13:01:14 -08001304{
Steffen Klassert261663b2011-11-23 02:14:50 +00001305 const struct rtable *rt = (const struct rtable *) dst;
David S. Miller59436342012-07-10 06:58:42 -07001306 unsigned int mtu = rt->rt_pmtu;
1307
Alexander Duyck98d75c32012-08-27 06:30:01 +00001308 if (!mtu || time_after_eq(jiffies, rt->dst.expires))
David S. Miller59436342012-07-10 06:58:42 -07001309 mtu = dst_metric_raw(dst, RTAX_MTU);
Steffen Klassert618f9bc2011-11-23 02:13:31 +00001310
Steffen Klassert38d523e2013-01-16 20:55:01 +00001311 if (mtu)
Steffen Klassert618f9bc2011-11-23 02:13:31 +00001312 return mtu;
1313
Eric Dumazetc780a042017-08-16 11:09:12 -07001314 mtu = READ_ONCE(dst->dev->mtu);
David S. Millerd33e4552010-12-14 13:01:14 -08001315
Sabrina Dubrocad52e5a72018-03-14 10:21:14 +01001316 if (unlikely(ip_mtu_locked(dst))) {
David Ahern77d5bc72019-09-17 10:39:49 -07001317 if (rt->rt_uses_gateway && mtu > 576)
David S. Millerd33e4552010-12-14 13:01:14 -08001318 mtu = 576;
1319 }
1320
Roopa Prabhu14972cb2016-08-24 20:10:43 -07001321 mtu = min_t(unsigned int, mtu, IP_MAX_MTU);
1322
1323 return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
David S. Millerd33e4552010-12-14 13:01:14 -08001324}
1325
David Aherna5995e72019-04-30 07:45:50 -07001326static void ip_del_fnhe(struct fib_nh_common *nhc, __be32 daddr)
Julian Anastasov94720e32018-05-02 09:41:19 +03001327{
1328 struct fnhe_hash_bucket *hash;
1329 struct fib_nh_exception *fnhe, __rcu **fnhe_p;
1330 u32 hval = fnhe_hashfun(daddr);
1331
1332 spin_lock_bh(&fnhe_lock);
1333
David Aherna5995e72019-04-30 07:45:50 -07001334 hash = rcu_dereference_protected(nhc->nhc_exceptions,
Julian Anastasov94720e32018-05-02 09:41:19 +03001335 lockdep_is_held(&fnhe_lock));
1336 hash += hval;
1337
1338 fnhe_p = &hash->chain;
1339 fnhe = rcu_dereference_protected(*fnhe_p, lockdep_is_held(&fnhe_lock));
1340 while (fnhe) {
1341 if (fnhe->fnhe_daddr == daddr) {
1342 rcu_assign_pointer(*fnhe_p, rcu_dereference_protected(
1343 fnhe->fnhe_next, lockdep_is_held(&fnhe_lock)));
Xin Longee60ad22019-03-08 14:50:54 +08001344 /* set fnhe_daddr to 0 to ensure it won't bind with
1345 * new dsts in rt_bind_exception().
1346 */
1347 fnhe->fnhe_daddr = 0;
Julian Anastasov94720e32018-05-02 09:41:19 +03001348 fnhe_flush_routes(fnhe);
1349 kfree_rcu(fnhe, rcu);
1350 break;
1351 }
1352 fnhe_p = &fnhe->fnhe_next;
1353 fnhe = rcu_dereference_protected(fnhe->fnhe_next,
1354 lockdep_is_held(&fnhe_lock));
1355 }
1356
1357 spin_unlock_bh(&fnhe_lock);
1358}
1359
David Aherna5995e72019-04-30 07:45:50 -07001360static struct fib_nh_exception *find_exception(struct fib_nh_common *nhc,
1361 __be32 daddr)
David S. Miller4895c772012-07-17 04:19:00 -07001362{
David Aherna5995e72019-04-30 07:45:50 -07001363 struct fnhe_hash_bucket *hash = rcu_dereference(nhc->nhc_exceptions);
David S. Miller4895c772012-07-17 04:19:00 -07001364 struct fib_nh_exception *fnhe;
1365 u32 hval;
1366
David S. Millerf2bb4be2012-07-17 12:20:47 -07001367 if (!hash)
1368 return NULL;
1369
David S. Millerd3a25c92012-07-17 13:23:08 -07001370 hval = fnhe_hashfun(daddr);
David S. Miller4895c772012-07-17 04:19:00 -07001371
1372 for (fnhe = rcu_dereference(hash[hval].chain); fnhe;
1373 fnhe = rcu_dereference(fnhe->fnhe_next)) {
Julian Anastasov94720e32018-05-02 09:41:19 +03001374 if (fnhe->fnhe_daddr == daddr) {
1375 if (fnhe->fnhe_expires &&
1376 time_after(jiffies, fnhe->fnhe_expires)) {
David Aherna5995e72019-04-30 07:45:50 -07001377 ip_del_fnhe(nhc, daddr);
Julian Anastasov94720e32018-05-02 09:41:19 +03001378 break;
1379 }
David S. Millerf2bb4be2012-07-17 12:20:47 -07001380 return fnhe;
Julian Anastasov94720e32018-05-02 09:41:19 +03001381 }
David S. Millerf2bb4be2012-07-17 12:20:47 -07001382 }
1383 return NULL;
1384}
David S. Miller4895c772012-07-17 04:19:00 -07001385
David Ahern50d889b2018-05-21 09:08:13 -07001386/* MTU selection:
1387 * 1. mtu on route is locked - use it
1388 * 2. mtu from nexthop exception
1389 * 3. mtu from egress device
1390 */
1391
1392u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr)
1393{
David Aherneba618a2019-04-02 14:11:55 -07001394 struct fib_nh_common *nhc = res->nhc;
1395 struct net_device *dev = nhc->nhc_dev;
David Ahern50d889b2018-05-21 09:08:13 -07001396 struct fib_info *fi = res->fi;
David Ahern50d889b2018-05-21 09:08:13 -07001397 u32 mtu = 0;
1398
1399 if (dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu ||
1400 fi->fib_metrics->metrics[RTAX_LOCK - 1] & (1 << RTAX_MTU))
1401 mtu = fi->fib_mtu;
1402
1403 if (likely(!mtu)) {
1404 struct fib_nh_exception *fnhe;
1405
David Aherna5995e72019-04-30 07:45:50 -07001406 fnhe = find_exception(nhc, daddr);
David Ahern50d889b2018-05-21 09:08:13 -07001407 if (fnhe && !time_after_eq(jiffies, fnhe->fnhe_expires))
1408 mtu = fnhe->fnhe_pmtu;
1409 }
1410
1411 if (likely(!mtu))
1412 mtu = min(READ_ONCE(dev->mtu), IP_MAX_MTU);
1413
David Aherneba618a2019-04-02 14:11:55 -07001414 return mtu - lwtunnel_headroom(nhc->nhc_lwtstate, mtu);
David Ahern50d889b2018-05-21 09:08:13 -07001415}
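/* Example (a sketch under the assumption the caller already holds a
 * fib_result): bounding an outgoing packet after a lookup,
 *
 *	if (fib_lookup(net, &fl4, &res, 0) == 0)
 *		mtu = ip_mtu_from_fib_result(&res, fl4.daddr);
 *
 * which observes the lock/exception/device precedence listed above.
 */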
1416
David S. Millercaacf052012-07-31 15:06:50 -07001417static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
Wei Wanga4c2fd72017-06-17 10:42:42 -07001418 __be32 daddr, const bool do_cache)
David S. Millerf2bb4be2012-07-17 12:20:47 -07001419{
David S. Millercaacf052012-07-31 15:06:50 -07001420 bool ret = false;
1421
David S. Millerc5038a82012-07-31 15:02:02 -07001422 spin_lock_bh(&fnhe_lock);
Julian Anastasovaee06da2012-07-18 10:15:35 +00001423
David S. Millerc5038a82012-07-31 15:02:02 -07001424 if (daddr == fnhe->fnhe_daddr) {
Timo Teräs2ffae992013-06-27 10:27:05 +03001425 struct rtable __rcu **porig;
1426 struct rtable *orig;
Timo Teräs5aad1de2013-05-27 20:46:33 +00001427 int genid = fnhe_genid(dev_net(rt->dst.dev));
Timo Teräs2ffae992013-06-27 10:27:05 +03001428
1429 if (rt_is_input_route(rt))
1430 porig = &fnhe->fnhe_rth_input;
1431 else
1432 porig = &fnhe->fnhe_rth_output;
1433 orig = rcu_dereference(*porig);
Timo Teräs5aad1de2013-05-27 20:46:33 +00001434
1435 if (fnhe->fnhe_genid != genid) {
1436 fnhe->fnhe_genid = genid;
Steffen Klassert13d82bf2012-10-17 21:17:44 +00001437 fnhe->fnhe_gw = 0;
1438 fnhe->fnhe_pmtu = 0;
1439 fnhe->fnhe_expires = 0;
Hangbin Liu0e8411e42018-05-09 18:06:44 +08001440 fnhe->fnhe_mtu_locked = false;
Timo Teräs2ffae992013-06-27 10:27:05 +03001441 fnhe_flush_routes(fnhe);
1442 orig = NULL;
Steffen Klassert13d82bf2012-10-17 21:17:44 +00001443 }
Timo Teräs387aa652013-05-27 20:46:31 +00001444 fill_route_from_fnhe(rt, fnhe);
David Ahern1550c172019-04-05 16:30:27 -07001445 if (!rt->rt_gw4) {
1446 rt->rt_gw4 = daddr;
1447 rt->rt_gw_family = AF_INET;
1448 }
David S. Millerf2bb4be2012-07-17 12:20:47 -07001449
Wei Wanga4c2fd72017-06-17 10:42:42 -07001450 if (do_cache) {
Wei Wang08301062017-06-17 10:42:29 -07001451 dst_hold(&rt->dst);
Timo Teräs2ffae992013-06-27 10:27:05 +03001452 rcu_assign_pointer(*porig, rt);
Wei Wang08301062017-06-17 10:42:29 -07001453 if (orig) {
Wei Wang95c47f92017-06-17 10:42:30 -07001454 dst_dev_put(&orig->dst);
Wei Wang08301062017-06-17 10:42:29 -07001455 dst_release(&orig->dst);
Wei Wang08301062017-06-17 10:42:29 -07001456 }
Timo Teräs2ffae992013-06-27 10:27:05 +03001457 ret = true;
1458 }
David S. Millerc5038a82012-07-31 15:02:02 -07001459
1460 fnhe->fnhe_stamp = jiffies;
David S. Millerc5038a82012-07-31 15:02:02 -07001461 }
1462 spin_unlock_bh(&fnhe_lock);
David S. Millercaacf052012-07-31 15:06:50 -07001463
1464 return ret;
Eric Dumazet54764bb2012-07-31 01:08:23 +00001465}
1466
David Ahern87063a1f2019-04-30 07:45:49 -07001467static bool rt_cache_route(struct fib_nh_common *nhc, struct rtable *rt)
David S. Millerf2bb4be2012-07-17 12:20:47 -07001468{
Eric Dumazetd26b3a72012-07-31 05:45:30 +00001469 struct rtable *orig, *prev, **p;
David S. Millercaacf052012-07-31 15:06:50 -07001470 bool ret = true;
David S. Millerf2bb4be2012-07-17 12:20:47 -07001471
Eric Dumazetd26b3a72012-07-31 05:45:30 +00001472 if (rt_is_input_route(rt)) {
David Ahern0f457a32019-04-30 07:45:48 -07001473 p = (struct rtable **)&nhc->nhc_rth_input;
Eric Dumazetd26b3a72012-07-31 05:45:30 +00001474 } else {
David Ahern0f457a32019-04-30 07:45:48 -07001475 p = (struct rtable **)raw_cpu_ptr(nhc->nhc_pcpu_rth_output);
Eric Dumazetd26b3a72012-07-31 05:45:30 +00001476 }
David S. Millerf2bb4be2012-07-17 12:20:47 -07001477 orig = *p;
1478
Wei Wang08301062017-06-17 10:42:29 -07001479 /* hold dst before doing cmpxchg() to avoid race condition
1480 * on this dst
1481 */
1482 dst_hold(&rt->dst);
David S. Millerf2bb4be2012-07-17 12:20:47 -07001483 prev = cmpxchg(p, orig, rt);
1484 if (prev == orig) {
Wei Wang08301062017-06-17 10:42:29 -07001485 if (orig) {
Wei Wang95c47f92017-06-17 10:42:30 -07001486 dst_dev_put(&orig->dst);
Wei Wang08301062017-06-17 10:42:29 -07001487 dst_release(&orig->dst);
Wei Wang08301062017-06-17 10:42:29 -07001488 }
1489 } else {
1490 dst_release(&rt->dst);
David S. Millercaacf052012-07-31 15:06:50 -07001491 ret = false;
Wei Wang08301062017-06-17 10:42:29 -07001492 }
David S. Millercaacf052012-07-31 15:06:50 -07001493
1494 return ret;
1495}
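/* The cmpxchg() above is a lockless publish: dst_hold() takes a
 * reference for the cache slot up front, the new route is installed
 * only if the slot still holds the value read moments earlier, and
 * whichever pointer loses (the displaced entry on success, our own
 * hold on failure) is released. No spinlock is needed on this path.
 */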
1496
Eric Dumazet5055c372015-01-14 15:17:06 -08001497struct uncached_list {
1498 spinlock_t lock;
1499 struct list_head head;
1500};
1501
1502static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt_uncached_list);
David S. Millercaacf052012-07-31 15:06:50 -07001503
Xin Long510c3212018-02-14 19:06:02 +08001504void rt_add_uncached_list(struct rtable *rt)
David S. Millercaacf052012-07-31 15:06:50 -07001505{
Eric Dumazet5055c372015-01-14 15:17:06 -08001506 struct uncached_list *ul = raw_cpu_ptr(&rt_uncached_list);
1507
1508 rt->rt_uncached_list = ul;
1509
1510 spin_lock_bh(&ul->lock);
1511 list_add_tail(&rt->rt_uncached, &ul->head);
1512 spin_unlock_bh(&ul->lock);
David S. Millercaacf052012-07-31 15:06:50 -07001513}
1514
Xin Long510c3212018-02-14 19:06:02 +08001515void rt_del_uncached_list(struct rtable *rt)
David S. Millercaacf052012-07-31 15:06:50 -07001516{
Eric Dumazet78df76a2012-08-24 05:40:47 +00001517 if (!list_empty(&rt->rt_uncached)) {
Eric Dumazet5055c372015-01-14 15:17:06 -08001518 struct uncached_list *ul = rt->rt_uncached_list;
1519
1520 spin_lock_bh(&ul->lock);
David S. Millercaacf052012-07-31 15:06:50 -07001521 list_del(&rt->rt_uncached);
Eric Dumazet5055c372015-01-14 15:17:06 -08001522 spin_unlock_bh(&ul->lock);
David S. Millercaacf052012-07-31 15:06:50 -07001523 }
1524}
1525
Xin Long510c3212018-02-14 19:06:02 +08001526static void ipv4_dst_destroy(struct dst_entry *dst)
1527{
Xin Long510c3212018-02-14 19:06:02 +08001528 struct rtable *rt = (struct rtable *)dst;
1529
David Ahern1620a332018-10-04 20:07:54 -07001530 ip_dst_metrics_put(dst);
Xin Long510c3212018-02-14 19:06:02 +08001531 rt_del_uncached_list(rt);
1532}
1533
David S. Millercaacf052012-07-31 15:06:50 -07001534void rt_flush_dev(struct net_device *dev)
1535{
Eric Dumazet5055c372015-01-14 15:17:06 -08001536 struct rtable *rt;
1537 int cpu;
David S. Millercaacf052012-07-31 15:06:50 -07001538
Eric Dumazet5055c372015-01-14 15:17:06 -08001539 for_each_possible_cpu(cpu) {
1540 struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu);
1541
1542 spin_lock_bh(&ul->lock);
1543 list_for_each_entry(rt, &ul->head, rt_uncached) {
David S. Millercaacf052012-07-31 15:06:50 -07001544 if (rt->dst.dev != dev)
1545 continue;
Mahesh Bandewar8d7017f2019-07-01 14:38:57 -07001546 rt->dst.dev = blackhole_netdev;
David S. Millercaacf052012-07-31 15:06:50 -07001547 dev_hold(rt->dst.dev);
1548 dev_put(dev);
1549 }
Eric Dumazet5055c372015-01-14 15:17:06 -08001550 spin_unlock_bh(&ul->lock);
David S. Miller4895c772012-07-17 04:19:00 -07001551 }
1552}
1553
Eric Dumazet4331deb2012-07-25 05:11:23 +00001554static bool rt_cache_valid(const struct rtable *rt)
David S. Millerd2d68ba92012-07-17 12:58:50 -07001555{
Eric Dumazet4331deb2012-07-25 05:11:23 +00001556 return rt &&
1557 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1558 !rt_is_expired(rt);
David S. Millerd2d68ba92012-07-17 12:58:50 -07001559}
1560
David S. Millerf2bb4be2012-07-17 12:20:47 -07001561static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
David S. Miller5e2b61f2011-03-04 21:47:09 -08001562 const struct fib_result *res,
David S. Millerf2bb4be2012-07-17 12:20:47 -07001563 struct fib_nh_exception *fnhe,
Wei Wanga4c2fd72017-06-17 10:42:42 -07001564 struct fib_info *fi, u16 type, u32 itag,
1565 const bool do_cache)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001566{
David S. Millercaacf052012-07-31 15:06:50 -07001567 bool cached = false;
1568
Linus Torvalds1da177e2005-04-16 15:20:36 -07001569 if (fi) {
David Aherneba618a2019-04-02 14:11:55 -07001570 struct fib_nh_common *nhc = FIB_RES_NHC(*res);
David S. Miller4895c772012-07-17 04:19:00 -07001571
David Ahern0f5f7d72019-04-05 16:30:29 -07001572 if (nhc->nhc_gw_family && nhc->nhc_scope == RT_SCOPE_LINK) {
David Ahern77d5bc72019-09-17 10:39:49 -07001573 rt->rt_uses_gateway = 1;
David Ahern0f5f7d72019-04-05 16:30:29 -07001574 rt->rt_gw_family = nhc->nhc_gw_family;
1575 /* only INET and INET6 are supported */
1576 if (likely(nhc->nhc_gw_family == AF_INET))
1577 rt->rt_gw4 = nhc->nhc_gw.ipv4;
1578 else
1579 rt->rt_gw6 = nhc->nhc_gw.ipv6;
Julian Anastasov155e8332012-10-08 11:41:18 +00001580 }
David Ahern0f5f7d72019-04-05 16:30:29 -07001581
David Aherne1255ed2018-10-04 20:07:53 -07001582 ip_dst_init_metrics(&rt->dst, fi->fib_metrics);
1583
Patrick McHardyc7066f72011-01-14 13:36:42 +01001584#ifdef CONFIG_IP_ROUTE_CLASSID
David Aherndcb1ecb2019-06-03 20:19:50 -07001585 if (nhc->nhc_family == AF_INET) {
David Ahern87063a1f2019-04-30 07:45:49 -07001586 struct fib_nh *nh;
1587
1588 nh = container_of(nhc, struct fib_nh, nh_common);
1589 rt->dst.tclassid = nh->nh_tclassid;
1590 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001591#endif
David Ahern87063a1f2019-04-30 07:45:49 -07001592 rt->dst.lwtstate = lwtstate_get(nhc->nhc_lwtstate);
David S. Millerc5038a82012-07-31 15:02:02 -07001593 if (unlikely(fnhe))
Wei Wanga4c2fd72017-06-17 10:42:42 -07001594 cached = rt_bind_exception(rt, fnhe, daddr, do_cache);
1595 else if (do_cache)
David Ahern87063a1f2019-04-30 07:45:49 -07001596 cached = rt_cache_route(nhc, rt);
Julian Anastasov155e8332012-10-08 11:41:18 +00001597 if (unlikely(!cached)) {
1598 /* Routes we intend to cache in nexthop exception or
1599 * FIB nexthop have the DST_NOCACHE bit clear.
1600 * However, if we are unsuccessful at storing this
1601			 * route into the cache, we really need to set it.
1602 */
David Ahern1550c172019-04-05 16:30:27 -07001603 if (!rt->rt_gw4) {
1604 rt->rt_gw_family = AF_INET;
1605 rt->rt_gw4 = daddr;
1606 }
Julian Anastasov155e8332012-10-08 11:41:18 +00001607 rt_add_uncached_list(rt);
1608 }
1609 } else
David S. Millercaacf052012-07-31 15:06:50 -07001610 rt_add_uncached_list(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001611
Patrick McHardyc7066f72011-01-14 13:36:42 +01001612#ifdef CONFIG_IP_ROUTE_CLASSID
Linus Torvalds1da177e2005-04-16 15:20:36 -07001613#ifdef CONFIG_IP_MULTIPLE_TABLES
David S. Miller85b91b02012-07-13 08:21:29 -07001614 set_class_tag(rt, res->tclassid);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001615#endif
1616 set_class_tag(rt, itag);
1617#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001618}
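/* Summary of the caching decision above: with a fib_info, prefer the
 * nexthop exception (rt_bind_exception), else the per-nexthop cache
 * (rt_cache_route); whatever cannot be cached, including the
 * no-fib_info case, goes onto the per-cpu uncached list so it can
 * still be flushed when a device goes away.
 */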
1619
David Ahern9ab179d2016-04-07 11:10:06 -07001620struct rtable *rt_dst_alloc(struct net_device *dev,
1621 unsigned int flags, u16 type,
1622 bool nopolicy, bool noxfrm, bool will_cache)
David S. Miller0c4dcd52011-02-17 15:42:37 -08001623{
David Ahernd08c4f32015-09-02 13:58:34 -07001624 struct rtable *rt;
1625
1626 rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
Wei Wanga4c2fd72017-06-17 10:42:42 -07001627 (will_cache ? 0 : DST_HOST) |
David Ahernd08c4f32015-09-02 13:58:34 -07001628 (nopolicy ? DST_NOPOLICY : 0) |
Wei Wangb2a9c0e2017-06-17 10:42:41 -07001629 (noxfrm ? DST_NOXFRM : 0));
David Ahernd08c4f32015-09-02 13:58:34 -07001630
1631 if (rt) {
1632 rt->rt_genid = rt_genid_ipv4(dev_net(dev));
1633 rt->rt_flags = flags;
1634 rt->rt_type = type;
1635 rt->rt_is_input = 0;
1636 rt->rt_iif = 0;
1637 rt->rt_pmtu = 0;
Sabrina Dubrocad52e5a72018-03-14 10:21:14 +01001638 rt->rt_mtu_locked = 0;
David Ahern77d5bc72019-09-17 10:39:49 -07001639 rt->rt_uses_gateway = 0;
David Ahern1550c172019-04-05 16:30:27 -07001640 rt->rt_gw_family = 0;
1641 rt->rt_gw4 = 0;
David Ahernd08c4f32015-09-02 13:58:34 -07001642 INIT_LIST_HEAD(&rt->rt_uncached);
1643
1644 rt->dst.output = ip_output;
1645 if (flags & RTCF_LOCAL)
1646 rt->dst.input = ip_local_deliver;
1647 }
1648
1649 return rt;
David S. Miller0c4dcd52011-02-17 15:42:37 -08001650}
David Ahern9ab179d2016-04-07 11:10:06 -07001651EXPORT_SYMBOL(rt_dst_alloc);
David S. Miller0c4dcd52011-02-17 15:42:37 -08001652
Stephen Suryaputra5b18f122019-06-26 02:21:16 -04001653struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt)
1654{
1655 struct rtable *new_rt;
1656
1657 new_rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
1658 rt->dst.flags);
1659
1660 if (new_rt) {
1661 new_rt->rt_genid = rt_genid_ipv4(dev_net(dev));
1662 new_rt->rt_flags = rt->rt_flags;
1663 new_rt->rt_type = rt->rt_type;
1664 new_rt->rt_is_input = rt->rt_is_input;
1665 new_rt->rt_iif = rt->rt_iif;
1666 new_rt->rt_pmtu = rt->rt_pmtu;
1667 new_rt->rt_mtu_locked = rt->rt_mtu_locked;
1668 new_rt->rt_gw_family = rt->rt_gw_family;
1669 if (rt->rt_gw_family == AF_INET)
1670 new_rt->rt_gw4 = rt->rt_gw4;
1671 else if (rt->rt_gw_family == AF_INET6)
1672 new_rt->rt_gw6 = rt->rt_gw6;
1673 INIT_LIST_HEAD(&new_rt->rt_uncached);
1674
1675 new_rt->dst.flags |= DST_HOST;
1676 new_rt->dst.input = rt->dst.input;
1677 new_rt->dst.output = rt->dst.output;
1678 new_rt->dst.error = rt->dst.error;
1679 new_rt->dst.lastuse = jiffies;
1680 new_rt->dst.lwtstate = lwtstate_get(rt->dst.lwtstate);
1681 }
1682 return new_rt;
1683}
1684EXPORT_SYMBOL(rt_dst_clone);
1685
Eric Dumazet96d36222010-06-02 19:21:31 +00001686/* called in rcu_read_lock() section */
Paolo Abenibc044e82017-09-28 15:51:37 +02001687int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1688 u8 tos, struct net_device *dev,
1689 struct in_device *in_dev, u32 *itag)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001690{
Eric Dumazetb5f7e752010-06-02 12:05:27 +00001691 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001692
1693 /* Primary sanity checks. */
Ian Morris51456b22015-04-03 09:17:26 +01001694 if (!in_dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001695 return -EINVAL;
1696
Jan Engelhardt1e637c72008-01-21 03:18:08 -08001697 if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
Thomas Grafd0daebc32012-06-12 00:44:01 +00001698 skb->protocol != htons(ETH_P_IP))
Paolo Abenibc044e82017-09-28 15:51:37 +02001699 return -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001700
Alexander Duyck75fea732015-09-28 11:10:38 -07001701 if (ipv4_is_loopback(saddr) && !IN_DEV_ROUTE_LOCALNET(in_dev))
Paolo Abenibc044e82017-09-28 15:51:37 +02001702 return -EINVAL;
Thomas Grafd0daebc32012-06-12 00:44:01 +00001703
Joe Perchesf97c1e02007-12-16 13:45:43 -08001704 if (ipv4_is_zeronet(saddr)) {
Edward Chron1d2f4eb2019-01-31 15:00:40 -08001705 if (!ipv4_is_local_multicast(daddr) &&
1706 ip_hdr(skb)->protocol != IPPROTO_IGMP)
Paolo Abenibc044e82017-09-28 15:51:37 +02001707 return -EINVAL;
Eric Dumazetb5f7e752010-06-02 12:05:27 +00001708 } else {
David S. Miller9e56e382012-06-28 18:54:02 -07001709 err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
Paolo Abenibc044e82017-09-28 15:51:37 +02001710 in_dev, itag);
Eric Dumazetb5f7e752010-06-02 12:05:27 +00001711 if (err < 0)
Paolo Abenibc044e82017-09-28 15:51:37 +02001712 return err;
Eric Dumazetb5f7e752010-06-02 12:05:27 +00001713 }
Paolo Abenibc044e82017-09-28 15:51:37 +02001714 return 0;
1715}
1716
1717/* called in rcu_read_lock() section */
1718static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1719 u8 tos, struct net_device *dev, int our)
1720{
1721 struct in_device *in_dev = __in_dev_get_rcu(dev);
1722 unsigned int flags = RTCF_MULTICAST;
1723 struct rtable *rth;
1724 u32 itag = 0;
1725 int err;
1726
1727 err = ip_mc_validate_source(skb, daddr, saddr, tos, dev, in_dev, &itag);
1728 if (err)
1729 return err;
1730
David Ahernd08c4f32015-09-02 13:58:34 -07001731 if (our)
1732 flags |= RTCF_LOCAL;
1733
1734 rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST,
David S. Millerf2bb4be2012-07-17 12:20:47 -07001735 IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001736 if (!rth)
Paolo Abenibc044e82017-09-28 15:51:37 +02001737 return -ENOBUFS;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001738
Patrick McHardyc7066f72011-01-14 13:36:42 +01001739#ifdef CONFIG_IP_ROUTE_CLASSID
Changli Gaod8d1f302010-06-10 23:31:35 -07001740 rth->dst.tclassid = itag;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001741#endif
David S. Millercf911662011-04-28 14:31:47 -07001742 rth->dst.output = ip_rt_bug;
David S. Miller9917e1e82012-07-17 14:44:26 -07001743	rth->rt_is_input = 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001744
1745#ifdef CONFIG_IP_MROUTE
Joe Perchesf97c1e02007-12-16 13:45:43 -08001746 if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev))
Changli Gaod8d1f302010-06-10 23:31:35 -07001747 rth->dst.input = ip_mr_input;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001748#endif
1749 RT_CACHE_STAT_INC(in_slow_mc);
1750
David S. Miller89aef892012-07-17 11:00:09 -07001751 skb_dst_set(skb, &rth->dst);
1752 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001753}
1754
1755
1756static void ip_handle_martian_source(struct net_device *dev,
1757 struct in_device *in_dev,
1758 struct sk_buff *skb,
Al Viro9e12bb22006-09-26 21:25:20 -07001759 __be32 daddr,
1760 __be32 saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001761{
1762 RT_CACHE_STAT_INC(in_martian_src);
1763#ifdef CONFIG_IP_ROUTE_VERBOSE
1764 if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) {
1765 /*
1766		 * RFC1812 recommendation: if the source is martian,
1767		 * the only hint is the MAC header.
1768 */
Joe Perches058bd4d2012-03-11 18:36:11 +00001769 pr_warn("martian source %pI4 from %pI4, on dev %s\n",
Harvey Harrison673d57e2008-10-31 00:53:57 -07001770 &daddr, &saddr, dev->name);
Arnaldo Carvalho de Melo98e399f2007-03-19 15:33:04 -07001771 if (dev->hard_header_len && skb_mac_header_was_set(skb)) {
Joe Perches058bd4d2012-03-11 18:36:11 +00001772 print_hex_dump(KERN_WARNING, "ll header: ",
1773 DUMP_PREFIX_OFFSET, 16, 1,
1774 skb_mac_header(skb),
David S. Millerb2c85102018-11-20 10:15:36 -08001775 dev->hard_header_len, false);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001776 }
1777 }
1778#endif
1779}
1780
Eric Dumazet47360222010-06-03 04:13:21 +00001781/* called in rcu_read_lock() section */
Stephen Hemminger5969f712008-04-10 01:52:09 -07001782static int __mkroute_input(struct sk_buff *skb,
David S. Miller982721f2011-02-16 21:44:24 -08001783 const struct fib_result *res,
Stephen Hemminger5969f712008-04-10 01:52:09 -07001784 struct in_device *in_dev,
David S. Millerc6cffba2012-07-26 11:14:38 +00001785 __be32 daddr, __be32 saddr, u32 tos)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001786{
David Aherneba618a2019-04-02 14:11:55 -07001787 struct fib_nh_common *nhc = FIB_RES_NHC(*res);
1788 struct net_device *dev = nhc->nhc_dev;
Timo Teräs2ffae992013-06-27 10:27:05 +03001789 struct fib_nh_exception *fnhe;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001790 struct rtable *rth;
1791 int err;
1792 struct in_device *out_dev;
David S. Millerd2d68ba92012-07-17 12:58:50 -07001793 bool do_cache;
Li RongQingfbdc0ad2014-05-22 16:36:55 +08001794 u32 itag = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001795
1796 /* get a working reference to the output device */
David Aherneba618a2019-04-02 14:11:55 -07001797 out_dev = __in_dev_get_rcu(dev);
Ian Morris51456b22015-04-03 09:17:26 +01001798 if (!out_dev) {
Joe Perchese87cc472012-05-13 21:56:26 +00001799 net_crit_ratelimited("Bug in ip_route_input_slow(). Please report.\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001800 return -EINVAL;
1801 }
1802
Michael Smith5c04c812011-04-07 04:51:50 +00001803 err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res),
David S. Miller9e56e382012-06-28 18:54:02 -07001804 in_dev->dev, in_dev, &itag);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001805 if (err < 0) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001806 ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001807 saddr);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001808
Linus Torvalds1da177e2005-04-16 15:20:36 -07001809 goto cleanup;
1810 }
1811
Julian Anastasove81da0e2012-10-08 11:41:15 +00001812 do_cache = res->fi && !itag;
1813 if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) &&
David Aherneba618a2019-04-02 14:11:55 -07001814 skb->protocol == htons(ETH_P_IP)) {
David Ahernbdf00462019-04-05 16:30:26 -07001815 __be32 gw;
David Aherneba618a2019-04-02 14:11:55 -07001816
David Ahernbdf00462019-04-05 16:30:26 -07001817 gw = nhc->nhc_gw_family == AF_INET ? nhc->nhc_gw.ipv4 : 0;
David Aherneba618a2019-04-02 14:11:55 -07001818 if (IN_DEV_SHARED_MEDIA(out_dev) ||
1819 inet_addr_onlink(out_dev, saddr, gw))
1820 IPCB(skb)->flags |= IPSKB_DOREDIRECT;
1821 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001822
1823 if (skb->protocol != htons(ETH_P_IP)) {
1824		/* Not IP (i.e. ARP). Do not create a route if it is
1825		 * invalid for proxy arp. DNAT routes are always valid.
Jesper Dangaard Brouer65324142010-01-05 05:50:47 +00001826		 *
1827		 * The proxy arp feature has been extended to allow ARP
1828		 * replies back to the same interface, to support
1829		 * Private VLAN switch technologies. See arp.c.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001830 */
Jesper Dangaard Brouer65324142010-01-05 05:50:47 +00001831 if (out_dev == in_dev &&
1832 IN_DEV_PROXY_ARP_PVLAN(in_dev) == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001833 err = -EINVAL;
1834 goto cleanup;
1835 }
1836 }
1837
David Aherna5995e72019-04-30 07:45:50 -07001838 fnhe = find_exception(nhc, daddr);
Julian Anastasove81da0e2012-10-08 11:41:15 +00001839 if (do_cache) {
Julian Anastasov94720e32018-05-02 09:41:19 +03001840 if (fnhe)
Timo Teräs2ffae992013-06-27 10:27:05 +03001841 rth = rcu_dereference(fnhe->fnhe_rth_input);
Julian Anastasov94720e32018-05-02 09:41:19 +03001842 else
David Ahern0f457a32019-04-30 07:45:48 -07001843 rth = rcu_dereference(nhc->nhc_rth_input);
Julian Anastasove81da0e2012-10-08 11:41:15 +00001844 if (rt_cache_valid(rth)) {
1845 skb_dst_set_noref(skb, &rth->dst);
1846 goto out;
David S. Millerd2d68ba92012-07-17 12:58:50 -07001847 }
1848 }
David S. Millerf2bb4be2012-07-17 12:20:47 -07001849
David Ahernd08c4f32015-09-02 13:58:34 -07001850 rth = rt_dst_alloc(out_dev->dev, 0, res->type,
David S. Miller5c1e6aa2011-04-28 14:13:38 -07001851 IN_DEV_CONF_GET(in_dev, NOPOLICY),
David S. Millerd2d68ba92012-07-17 12:58:50 -07001852 IN_DEV_CONF_GET(out_dev, NOXFRM), do_cache);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001853 if (!rth) {
1854 err = -ENOBUFS;
1855 goto cleanup;
1856 }
1857
David S. Miller9917e1e82012-07-17 14:44:26 -07001858 rth->rt_is_input = 1;
Duan Jionga6254862014-02-17 15:23:43 +08001859 RT_CACHE_STAT_INC(in_slow_tot);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001860
Changli Gaod8d1f302010-06-10 23:31:35 -07001861 rth->dst.input = ip_forward;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001862
Wei Wanga4c2fd72017-06-17 10:42:42 -07001863 rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag,
1864 do_cache);
David Ahern99428952018-02-13 20:32:04 -08001865 lwtunnel_set_redirect(&rth->dst);
David S. Millerc6cffba2012-07-26 11:14:38 +00001866 skb_dst_set(skb, &rth->dst);
David S. Millerd2d68ba92012-07-17 12:58:50 -07001867out:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001868 err = 0;
1869 cleanup:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001870 return err;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001871}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001872
Peter Nørlund79a13152015-09-30 10:12:22 +02001873#ifdef CONFIG_IP_ROUTE_MULTIPATH
Peter Nørlund79a13152015-09-30 10:12:22 +02001874/* To make ICMP packets follow the right flow, the multipath hash is
Nikolay Aleksandrovbf4e0a32017-03-16 15:28:00 +02001875 * calculated from the inner IP addresses.
Peter Nørlund79a13152015-09-30 10:12:22 +02001876 */
Nikolay Aleksandrovbf4e0a32017-03-16 15:28:00 +02001877static void ip_multipath_l3_keys(const struct sk_buff *skb,
1878 struct flow_keys *hash_keys)
Peter Nørlund79a13152015-09-30 10:12:22 +02001879{
1880 const struct iphdr *outer_iph = ip_hdr(skb);
David Ahern6f74b6c2018-03-02 08:32:13 -08001881 const struct iphdr *key_iph = outer_iph;
Nikolay Aleksandrovbf4e0a32017-03-16 15:28:00 +02001882 const struct iphdr *inner_iph;
Peter Nørlund79a13152015-09-30 10:12:22 +02001883 const struct icmphdr *icmph;
1884 struct iphdr _inner_iph;
Nikolay Aleksandrovbf4e0a32017-03-16 15:28:00 +02001885 struct icmphdr _icmph;
1886
Nikolay Aleksandrovbf4e0a32017-03-16 15:28:00 +02001887 if (likely(outer_iph->protocol != IPPROTO_ICMP))
David Ahern6f74b6c2018-03-02 08:32:13 -08001888 goto out;
Peter Nørlund79a13152015-09-30 10:12:22 +02001889
1890 if (unlikely((outer_iph->frag_off & htons(IP_OFFSET)) != 0))
David Ahern6f74b6c2018-03-02 08:32:13 -08001891 goto out;
Peter Nørlund79a13152015-09-30 10:12:22 +02001892
1893 icmph = skb_header_pointer(skb, outer_iph->ihl * 4, sizeof(_icmph),
1894 &_icmph);
1895 if (!icmph)
David Ahern6f74b6c2018-03-02 08:32:13 -08001896 goto out;
Peter Nørlund79a13152015-09-30 10:12:22 +02001897
1898 if (icmph->type != ICMP_DEST_UNREACH &&
1899 icmph->type != ICMP_REDIRECT &&
1900 icmph->type != ICMP_TIME_EXCEEDED &&
Nikolay Aleksandrovbf4e0a32017-03-16 15:28:00 +02001901 icmph->type != ICMP_PARAMETERPROB)
David Ahern6f74b6c2018-03-02 08:32:13 -08001902 goto out;
Peter Nørlund79a13152015-09-30 10:12:22 +02001903
1904 inner_iph = skb_header_pointer(skb,
1905 outer_iph->ihl * 4 + sizeof(_icmph),
1906 sizeof(_inner_iph), &_inner_iph);
1907 if (!inner_iph)
David Ahern6f74b6c2018-03-02 08:32:13 -08001908 goto out;
1909
1910 key_iph = inner_iph;
1911out:
1912 hash_keys->addrs.v4addrs.src = key_iph->saddr;
1913 hash_keys->addrs.v4addrs.dst = key_iph->daddr;
Peter Nørlund79a13152015-09-30 10:12:22 +02001914}
1915
Nikolay Aleksandrovbf4e0a32017-03-16 15:28:00 +02001916/* if skb is set, it will be used and fl4 can be NULL */
David Ahern7efc0b62018-03-02 08:32:12 -08001917int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
Roopa Prabhue37b1e92018-02-28 22:42:41 -05001918 const struct sk_buff *skb, struct flow_keys *flkeys)
Nikolay Aleksandrovbf4e0a32017-03-16 15:28:00 +02001919{
Ido Schimmel2a8e4992019-03-01 13:38:43 +00001920 u32 multipath_hash = fl4 ? fl4->flowi4_multipath_hash : 0;
Nikolay Aleksandrovbf4e0a32017-03-16 15:28:00 +02001921 struct flow_keys hash_keys;
1922 u32 mhash;
1923
1924 switch (net->ipv4.sysctl_fib_multipath_hash_policy) {
1925 case 0:
1926 memset(&hash_keys, 0, sizeof(hash_keys));
1927 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
1928 if (skb) {
1929 ip_multipath_l3_keys(skb, &hash_keys);
1930 } else {
1931 hash_keys.addrs.v4addrs.src = fl4->saddr;
1932 hash_keys.addrs.v4addrs.dst = fl4->daddr;
1933 }
1934 break;
1935 case 1:
1936 /* skb is currently provided only when forwarding */
1937 if (skb) {
1938 unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
1939 struct flow_keys keys;
1940
1941 /* short-circuit if we already have L4 hash present */
1942 if (skb->l4_hash)
1943 return skb_get_hash_raw(skb) >> 1;
David Ahernec7127a2018-03-02 08:32:14 -08001944
Nikolay Aleksandrovbf4e0a32017-03-16 15:28:00 +02001945 memset(&hash_keys, 0, sizeof(hash_keys));
David Ahern1fe4b112018-02-21 11:00:54 -08001946
David Ahernec7127a2018-03-02 08:32:14 -08001947 if (!flkeys) {
Roopa Prabhue37b1e92018-02-28 22:42:41 -05001948 skb_flow_dissect_flow_keys(skb, &keys, flag);
David Ahernec7127a2018-03-02 08:32:14 -08001949 flkeys = &keys;
Roopa Prabhue37b1e92018-02-28 22:42:41 -05001950 }
David Ahernec7127a2018-03-02 08:32:14 -08001951
1952 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
1953 hash_keys.addrs.v4addrs.src = flkeys->addrs.v4addrs.src;
1954 hash_keys.addrs.v4addrs.dst = flkeys->addrs.v4addrs.dst;
1955 hash_keys.ports.src = flkeys->ports.src;
1956 hash_keys.ports.dst = flkeys->ports.dst;
1957 hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
Nikolay Aleksandrovbf4e0a32017-03-16 15:28:00 +02001958 } else {
1959 memset(&hash_keys, 0, sizeof(hash_keys));
1960 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
1961 hash_keys.addrs.v4addrs.src = fl4->saddr;
1962 hash_keys.addrs.v4addrs.dst = fl4->daddr;
1963 hash_keys.ports.src = fl4->fl4_sport;
1964 hash_keys.ports.dst = fl4->fl4_dport;
1965 hash_keys.basic.ip_proto = fl4->flowi4_proto;
1966 }
1967 break;
Stephen Suryaputra363887a2019-06-13 14:38:58 -04001968 case 2:
1969 memset(&hash_keys, 0, sizeof(hash_keys));
Stephen Suryaputra363887a2019-06-13 14:38:58 -04001970 /* skb is currently provided only when forwarding */
1971 if (skb) {
1972 struct flow_keys keys;
1973
1974 skb_flow_dissect_flow_keys(skb, &keys, 0);
Stephen Suryaputra828b2b42019-07-06 10:55:17 -04001975 /* Inner can be v4 or v6 */
1976 if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
1977 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
1978 hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
1979 hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst;
1980 } else if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
1981 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1982 hash_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src;
1983 hash_keys.addrs.v6addrs.dst = keys.addrs.v6addrs.dst;
1984 hash_keys.tags.flow_label = keys.tags.flow_label;
1985 hash_keys.basic.ip_proto = keys.basic.ip_proto;
1986 } else {
1987 /* Same as case 0 */
1988 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
1989 ip_multipath_l3_keys(skb, &hash_keys);
1990 }
Stephen Suryaputra363887a2019-06-13 14:38:58 -04001991 } else {
1992 /* Same as case 0 */
Stephen Suryaputra828b2b42019-07-06 10:55:17 -04001993 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
Stephen Suryaputra363887a2019-06-13 14:38:58 -04001994 hash_keys.addrs.v4addrs.src = fl4->saddr;
1995 hash_keys.addrs.v4addrs.dst = fl4->daddr;
1996 }
1997 break;
Nikolay Aleksandrovbf4e0a32017-03-16 15:28:00 +02001998 }
1999 mhash = flow_hash_from_keys(&hash_keys);
2000
wenxu24ba1442019-02-24 11:36:20 +08002001 if (multipath_hash)
2002 mhash = jhash_2words(mhash, multipath_hash, 0);
2003
Nikolay Aleksandrovbf4e0a32017-03-16 15:28:00 +02002004 return mhash >> 1;
2005}
Peter Nørlund79a13152015-09-30 10:12:22 +02002006#endif /* CONFIG_IP_ROUTE_MULTIPATH */
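/* The policy selector above is runtime-configurable; assuming the
 * standard sysctl knob, e.g.
 *
 *	sysctl -w net.ipv4.fib_multipath_hash_policy=1
 *
 * switches from L3 source/destination hashing (0) to the L4
 * five-tuple (1); policy 2 hashes the inner L3 header of
 * encapsulated traffic.
 */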
2007
Stephen Hemminger5969f712008-04-10 01:52:09 -07002008static int ip_mkroute_input(struct sk_buff *skb,
2009 struct fib_result *res,
Stephen Hemminger5969f712008-04-10 01:52:09 -07002010 struct in_device *in_dev,
Roopa Prabhue37b1e92018-02-28 22:42:41 -05002011 __be32 daddr, __be32 saddr, u32 tos,
2012 struct flow_keys *hkeys)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002013{
Linus Torvalds1da177e2005-04-16 15:20:36 -07002014#ifdef CONFIG_IP_ROUTE_MULTIPATH
David Ahern5481d732019-06-03 20:19:49 -07002015 if (res->fi && fib_info_num_path(res->fi) > 1) {
David Ahern7efc0b62018-03-02 08:32:12 -08002016 int h = fib_multipath_hash(res->fi->fib_net, NULL, skb, hkeys);
Peter Nørlund0e884c72015-09-30 10:12:21 +02002017
Peter Nørlund0e884c72015-09-30 10:12:21 +02002018 fib_select_multipath(res, h);
2019 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002020#endif
2021
2022 /* create a routing cache entry */
David S. Millerc6cffba2012-07-26 11:14:38 +00002023 return __mkroute_input(skb, res, in_dev, daddr, saddr, tos);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002024}
2025
Linus Torvalds1da177e2005-04-16 15:20:36 -07002026/*
2027 *	NOTE. We drop all packets that have local source
2028 *	addresses, because every properly looped back packet
2029 *	must already have the correct destination attached by the output routine.
2030 *
2031 *	This approach solves two big problems:
2032 *	1. Non-simplex devices are handled properly.
2033 *	2. IP spoofing attempts are filtered with a 100% guarantee.
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00002034 * called with rcu_read_lock()
Linus Torvalds1da177e2005-04-16 15:20:36 -07002035 */
2036
Al Viro9e12bb22006-09-26 21:25:20 -07002037static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
David Ahern5510cdf2017-05-25 10:42:34 -07002038 u8 tos, struct net_device *dev,
2039 struct fib_result *res)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002040{
Eric Dumazet96d36222010-06-02 19:21:31 +00002041 struct in_device *in_dev = __in_dev_get_rcu(dev);
Roopa Prabhue37b1e92018-02-28 22:42:41 -05002042 struct flow_keys *flkeys = NULL, _flkeys;
2043 struct net *net = dev_net(dev);
Thomas Graf1b7179d2015-07-21 10:43:59 +02002044 struct ip_tunnel_info *tun_info;
Roopa Prabhue37b1e92018-02-28 22:42:41 -05002045 int err = -EINVAL;
Eric Dumazet95c96172012-04-15 05:58:06 +00002046 unsigned int flags = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002047 u32 itag = 0;
Eric Dumazet95c96172012-04-15 05:58:06 +00002048 struct rtable *rth;
Roopa Prabhue37b1e92018-02-28 22:42:41 -05002049 struct flowi4 fl4;
Xin Long0a904782019-06-02 19:10:24 +08002050 bool do_cache = true;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002051
2052 /* IP on this device is disabled. */
2053
2054 if (!in_dev)
2055 goto out;
2056
2057	/* Check for the weirdest martians, which cannot be detected
2058 by fib_lookup.
2059 */
2060
Jiri Benc61adedf2015-08-20 13:56:25 +02002061 tun_info = skb_tunnel_info(skb);
Jiri Benc46fa0622015-08-28 20:48:19 +02002062 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
Thomas Graf1b7179d2015-07-21 10:43:59 +02002063 fl4.flowi4_tun_key.tun_id = tun_info->key.tun_id;
2064 else
2065 fl4.flowi4_tun_key.tun_id = 0;
Thomas Graff38a9eb2015-07-21 10:43:56 +02002066 skb_dst_drop(skb);
2067
Thomas Grafd0daebc32012-06-12 00:44:01 +00002068 if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002069 goto martian_source;
2070
David Ahern5510cdf2017-05-25 10:42:34 -07002071 res->fi = NULL;
2072 res->table = NULL;
Andy Walls27a954b2010-10-17 15:11:22 +00002073 if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002074 goto brd_input;
2075
2076 /* Accept zero addresses only to limited broadcast;
2077	 * I do not even know whether to fix it or not. Waiting for complaints :-)
2078 */
Joe Perchesf97c1e02007-12-16 13:45:43 -08002079 if (ipv4_is_zeronet(saddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002080 goto martian_source;
2081
Thomas Grafd0daebc32012-06-12 00:44:01 +00002082 if (ipv4_is_zeronet(daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002083 goto martian_destination;
2084
Eric Dumazet9eb43e72012-08-03 21:27:25 +00002085	/* The following code tries to avoid calling IN_DEV_NET_ROUTE_LOCALNET()
2086	 * more than once, calling it only if daddr and/or saddr are loopback addresses
2087 */
2088 if (ipv4_is_loopback(daddr)) {
2089 if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
Thomas Grafd0daebc32012-06-12 00:44:01 +00002090 goto martian_destination;
Eric Dumazet9eb43e72012-08-03 21:27:25 +00002091 } else if (ipv4_is_loopback(saddr)) {
2092 if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
Thomas Grafd0daebc32012-06-12 00:44:01 +00002093 goto martian_source;
2094 }
2095
Linus Torvalds1da177e2005-04-16 15:20:36 -07002096 /*
2097 * Now we are ready to route packet.
2098 */
David S. Miller68a5e3d2011-03-11 20:07:33 -05002099 fl4.flowi4_oif = 0;
David Aherne0d56fd2016-09-10 12:09:57 -07002100 fl4.flowi4_iif = dev->ifindex;
David S. Miller68a5e3d2011-03-11 20:07:33 -05002101 fl4.flowi4_mark = skb->mark;
2102 fl4.flowi4_tos = tos;
2103 fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
David Ahernb84f7872015-09-29 19:07:07 -07002104 fl4.flowi4_flags = 0;
David S. Miller68a5e3d2011-03-11 20:07:33 -05002105 fl4.daddr = daddr;
2106 fl4.saddr = saddr;
Julian Anastasov8bcfd092017-02-26 15:50:52 +02002107 fl4.flowi4_uid = sock_net_uid(net, NULL);
Roopa Prabhue37b1e92018-02-28 22:42:41 -05002108
David Ahern5a847a62018-05-16 13:36:40 -07002109 if (fib4_rules_early_flow_dissect(net, skb, &fl4, &_flkeys)) {
Roopa Prabhue37b1e92018-02-28 22:42:41 -05002110 flkeys = &_flkeys;
David Ahern5a847a62018-05-16 13:36:40 -07002111 } else {
2112 fl4.flowi4_proto = 0;
2113 fl4.fl4_sport = 0;
2114 fl4.fl4_dport = 0;
2115 }
Roopa Prabhue37b1e92018-02-28 22:42:41 -05002116
David Ahern5510cdf2017-05-25 10:42:34 -07002117 err = fib_lookup(net, &fl4, res, 0);
Duan Jiongcd0f0b92014-02-14 18:26:22 +08002118 if (err != 0) {
2119 if (!IN_DEV_FORWARD(in_dev))
2120 err = -EHOSTUNREACH;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002121 goto no_route;
Duan Jiongcd0f0b92014-02-14 18:26:22 +08002122 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002123
Xin Long5cbf7772018-07-27 16:37:28 +08002124 if (res->type == RTN_BROADCAST) {
2125 if (IN_DEV_BFORWARD(in_dev))
2126 goto make_route;
Xin Long0a904782019-06-02 19:10:24 +08002127		/* do not cache if bc_forwarding is enabled */
2128 if (IPV4_DEVCONF_ALL(net, BC_FORWARDING))
2129 do_cache = false;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002130 goto brd_input;
Xin Long5cbf7772018-07-27 16:37:28 +08002131 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002132
David Ahern5510cdf2017-05-25 10:42:34 -07002133 if (res->type == RTN_LOCAL) {
Michael Smith5c04c812011-04-07 04:51:50 +00002134 err = fib_validate_source(skb, saddr, daddr, tos,
Cong Wang0d5edc62014-04-15 16:25:35 -07002135 0, dev, in_dev, &itag);
Eric Dumazetb5f7e752010-06-02 12:05:27 +00002136 if (err < 0)
David Ahern0d753962015-09-28 11:10:44 -07002137 goto martian_source;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002138 goto local_input;
2139 }
2140
Duan Jiongcd0f0b92014-02-14 18:26:22 +08002141 if (!IN_DEV_FORWARD(in_dev)) {
2142 err = -EHOSTUNREACH;
David S. Miller251da412012-06-26 16:27:09 -07002143 goto no_route;
Duan Jiongcd0f0b92014-02-14 18:26:22 +08002144 }
David Ahern5510cdf2017-05-25 10:42:34 -07002145 if (res->type != RTN_UNICAST)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002146 goto martian_destination;
2147
Xin Long5cbf7772018-07-27 16:37:28 +08002148make_route:
Roopa Prabhue37b1e92018-02-28 22:42:41 -05002149 err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos, flkeys);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002150out: return err;
2151
2152brd_input:
2153 if (skb->protocol != htons(ETH_P_IP))
2154 goto e_inval;
2155
David S. Miller41347dc2012-06-28 04:05:27 -07002156 if (!ipv4_is_zeronet(saddr)) {
David S. Miller9e56e382012-06-28 18:54:02 -07002157 err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
2158 in_dev, &itag);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002159 if (err < 0)
David Ahern0d753962015-09-28 11:10:44 -07002160 goto martian_source;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002161 }
2162 flags |= RTCF_BROADCAST;
David Ahern5510cdf2017-05-25 10:42:34 -07002163 res->type = RTN_BROADCAST;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002164 RT_CACHE_STAT_INC(in_brd);
2165
2166local_input:
Xin Long0a904782019-06-02 19:10:24 +08002167 do_cache &= res->fi && !itag;
2168 if (do_cache) {
2169 struct fib_nh_common *nhc = FIB_RES_NHC(*res);
David Aherneba618a2019-04-02 14:11:55 -07002170
Xin Long0a904782019-06-02 19:10:24 +08002171 rth = rcu_dereference(nhc->nhc_rth_input);
2172 if (rt_cache_valid(rth)) {
2173 skb_dst_set_noref(skb, &rth->dst);
2174 err = 0;
2175 goto out;
David S. Millerd2d68ba92012-07-17 12:58:50 -07002176 }
2177 }
2178
David Ahernf5a0aab2016-12-29 15:29:03 -08002179 rth = rt_dst_alloc(l3mdev_master_dev_rcu(dev) ? : net->loopback_dev,
David Ahern5510cdf2017-05-25 10:42:34 -07002180 flags | RTCF_LOCAL, res->type,
David S. Millerd2d68ba92012-07-17 12:58:50 -07002181 IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002182 if (!rth)
2183 goto e_nobufs;
2184
Changli Gaod8d1f302010-06-10 23:31:35 -07002185	rth->dst.output = ip_rt_bug;
David S. Millercf911662011-04-28 14:31:47 -07002186#ifdef CONFIG_IP_ROUTE_CLASSID
2187 rth->dst.tclassid = itag;
2188#endif
David S. Miller9917e1e82012-07-17 14:44:26 -07002189 rth->rt_is_input = 1;
Roopa Prabhu571e7222015-07-21 10:43:47 +02002190
Duan Jionga6254862014-02-17 15:23:43 +08002191 RT_CACHE_STAT_INC(in_slow_tot);
David Ahern5510cdf2017-05-25 10:42:34 -07002192 if (res->type == RTN_UNREACHABLE) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002193		rth->dst.input = ip_error;
2194		rth->dst.error = -err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002195 rth->rt_flags &= ~RTCF_LOCAL;
2196 }
Thomas Grafefd85702016-11-30 17:10:09 +01002197
Alexei Starovoitovdcdfdf52013-11-19 19:12:34 -08002198 if (do_cache) {
David Aherneba618a2019-04-02 14:11:55 -07002199 struct fib_nh_common *nhc = FIB_RES_NHC(*res);
Thomas Grafefd85702016-11-30 17:10:09 +01002200
David Aherneba618a2019-04-02 14:11:55 -07002201 rth->dst.lwtstate = lwtstate_get(nhc->nhc_lwtstate);
Thomas Grafefd85702016-11-30 17:10:09 +01002202 if (lwtunnel_input_redirect(rth->dst.lwtstate)) {
2203 WARN_ON(rth->dst.input == lwtunnel_input);
2204 rth->dst.lwtstate->orig_input = rth->dst.input;
2205 rth->dst.input = lwtunnel_input;
2206 }
2207
David Ahern87063a1f2019-04-30 07:45:49 -07002208 if (unlikely(!rt_cache_route(nhc, rth)))
Alexei Starovoitovdcdfdf52013-11-19 19:12:34 -08002209 rt_add_uncached_list(rth);
Alexei Starovoitovdcdfdf52013-11-19 19:12:34 -08002210 }
David S. Miller89aef892012-07-17 11:00:09 -07002211 skb_dst_set(skb, &rth->dst);
David S. Millerb23dd4f2011-03-02 14:31:35 -08002212 err = 0;
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00002213 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002214
2215no_route:
2216 RT_CACHE_STAT_INC(in_no_route);
David Ahern5510cdf2017-05-25 10:42:34 -07002217 res->type = RTN_UNREACHABLE;
2218 res->fi = NULL;
2219 res->table = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002220 goto local_input;
2221
2222 /*
2223 * Do not cache martian addresses: they should be logged (RFC1812)
2224 */
2225martian_destination:
2226 RT_CACHE_STAT_INC(in_martian_dst);
2227#ifdef CONFIG_IP_ROUTE_VERBOSE
Joe Perchese87cc472012-05-13 21:56:26 +00002228 if (IN_DEV_LOG_MARTIANS(in_dev))
2229 net_warn_ratelimited("martian destination %pI4 from %pI4, dev %s\n",
2230 &daddr, &saddr, dev->name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002231#endif
Dietmar Eggemann2c2910a2005-06-28 13:06:23 -07002232
Linus Torvalds1da177e2005-04-16 15:20:36 -07002233e_inval:
2234 err = -EINVAL;
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00002235 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002236
2237e_nobufs:
2238 err = -ENOBUFS;
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00002239 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002240
2241martian_source:
2242 ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00002243 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002244}
2245
David S. Millerc6cffba2012-07-26 11:14:38 +00002246int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2247 u8 tos, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002248{
David Ahern5510cdf2017-05-25 10:42:34 -07002249 struct fib_result res;
2250 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002251
Julian Anastasov6e280992017-02-26 17:14:35 +02002252 tos &= IPTOS_RT_MASK;
Eric Dumazet96d36222010-06-02 19:21:31 +00002253 rcu_read_lock();
David Ahern5510cdf2017-05-25 10:42:34 -07002254 err = ip_route_input_rcu(skb, daddr, saddr, tos, dev, &res);
2255 rcu_read_unlock();
Eric Dumazet96d36222010-06-02 19:21:31 +00002256
David Ahern5510cdf2017-05-25 10:42:34 -07002257 return err;
2258}
2259EXPORT_SYMBOL(ip_route_input_noref);
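/* Usage sketch: the normal entry point from the receive path looks
 * roughly like
 *
 *	err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
 *				   iph->tos, dev);
 *
 * as done from ip_rcv_finish(); "noref" reflects that the dst may be
 * attached to the skb without taking a reference.
 */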
2260
2261/* called with rcu_read_lock held */
2262int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2263 u8 tos, struct net_device *dev, struct fib_result *res)
2264{
Linus Torvalds1da177e2005-04-16 15:20:36 -07002265	/* Multicast recognition logic is moved from the route cache to here.
2266	   The problem was that too many Ethernet cards have broken/missing
2267	   hardware multicast filters :-( As a result, a host on a multicast
2268	   network acquires a lot of useless route cache entries, e.g. from
2269	   SDR messages from all over the world. Now we try to get rid of them.
2270	   Really, provided the software IP multicast filter is organized
2271	   reasonably (at least, hashed), it does not result in a slowdown
2272	   compared with route cache reject entries.
2273	   Note that multicast routers are not affected, because
2274	   a route cache entry is created eventually.
2275 */
Joe Perchesf97c1e02007-12-16 13:45:43 -08002276 if (ipv4_is_multicast(daddr)) {
Eric Dumazet96d36222010-06-02 19:21:31 +00002277 struct in_device *in_dev = __in_dev_get_rcu(dev);
David Aherne58e4152016-10-31 15:54:00 -07002278 int our = 0;
David Ahern5510cdf2017-05-25 10:42:34 -07002279 int err = -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002280
Paolo Abeni22c74762019-03-06 10:42:53 +01002281 if (!in_dev)
2282 return err;
2283 our = ip_check_mc_rcu(in_dev, daddr, saddr,
2284 ip_hdr(skb)->protocol);
David Aherne58e4152016-10-31 15:54:00 -07002285
2286 /* check l3 master if no match yet */
Paolo Abeni22c74762019-03-06 10:42:53 +01002287 if (!our && netif_is_l3_slave(dev)) {
David Aherne58e4152016-10-31 15:54:00 -07002288 struct in_device *l3_in_dev;
2289
2290 l3_in_dev = __in_dev_get_rcu(skb->dev);
2291 if (l3_in_dev)
2292 our = ip_check_mc_rcu(l3_in_dev, daddr, saddr,
2293 ip_hdr(skb)->protocol);
2294 }
2295
David Aherne58e4152016-10-31 15:54:00 -07002296 if (our
Linus Torvalds1da177e2005-04-16 15:20:36 -07002297#ifdef CONFIG_IP_MROUTE
David Aherne58e4152016-10-31 15:54:00 -07002298 ||
2299 (!ipv4_is_local_multicast(daddr) &&
2300 IN_DEV_MFORWARD(in_dev))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002301#endif
David Aherne58e4152016-10-31 15:54:00 -07002302 ) {
David Ahern5510cdf2017-05-25 10:42:34 -07002303 err = ip_route_input_mc(skb, daddr, saddr,
David Aherne58e4152016-10-31 15:54:00 -07002304 tos, dev, our);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002305 }
David Ahern5510cdf2017-05-25 10:42:34 -07002306 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002307 }
David Ahern5510cdf2017-05-25 10:42:34 -07002308
2309 return ip_route_input_slow(skb, daddr, saddr, tos, dev, res);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002310}
2311
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00002312/* called with rcu_read_lock() */
David S. Miller982721f2011-02-16 21:44:24 -08002313static struct rtable *__mkroute_output(const struct fib_result *res,
David Miller1a00fee2012-07-01 02:02:56 +00002314 const struct flowi4 *fl4, int orig_oif,
Julian Anastasovf61759e2011-12-02 11:39:42 +00002315 struct net_device *dev_out,
David S. Miller5ada5522011-02-17 15:29:00 -08002316 unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002317{
David S. Miller982721f2011-02-16 21:44:24 -08002318 struct fib_info *fi = res->fi;
David S. Millerf2bb4be2012-07-17 12:20:47 -07002319 struct fib_nh_exception *fnhe;
David S. Miller5ada5522011-02-17 15:29:00 -08002320 struct in_device *in_dev;
David S. Miller982721f2011-02-16 21:44:24 -08002321 u16 type = res->type;
David S. Miller5ada5522011-02-17 15:29:00 -08002322 struct rtable *rth;
Julian Anastasovc92b9652012-10-08 11:41:19 +00002323 bool do_cache;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002324
Thomas Grafd0daebc32012-06-12 00:44:01 +00002325 in_dev = __in_dev_get_rcu(dev_out);
2326 if (!in_dev)
David S. Miller5ada5522011-02-17 15:29:00 -08002327 return ERR_PTR(-EINVAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002328
Thomas Grafd0daebc32012-06-12 00:44:01 +00002329 if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
David Ahern5f02ce242016-09-10 12:09:54 -07002330 if (ipv4_is_loopback(fl4->saddr) &&
2331 !(dev_out->flags & IFF_LOOPBACK) &&
2332 !netif_is_l3_master(dev_out))
Thomas Grafd0daebc32012-06-12 00:44:01 +00002333 return ERR_PTR(-EINVAL);
2334
David S. Miller68a5e3d2011-03-11 20:07:33 -05002335 if (ipv4_is_lbcast(fl4->daddr))
David S. Miller982721f2011-02-16 21:44:24 -08002336 type = RTN_BROADCAST;
David S. Miller68a5e3d2011-03-11 20:07:33 -05002337 else if (ipv4_is_multicast(fl4->daddr))
David S. Miller982721f2011-02-16 21:44:24 -08002338 type = RTN_MULTICAST;
David S. Miller68a5e3d2011-03-11 20:07:33 -05002339 else if (ipv4_is_zeronet(fl4->daddr))
David S. Miller5ada5522011-02-17 15:29:00 -08002340 return ERR_PTR(-EINVAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002341
2342 if (dev_out->flags & IFF_LOOPBACK)
2343 flags |= RTCF_LOCAL;
2344
Julian Anastasov63617422012-11-22 23:04:14 +02002345 do_cache = true;
David S. Miller982721f2011-02-16 21:44:24 -08002346 if (type == RTN_BROADCAST) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002347 flags |= RTCF_BROADCAST | RTCF_LOCAL;
David S. Miller982721f2011-02-16 21:44:24 -08002348 fi = NULL;
2349 } else if (type == RTN_MULTICAST) {
Eric Dumazetdd28d1a2010-09-29 11:53:50 +00002350 flags |= RTCF_MULTICAST | RTCF_LOCAL;
David S. Miller813b3b52011-04-28 14:48:42 -07002351 if (!ip_check_mc_rcu(in_dev, fl4->daddr, fl4->saddr,
2352 fl4->flowi4_proto))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002353 flags &= ~RTCF_LOCAL;
Julian Anastasov63617422012-11-22 23:04:14 +02002354 else
2355 do_cache = false;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002356		/* If a multicast route does not exist, use the
Eric Dumazetdd28d1a2010-09-29 11:53:50 +00002357		 * default one, but do not use a gateway in this case.
2358		 * Yes, it is a hack.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002359		 */
David S. Miller982721f2011-02-16 21:44:24 -08002360 if (fi && res->prefixlen < 4)
2361 fi = NULL;
Chris Friesend6d5e992016-04-08 15:21:30 -06002362 } else if ((type == RTN_LOCAL) && (orig_oif != 0) &&
2363 (orig_oif != dev_out->ifindex)) {
2364		/* For local routes that require a particular output interface
2365		 * we do not want to cache the result.  Caching the result
2366		 * causes incorrect behaviour when there are multiple source
2367		 * addresses on the interface: if the intended recipient is
2368		 * waiting on that interface for the packet, it won't be
2369		 * received, because it will be delivered on the loopback
2370		 * interface and the IP_PKTINFO ipi_ifindex will be set to
2371		 * the loopback interface as well.
2372		 */
Julian Anastasov94720e32018-05-02 09:41:19 +03002373 do_cache = false;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002374 }
2375
David S. Millerf2bb4be2012-07-17 12:20:47 -07002376 fnhe = NULL;
Julian Anastasov63617422012-11-22 23:04:14 +02002377 do_cache &= fi != NULL;
Julian Anastasov94720e32018-05-02 09:41:19 +03002378 if (fi) {
David Aherneba618a2019-04-02 14:11:55 -07002379 struct fib_nh_common *nhc = FIB_RES_NHC(*res);
David S. Millerc5038a82012-07-31 15:02:02 -07002380 struct rtable __rcu **prth;
Eric Dumazetd26b3a72012-07-31 05:45:30 +00002381
David Aherna5995e72019-04-30 07:45:50 -07002382 fnhe = find_exception(nhc, fl4->daddr);
Julian Anastasov94720e32018-05-02 09:41:19 +03002383 if (!do_cache)
2384 goto add;
Xin Longdeed49d2016-02-18 21:21:19 +08002385 if (fnhe) {
Timo Teräs2ffae992013-06-27 10:27:05 +03002386 prth = &fnhe->fnhe_rth_output;
Julian Anastasov94720e32018-05-02 09:41:19 +03002387 } else {
2388 if (unlikely(fl4->flowi4_flags &
2389 FLOWI_FLAG_KNOWN_NH &&
David Ahernbdf00462019-04-05 16:30:26 -07002390 !(nhc->nhc_gw_family &&
David Aherneba618a2019-04-02 14:11:55 -07002391 nhc->nhc_scope == RT_SCOPE_LINK))) {
Julian Anastasov94720e32018-05-02 09:41:19 +03002392 do_cache = false;
2393 goto add;
Julian Anastasovc92b9652012-10-08 11:41:19 +00002394 }
David Ahern0f457a32019-04-30 07:45:48 -07002395 prth = raw_cpu_ptr(nhc->nhc_pcpu_rth_output);
Julian Anastasovc92b9652012-10-08 11:41:19 +00002396 }
David S. Millerc5038a82012-07-31 15:02:02 -07002397 rth = rcu_dereference(*prth);
Wei Wang9df16ef2017-06-17 10:42:31 -07002398 if (rt_cache_valid(rth) && dst_hold_safe(&rth->dst))
David S. Millerc5038a82012-07-31 15:02:02 -07002399 return rth;
David S. Millerf2bb4be2012-07-17 12:20:47 -07002400 }
Julian Anastasovc92b9652012-10-08 11:41:19 +00002401
2402add:
David Ahernd08c4f32015-09-02 13:58:34 -07002403 rth = rt_dst_alloc(dev_out, flags, type,
David S. Miller5c1e6aa2011-04-28 14:13:38 -07002404 IN_DEV_CONF_GET(in_dev, NOPOLICY),
David S. Millerf2bb4be2012-07-17 12:20:47 -07002405 IN_DEV_CONF_GET(in_dev, NOXFRM),
Julian Anastasovc92b9652012-10-08 11:41:19 +00002406 do_cache);
Dimitris Michailidis8391d072010-10-07 14:48:38 +00002407 if (!rth)
David S. Miller5ada5522011-02-17 15:29:00 -08002408 return ERR_PTR(-ENOBUFS);
Dimitris Michailidis8391d072010-10-07 14:48:38 +00002409
David Ahern9438c872017-08-11 17:02:02 -07002410 rth->rt_iif = orig_oif;
David Ahernb7503e02015-09-02 13:58:35 -07002411
Linus Torvalds1da177e2005-04-16 15:20:36 -07002412 RT_CACHE_STAT_INC(out_slow_tot);
2413
Linus Torvalds1da177e2005-04-16 15:20:36 -07002414 if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002415 if (flags & RTCF_LOCAL &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07002416 !(dev_out->flags & IFF_LOOPBACK)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002417 rth->dst.output = ip_mc_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002418 RT_CACHE_STAT_INC(out_slow_mc);
2419 }
2420#ifdef CONFIG_IP_MROUTE
David S. Miller982721f2011-02-16 21:44:24 -08002421 if (type == RTN_MULTICAST) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002422 if (IN_DEV_MFORWARD(in_dev) &&
David S. Miller813b3b52011-04-28 14:48:42 -07002423 !ipv4_is_local_multicast(fl4->daddr)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002424 rth->dst.input = ip_mr_input;
2425 rth->dst.output = ip_mc_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002426 }
2427 }
2428#endif
2429 }
2430
Wei Wanga4c2fd72017-06-17 10:42:42 -07002431 rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0, do_cache);
David Ahern99428952018-02-13 20:32:04 -08002432 lwtunnel_set_redirect(&rth->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002433
David S. Miller5ada5522011-02-17 15:29:00 -08002434 return rth;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002435}
2436
Linus Torvalds1da177e2005-04-16 15:20:36 -07002437/*
2438 * Major route resolver routine.
2439 */
2440
David Ahern3abd1ade2017-05-25 10:42:33 -07002441struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
2442 const struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002443{
Julian Anastasovf61759e2011-12-02 11:39:42 +00002444 __u8 tos = RT_FL_TOS(fl4);
Eric Dumazetd0ea2b12018-04-07 13:42:42 -07002445 struct fib_result res = {
2446 .type = RTN_UNSPEC,
2447 .fi = NULL,
2448 .table = NULL,
2449 .tclassid = 0,
2450 };
David S. Miller5ada5522011-02-17 15:29:00 -08002451 struct rtable *rth;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002452
Pavel Emelyanov1fb94892012-08-08 21:53:36 +00002453 fl4->flowi4_iif = LOOPBACK_IFINDEX;
David S. Miller813b3b52011-04-28 14:48:42 -07002454 fl4->flowi4_tos = tos & IPTOS_RT_MASK;
2455 fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
2456 RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
David S. Miller44713b62011-03-04 21:24:47 -08002457
David S. Miller010c2702011-02-17 15:37:09 -08002458 rcu_read_lock();
David Ahern3abd1ade2017-05-25 10:42:33 -07002459 rth = ip_route_output_key_hash_rcu(net, fl4, &res, skb);
2460 rcu_read_unlock();
2461
2462 return rth;
2463}
2464EXPORT_SYMBOL_GPL(ip_route_output_key_hash);
2465
2466struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4,
2467 struct fib_result *res,
2468 const struct sk_buff *skb)
2469{
2470 struct net_device *dev_out = NULL;
2471 int orig_oif = fl4->flowi4_oif;
2472 unsigned int flags = 0;
2473 struct rtable *rth;
2474 int err = -ENETUNREACH;
2475
David S. Miller813b3b52011-04-28 14:48:42 -07002476 if (fl4->saddr) {
David S. Millerb23dd4f2011-03-02 14:31:35 -08002477 rth = ERR_PTR(-EINVAL);
David S. Miller813b3b52011-04-28 14:48:42 -07002478 if (ipv4_is_multicast(fl4->saddr) ||
2479 ipv4_is_lbcast(fl4->saddr) ||
2480 ipv4_is_zeronet(fl4->saddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002481 goto out;
2482
Linus Torvalds1da177e2005-04-16 15:20:36 -07002483		/* I removed the check for oif == dev_out->oif here.
2484		   It was wrong for two reasons:
Denis V. Lunev1ab35272008-01-22 22:04:30 -08002485		   1. ip_dev_find(net, saddr) can return the wrong iface if saddr
2486		      is assigned to multiple interfaces.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002487		   2. Moreover, we are allowed to send packets with the saddr
2488		      of another iface. --ANK
2489		 */
2490
David S. Miller813b3b52011-04-28 14:48:42 -07002491 if (fl4->flowi4_oif == 0 &&
2492 (ipv4_is_multicast(fl4->daddr) ||
2493 ipv4_is_lbcast(fl4->daddr))) {
Julian Anastasova210d012008-10-01 07:28:28 -07002494 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
David S. Miller813b3b52011-04-28 14:48:42 -07002495 dev_out = __ip_dev_find(net, fl4->saddr, false);
Ian Morris51456b22015-04-03 09:17:26 +01002496 if (!dev_out)
Julian Anastasova210d012008-10-01 07:28:28 -07002497 goto out;
2498
Linus Torvalds1da177e2005-04-16 15:20:36 -07002499			/* Special hack: the user can direct multicasts
2500			   and limited broadcasts via the necessary interface
2501			   without fiddling with IP_MULTICAST_IF or IP_PKTINFO.
2502			   This hack is not just for fun, it allows
2503			   vic, vat and friends to work.
2504			   They bind a socket to loopback, set the ttl to zero
2505			   and expect that it will work.
2506			   From the viewpoint of the routing cache they are broken,
2507			   because we are not allowed to build a multicast path
2508			   with a loopback source addr (the routing cache
2509			   cannot know that the ttl is zero, so the packet
2510			   will not leave this host and the route is valid).
2511			   Luckily, this hack is a good workaround.
2512			 */
2513
David S. Miller813b3b52011-04-28 14:48:42 -07002514 fl4->flowi4_oif = dev_out->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002515 goto make_route;
2516 }
Julian Anastasova210d012008-10-01 07:28:28 -07002517
David S. Miller813b3b52011-04-28 14:48:42 -07002518 if (!(fl4->flowi4_flags & FLOWI_FLAG_ANYSRC)) {
Julian Anastasova210d012008-10-01 07:28:28 -07002519 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
David S. Miller813b3b52011-04-28 14:48:42 -07002520 if (!__ip_dev_find(net, fl4->saddr, false))
Julian Anastasova210d012008-10-01 07:28:28 -07002521 goto out;
Julian Anastasova210d012008-10-01 07:28:28 -07002522 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002523 }
2524
2525
David S. Miller813b3b52011-04-28 14:48:42 -07002526 if (fl4->flowi4_oif) {
2527 dev_out = dev_get_by_index_rcu(net, fl4->flowi4_oif);
David S. Millerb23dd4f2011-03-02 14:31:35 -08002528 rth = ERR_PTR(-ENODEV);
Ian Morris51456b22015-04-03 09:17:26 +01002529 if (!dev_out)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002530 goto out;
Herbert Xue5ed6392005-10-03 14:35:55 -07002531
2532 /* RACE: Check return value of inet_select_addr instead. */
Eric Dumazetfc75fc82010-12-22 04:39:39 +00002533 if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) {
David S. Millerb23dd4f2011-03-02 14:31:35 -08002534 rth = ERR_PTR(-ENETUNREACH);
Eric Dumazetfc75fc82010-12-22 04:39:39 +00002535 goto out;
2536 }
David S. Miller813b3b52011-04-28 14:48:42 -07002537 if (ipv4_is_local_multicast(fl4->daddr) ||
Andrew Lunn6a211652015-05-01 16:39:54 +02002538 ipv4_is_lbcast(fl4->daddr) ||
2539 fl4->flowi4_proto == IPPROTO_IGMP) {
David S. Miller813b3b52011-04-28 14:48:42 -07002540 if (!fl4->saddr)
2541 fl4->saddr = inet_select_addr(dev_out, 0,
2542 RT_SCOPE_LINK);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002543 goto make_route;
2544 }
Jiri Benc0a7e2262013-10-04 17:04:48 +02002545 if (!fl4->saddr) {
David S. Miller813b3b52011-04-28 14:48:42 -07002546 if (ipv4_is_multicast(fl4->daddr))
2547 fl4->saddr = inet_select_addr(dev_out, 0,
2548 fl4->flowi4_scope);
2549 else if (!fl4->daddr)
2550 fl4->saddr = inet_select_addr(dev_out, 0,
2551 RT_SCOPE_HOST);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002552 }
2553 }
2554
David S. Miller813b3b52011-04-28 14:48:42 -07002555 if (!fl4->daddr) {
2556 fl4->daddr = fl4->saddr;
2557 if (!fl4->daddr)
2558 fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK);
Denis V. Lunevb40afd02008-01-22 22:06:19 -08002559 dev_out = net->loopback_dev;
Pavel Emelyanov1fb94892012-08-08 21:53:36 +00002560 fl4->flowi4_oif = LOOPBACK_IFINDEX;
David Ahern3abd1ade2017-05-25 10:42:33 -07002561 res->type = RTN_LOCAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002562 flags |= RTCF_LOCAL;
2563 goto make_route;
2564 }
2565
David Ahern3abd1ade2017-05-25 10:42:33 -07002566 err = fib_lookup(net, fl4, res, 0);
Nikola ForrĂ³0315e382015-09-17 16:01:32 +02002567 if (err) {
David Ahern3abd1ade2017-05-25 10:42:33 -07002568 res->fi = NULL;
2569 res->table = NULL;
David Ahern6104e112016-10-12 13:20:11 -07002570 if (fl4->flowi4_oif &&
David Aherne58e4152016-10-31 15:54:00 -07002571 (ipv4_is_multicast(fl4->daddr) ||
2572 !netif_index_is_l3_master(net, fl4->flowi4_oif))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002573			/* Apparently, the routing tables are wrong. Assume
2574			   that the destination is on link.
2575
2576			   WHY? DW.
2577			   Because we are allowed to send to an iface
2578			   even if it has NO routes and NO assigned
2579			   addresses. When oif is specified, the routing
2580			   tables are looked up with only one purpose:
2581			   to check whether the destination is gatewayed rather
2582			   than direct. Moreover, if MSG_DONTROUTE is set,
2583			   we send the packet, ignoring both routing tables
2584			   and ifaddr state. --ANK
2585
2586
2587			   We could do this even if oif is unknown
2588			   (as IPv6 likely does), but we do not.
2589			 */
2590
David S. Miller813b3b52011-04-28 14:48:42 -07002591 if (fl4->saddr == 0)
2592 fl4->saddr = inet_select_addr(dev_out, 0,
2593 RT_SCOPE_LINK);
David Ahern3abd1ade2017-05-25 10:42:33 -07002594 res->type = RTN_UNICAST;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002595 goto make_route;
2596 }
Nikola ForrĂ³0315e382015-09-17 16:01:32 +02002597 rth = ERR_PTR(err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002598 goto out;
2599 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002600
David Ahern3abd1ade2017-05-25 10:42:33 -07002601 if (res->type == RTN_LOCAL) {
David S. Miller813b3b52011-04-28 14:48:42 -07002602 if (!fl4->saddr) {
David Ahern3abd1ade2017-05-25 10:42:33 -07002603 if (res->fi->fib_prefsrc)
2604 fl4->saddr = res->fi->fib_prefsrc;
Joel Sing9fc3bbb2011-01-03 20:24:20 +00002605 else
David S. Miller813b3b52011-04-28 14:48:42 -07002606 fl4->saddr = fl4->daddr;
Joel Sing9fc3bbb2011-01-03 20:24:20 +00002607 }
David Ahern5f02ce242016-09-10 12:09:54 -07002608
2609 /* L3 master device is the loopback for that domain */
David Ahern3abd1ade2017-05-25 10:42:33 -07002610 dev_out = l3mdev_master_dev_rcu(FIB_RES_DEV(*res)) ? :
Robert Shearmanb7c84872017-04-21 21:34:59 +01002611 net->loopback_dev;
David Ahern839da4d2017-08-10 13:49:10 -07002612
2613 /* make sure orig_oif points to fib result device even
2614 * though packet rx/tx happens over loopback or l3mdev
2615 */
2616 orig_oif = FIB_RES_OIF(*res);
2617
David S. Miller813b3b52011-04-28 14:48:42 -07002618 fl4->flowi4_oif = dev_out->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002619 flags |= RTCF_LOCAL;
2620 goto make_route;
2621 }
2622
David Ahern3abd1ade2017-05-25 10:42:33 -07002623 fib_select_path(net, res, fl4, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002624
David Ahern3abd1ade2017-05-25 10:42:33 -07002625 dev_out = FIB_RES_DEV(*res);
David S. Miller813b3b52011-04-28 14:48:42 -07002626 fl4->flowi4_oif = dev_out->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002627
2628
2629make_route:
David Ahern3abd1ade2017-05-25 10:42:33 -07002630 rth = __mkroute_output(res, fl4, orig_oif, dev_out, flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002631
David S. Miller010c2702011-02-17 15:37:09 -08002632out:
David S. Millerb23dd4f2011-03-02 14:31:35 -08002633 return rth;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002634}
Arnaldo Carvalho de Melod8c97a92005-08-09 20:12:12 -07002635
Jianzhao Wangae2688d2010-09-08 14:35:43 -07002636static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie)
2637{
2638 return NULL;
2639}
2640
Steffen Klassertebb762f2011-11-23 02:12:51 +00002641static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst)
Roland Dreierec831ea2011-01-31 13:16:00 -08002642{
Steffen Klassert618f9bc2011-11-23 02:13:31 +00002643 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
2644
2645 return mtu ? : dst->dev->mtu;
Roland Dreierec831ea2011-01-31 13:16:00 -08002646}
2647
David S. Miller6700c272012-07-17 03:29:28 -07002648static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
2649 struct sk_buff *skb, u32 mtu)
David S. Miller14e50e52007-05-24 18:17:54 -07002650{
2651}
2652
David S. Miller6700c272012-07-17 03:29:28 -07002653static void ipv4_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
2654 struct sk_buff *skb)
David S. Millerb587ee32012-07-12 00:39:24 -07002655{
2656}
2657
Held Bernhard0972ddb2011-04-24 22:07:32 +00002658static u32 *ipv4_rt_blackhole_cow_metrics(struct dst_entry *dst,
2659 unsigned long old)
2660{
2661 return NULL;
2662}
2663
David S. Miller14e50e52007-05-24 18:17:54 -07002664static struct dst_ops ipv4_dst_blackhole_ops = {
2665 .family = AF_INET,
Jianzhao Wangae2688d2010-09-08 14:35:43 -07002666 .check = ipv4_blackhole_dst_check,
Steffen Klassertebb762f2011-11-23 02:12:51 +00002667 .mtu = ipv4_blackhole_mtu,
Eric Dumazet214f45c2011-02-18 11:39:01 -08002668 .default_advmss = ipv4_default_advmss,
David S. Miller14e50e52007-05-24 18:17:54 -07002669 .update_pmtu = ipv4_rt_blackhole_update_pmtu,
David S. Millerb587ee32012-07-12 00:39:24 -07002670 .redirect = ipv4_rt_blackhole_redirect,
Held Bernhard0972ddb2011-04-24 22:07:32 +00002671 .cow_metrics = ipv4_rt_blackhole_cow_metrics,
David S. Millerd3aaeb32011-07-18 00:40:17 -07002672 .neigh_lookup = ipv4_neigh_lookup,
David S. Miller14e50e52007-05-24 18:17:54 -07002673};
2674
David S. Miller2774c132011-03-01 14:59:04 -08002675struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig)
David S. Miller14e50e52007-05-24 18:17:54 -07002676{
David S. Miller2774c132011-03-01 14:59:04 -08002677 struct rtable *ort = (struct rtable *) dst_orig;
David S. Millerf5b0a872012-07-19 12:31:33 -07002678 struct rtable *rt;
David S. Miller14e50e52007-05-24 18:17:54 -07002679
Steffen Klassert6c0e7282017-10-09 08:43:55 +02002680 rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_DEAD, 0);
David S. Miller14e50e52007-05-24 18:17:54 -07002681 if (rt) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002682 struct dst_entry *new = &rt->dst;
David S. Miller14e50e52007-05-24 18:17:54 -07002683
David S. Miller14e50e52007-05-24 18:17:54 -07002684 new->__use = 1;
Herbert Xu352e5122007-11-13 21:34:06 -08002685 new->input = dst_discard;
Eric W. Biedermanede20592015-10-07 16:48:47 -05002686 new->output = dst_discard_out;
David S. Miller14e50e52007-05-24 18:17:54 -07002687
Wei Wang1dbe32522017-06-17 10:42:26 -07002688 new->dev = net->loopback_dev;
David S. Miller14e50e52007-05-24 18:17:54 -07002689 if (new->dev)
2690 dev_hold(new->dev);
2691
David S. Miller9917e1e82012-07-17 14:44:26 -07002692 rt->rt_is_input = ort->rt_is_input;
David S. Miller5e2b61f2011-03-04 21:47:09 -08002693 rt->rt_iif = ort->rt_iif;
David S. Miller59436342012-07-10 06:58:42 -07002694 rt->rt_pmtu = ort->rt_pmtu;
Sabrina Dubrocad52e5a72018-03-14 10:21:14 +01002695 rt->rt_mtu_locked = ort->rt_mtu_locked;
David S. Miller14e50e52007-05-24 18:17:54 -07002696
fan.duca4c3fc2013-07-30 08:33:53 +08002697 rt->rt_genid = rt_genid_ipv4(net);
David S. Miller14e50e52007-05-24 18:17:54 -07002698 rt->rt_flags = ort->rt_flags;
2699 rt->rt_type = ort->rt_type;
David Ahern77d5bc72019-09-17 10:39:49 -07002700 rt->rt_uses_gateway = ort->rt_uses_gateway;
David Ahern1550c172019-04-05 16:30:27 -07002701 rt->rt_gw_family = ort->rt_gw_family;
2702 if (rt->rt_gw_family == AF_INET)
2703 rt->rt_gw4 = ort->rt_gw4;
David Ahern0f5f7d72019-04-05 16:30:29 -07002704 else if (rt->rt_gw_family == AF_INET6)
2705 rt->rt_gw6 = ort->rt_gw6;
David S. Miller14e50e52007-05-24 18:17:54 -07002706
David S. Millercaacf052012-07-31 15:06:50 -07002707 INIT_LIST_HEAD(&rt->rt_uncached);
David S. Miller14e50e52007-05-24 18:17:54 -07002708 }
2709
David S. Miller2774c132011-03-01 14:59:04 -08002710 dst_release(dst_orig);
2711
2712 return rt ? &rt->dst : ERR_PTR(-ENOMEM);
David S. Miller14e50e52007-05-24 18:17:54 -07002713}
2714
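/* Illustrative sketch (hypothetical helper, not part of the original
 * file): ipv4_blackhole_route() is used on the xfrm path to swap a
 * real route for a copy that silently discards traffic, e.g. while an
 * IPsec SA is still being negotiated.  Note that it consumes a
 * reference to dst_orig.
 */
#if 0
static void example_blackhole_sock(struct net *net, struct sock *sk,
				   struct rtable *rt)
{
	struct dst_entry *bh = ipv4_blackhole_route(net, &rt->dst);

	if (!IS_ERR(bh))
		sk_dst_set(sk, bh);	/* output is dropped from now on */
}
#endif
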
David S. Miller9d6ec932011-03-12 01:12:47 -05002715struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
Eric Dumazet6f9c9612015-09-25 07:39:10 -07002716 const struct sock *sk)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002717{
David S. Miller9d6ec932011-03-12 01:12:47 -05002718 struct rtable *rt = __ip_route_output_key(net, flp4);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002719
David S. Millerb23dd4f2011-03-02 14:31:35 -08002720 if (IS_ERR(rt))
2721 return rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002722
David S. Miller56157872011-05-02 14:37:45 -07002723 if (flp4->flowi4_proto)
Steffen Klassertf92ee612014-09-16 10:08:40 +02002724 rt = (struct rtable *)xfrm_lookup_route(net, &rt->dst,
2725 flowi4_to_flowi(flp4),
2726 sk, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002727
David S. Millerb23dd4f2011-03-02 14:31:35 -08002728 return rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002729}
Arnaldo Carvalho de Melod8c97a92005-08-09 20:12:12 -07002730EXPORT_SYMBOL_GPL(ip_route_output_flow);
2731
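/* Illustrative sketch, not part of the original file: resolving an
 * output route for a UDP flow with made-up addresses.  The caller owns
 * the returned route and must drop it with ip_rt_put() when done.
 */
#if 0
static struct rtable *example_output_route(struct net *net,
					   __be32 daddr, __be32 saddr)
{
	struct flowi4 fl4;

	memset(&fl4, 0, sizeof(fl4));
	fl4.daddr = daddr;
	fl4.saddr = saddr;		/* 0 lets the kernel choose */
	fl4.flowi4_proto = IPPROTO_UDP;

	/* NULL sk: no socket context for the xfrm lookup */
	return ip_route_output_flow(net, &fl4, NULL);
}
#endif
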
David Ahern3765d352017-05-25 10:42:36 -07002732/* called with rcu_read_lock held */
Roopa Prabhu404eb772018-05-22 14:03:27 -07002733static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
2734 struct rtable *rt, u32 table_id, struct flowi4 *fl4,
John Fastabende93fb3e2019-08-23 17:11:38 -07002735 struct sk_buff *skb, u32 portid, u32 seq,
2736 unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002737{
Linus Torvalds1da177e2005-04-16 15:20:36 -07002738 struct rtmsg *r;
Thomas Grafbe403ea2006-08-17 18:15:17 -07002739 struct nlmsghdr *nlh;
Steffen Klassert2bc8ca42011-10-11 01:12:02 +00002740 unsigned long expires = 0;
David S. Millerf1850712012-07-10 07:26:01 -07002741 u32 error;
Julian Anastasov521f5492012-07-20 12:02:08 +03002742 u32 metrics[RTAX_MAX];
Thomas Grafbe403ea2006-08-17 18:15:17 -07002743
John Fastabende93fb3e2019-08-23 17:11:38 -07002744 nlh = nlmsg_put(skb, portid, seq, RTM_NEWROUTE, sizeof(*r), flags);
Ian Morris51456b22015-04-03 09:17:26 +01002745 if (!nlh)
Patrick McHardy26932562007-01-31 23:16:40 -08002746 return -EMSGSIZE;
Thomas Grafbe403ea2006-08-17 18:15:17 -07002747
2748 r = nlmsg_data(nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002749 r->rtm_family = AF_INET;
2750 r->rtm_dst_len = 32;
2751 r->rtm_src_len = 0;
Stefano Briviod9489742019-06-21 17:45:22 +02002752 r->rtm_tos = fl4 ? fl4->flowi4_tos : 0;
David Ahern8a430ed2017-01-11 15:42:17 -08002753 r->rtm_table = table_id < 256 ? table_id : RT_TABLE_COMPAT;
David Ahernc36ba662015-09-02 13:58:36 -07002754 if (nla_put_u32(skb, RTA_TABLE, table_id))
David S. Millerf3756b72012-04-01 20:39:02 -04002755 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002756 r->rtm_type = rt->rt_type;
2757 r->rtm_scope = RT_SCOPE_UNIVERSE;
2758 r->rtm_protocol = RTPROT_UNSPEC;
2759 r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
2760 if (rt->rt_flags & RTCF_NOTIFY)
2761 r->rtm_flags |= RTM_F_NOTIFY;
Hannes Frederic Sowadf4d9252015-01-23 12:01:26 +01002762 if (IPCB(skb)->flags & IPSKB_DOREDIRECT)
2763 r->rtm_flags |= RTCF_DOREDIRECT;
Thomas Grafbe403ea2006-08-17 18:15:17 -07002764
Jiri Benc930345e2015-03-29 16:59:25 +02002765 if (nla_put_in_addr(skb, RTA_DST, dst))
David S. Millerf3756b72012-04-01 20:39:02 -04002766 goto nla_put_failure;
David Miller1a00fee2012-07-01 02:02:56 +00002767 if (src) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002768 r->rtm_src_len = 32;
Jiri Benc930345e2015-03-29 16:59:25 +02002769 if (nla_put_in_addr(skb, RTA_SRC, src))
David S. Millerf3756b72012-04-01 20:39:02 -04002770 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002771 }
David S. Millerf3756b72012-04-01 20:39:02 -04002772 if (rt->dst.dev &&
2773 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2774 goto nla_put_failure;
Patrick McHardyc7066f72011-01-14 13:36:42 +01002775#ifdef CONFIG_IP_ROUTE_CLASSID
David S. Millerf3756b72012-04-01 20:39:02 -04002776 if (rt->dst.tclassid &&
2777 nla_put_u32(skb, RTA_FLOW, rt->dst.tclassid))
2778 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002779#endif
Stefano Briviod9489742019-06-21 17:45:22 +02002780 if (fl4 && !rt_is_input_route(rt) &&
David Millerd6c0a4f2012-07-01 02:02:59 +00002781 fl4->saddr != src) {
Jiri Benc930345e2015-03-29 16:59:25 +02002782 if (nla_put_in_addr(skb, RTA_PREFSRC, fl4->saddr))
David S. Millerf3756b72012-04-01 20:39:02 -04002783 goto nla_put_failure;
2784 }
David Ahern77d5bc72019-09-17 10:39:49 -07002785 if (rt->rt_uses_gateway) {
2786 if (rt->rt_gw_family == AF_INET &&
2787 nla_put_in_addr(skb, RTA_GATEWAY, rt->rt_gw4)) {
David Ahern0f5f7d72019-04-05 16:30:29 -07002788 goto nla_put_failure;
David Ahern77d5bc72019-09-17 10:39:49 -07002789 } else if (rt->rt_gw_family == AF_INET6) {
2790 int alen = sizeof(struct in6_addr);
2791 struct nlattr *nla;
2792 struct rtvia *via;
David Ahern0f5f7d72019-04-05 16:30:29 -07002793
David Ahern77d5bc72019-09-17 10:39:49 -07002794 nla = nla_reserve(skb, RTA_VIA, alen + 2);
2795 if (!nla)
2796 goto nla_put_failure;
2797
2798 via = nla_data(nla);
2799 via->rtvia_family = AF_INET6;
2800 memcpy(via->rtvia_addr, &rt->rt_gw6, alen);
2801 }
David Ahern0f5f7d72019-04-05 16:30:29 -07002802 }
Thomas Grafbe403ea2006-08-17 18:15:17 -07002803
Steffen Klassertee9a8f72012-10-08 00:56:54 +00002804 expires = rt->dst.expires;
2805 if (expires) {
2806 unsigned long now = jiffies;
2807
2808 if (time_before(now, expires))
2809 expires -= now;
2810 else
2811 expires = 0;
2812 }
2813
Julian Anastasov521f5492012-07-20 12:02:08 +03002814 memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
Steffen Klassertee9a8f72012-10-08 00:56:54 +00002815 if (rt->rt_pmtu && expires)
Julian Anastasov521f5492012-07-20 12:02:08 +03002816 metrics[RTAX_MTU - 1] = rt->rt_pmtu;
Sabrina Dubrocad52e5a72018-03-14 10:21:14 +01002817 if (rt->rt_mtu_locked && expires)
2818 metrics[RTAX_LOCK - 1] |= BIT(RTAX_MTU);
Julian Anastasov521f5492012-07-20 12:02:08 +03002819 if (rtnetlink_put_metrics(skb, metrics) < 0)
Thomas Grafbe403ea2006-08-17 18:15:17 -07002820 goto nla_put_failure;
2821
Stefano Briviod9489742019-06-21 17:45:22 +02002822 if (fl4) {
2823 if (fl4->flowi4_mark &&
2824 nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark))
2825 goto nla_put_failure;
Eric Dumazet963bfee2010-07-20 22:03:14 +00002826
Stefano Briviod9489742019-06-21 17:45:22 +02002827 if (!uid_eq(fl4->flowi4_uid, INVALID_UID) &&
2828 nla_put_u32(skb, RTA_UID,
2829 from_kuid_munged(current_user_ns(),
2830 fl4->flowi4_uid)))
2831 goto nla_put_failure;
2832
2833 if (rt_is_input_route(rt)) {
2834#ifdef CONFIG_IP_MROUTE
2835 if (ipv4_is_multicast(dst) &&
2836 !ipv4_is_local_multicast(dst) &&
2837 IPV4_DEVCONF_ALL(net, MC_FORWARDING)) {
2838 int err = ipmr_get_route(net, skb,
2839 fl4->saddr, fl4->daddr,
2840 r, portid);
2841
2842 if (err <= 0) {
2843 if (err == 0)
2844 return 0;
2845 goto nla_put_failure;
2846 }
2847 } else
2848#endif
2849 if (nla_put_u32(skb, RTA_IIF, fl4->flowi4_iif))
2850 goto nla_put_failure;
2851 }
2852 }
Lorenzo Colitti622ec2c2016-11-04 02:23:42 +09002853
Changli Gaod8d1f302010-06-10 23:31:35 -07002854 error = rt->dst.error;
Thomas Grafbe403ea2006-08-17 18:15:17 -07002855
David S. Millerf1850712012-07-10 07:26:01 -07002856 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, error) < 0)
Thomas Grafe3703b32006-11-27 09:27:07 -08002857 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002858
Johannes Berg053c0952015-01-16 22:09:00 +01002859 nlmsg_end(skb, nlh);
2860 return 0;
Thomas Grafbe403ea2006-08-17 18:15:17 -07002861
2862nla_put_failure:
Patrick McHardy26932562007-01-31 23:16:40 -08002863 nlmsg_cancel(skb, nlh);
2864 return -EMSGSIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002865}
2866
Stefano Brivioee289062019-06-21 17:45:23 +02002867static int fnhe_dump_bucket(struct net *net, struct sk_buff *skb,
2868 struct netlink_callback *cb, u32 table_id,
2869 struct fnhe_hash_bucket *bucket, int genid,
John Fastabende93fb3e2019-08-23 17:11:38 -07002870 int *fa_index, int fa_start, unsigned int flags)
Stefano Brivioee289062019-06-21 17:45:23 +02002871{
2872 int i;
2873
2874 for (i = 0; i < FNHE_HASH_SIZE; i++) {
2875 struct fib_nh_exception *fnhe;
2876
2877 for (fnhe = rcu_dereference(bucket[i].chain); fnhe;
2878 fnhe = rcu_dereference(fnhe->fnhe_next)) {
2879 struct rtable *rt;
2880 int err;
2881
2882 if (*fa_index < fa_start)
2883 goto next;
2884
2885 if (fnhe->fnhe_genid != genid)
2886 goto next;
2887
2888 if (fnhe->fnhe_expires &&
2889 time_after(jiffies, fnhe->fnhe_expires))
2890 goto next;
2891
2892 rt = rcu_dereference(fnhe->fnhe_rth_input);
2893 if (!rt)
2894 rt = rcu_dereference(fnhe->fnhe_rth_output);
2895 if (!rt)
2896 goto next;
2897
2898 err = rt_fill_info(net, fnhe->fnhe_daddr, 0, rt,
2899 table_id, NULL, skb,
2900 NETLINK_CB(cb->skb).portid,
John Fastabende93fb3e2019-08-23 17:11:38 -07002901 cb->nlh->nlmsg_seq, flags);
Stefano Brivioee289062019-06-21 17:45:23 +02002902 if (err)
2903 return err;
2904next:
2905 (*fa_index)++;
2906 }
2907 }
2908
2909 return 0;
2910}
2911
2912int fib_dump_info_fnhe(struct sk_buff *skb, struct netlink_callback *cb,
2913 u32 table_id, struct fib_info *fi,
John Fastabende93fb3e2019-08-23 17:11:38 -07002914 int *fa_index, int fa_start, unsigned int flags)
Stefano Brivioee289062019-06-21 17:45:23 +02002915{
2916 struct net *net = sock_net(cb->skb->sk);
2917 int nhsel, genid = fnhe_genid(net);
2918
2919 for (nhsel = 0; nhsel < fib_info_num_path(fi); nhsel++) {
2920 struct fib_nh_common *nhc = fib_info_nhc(fi, nhsel);
2921 struct fnhe_hash_bucket *bucket;
2922 int err;
2923
2924 if (nhc->nhc_flags & RTNH_F_DEAD)
2925 continue;
2926
Eric Dumazet93ed54b2019-06-26 03:04:50 -07002927 rcu_read_lock();
Stefano Brivioee289062019-06-21 17:45:23 +02002928 bucket = rcu_dereference(nhc->nhc_exceptions);
Eric Dumazet93ed54b2019-06-26 03:04:50 -07002929 err = 0;
2930 if (bucket)
2931 err = fnhe_dump_bucket(net, skb, cb, table_id, bucket,
John Fastabende93fb3e2019-08-23 17:11:38 -07002932 genid, fa_index, fa_start,
2933 flags);
Eric Dumazet93ed54b2019-06-26 03:04:50 -07002934 rcu_read_unlock();
Stefano Brivioee289062019-06-21 17:45:23 +02002935 if (err)
2936 return err;
2937 }
2938
2939 return 0;
2940}
2941
Roopa Prabhu404eb772018-05-22 14:03:27 -07002942static struct sk_buff *inet_rtm_getroute_build_skb(__be32 src, __be32 dst,
2943 u8 ip_proto, __be16 sport,
2944 __be16 dport)
2945{
2946 struct sk_buff *skb;
2947 struct iphdr *iph;
2948
2949 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2950 if (!skb)
2951 return NULL;
2952
2953	/* Reserve room for dummy headers; this skb can pass
2954	 * through a good chunk of the routing engine.
2955	 */
2956 skb_reset_mac_header(skb);
2957 skb_reset_network_header(skb);
2958 skb->protocol = htons(ETH_P_IP);
2959 iph = skb_put(skb, sizeof(struct iphdr));
2960 iph->protocol = ip_proto;
2961 iph->saddr = src;
2962 iph->daddr = dst;
2963 iph->version = 0x4;
2964 iph->frag_off = 0;
2965 iph->ihl = 0x5;
2966 skb_set_transport_header(skb, skb->len);
2967
2968 switch (iph->protocol) {
2969 case IPPROTO_UDP: {
2970 struct udphdr *udph;
2971
2972 udph = skb_put_zero(skb, sizeof(struct udphdr));
2973 udph->source = sport;
2974 udph->dest = dport;
2975 udph->len = sizeof(struct udphdr);
2976 udph->check = 0;
2977 break;
2978 }
2979 case IPPROTO_TCP: {
2980 struct tcphdr *tcph;
2981
2982 tcph = skb_put_zero(skb, sizeof(struct tcphdr));
2983 tcph->source = sport;
2984 tcph->dest = dport;
2985 tcph->doff = sizeof(struct tcphdr) / 4;
2986 tcph->rst = 1;
2987 tcph->check = ~tcp_v4_check(sizeof(struct tcphdr),
2988 src, dst, 0);
2989 break;
2990 }
2991 case IPPROTO_ICMP: {
2992 struct icmphdr *icmph;
2993
2994 icmph = skb_put_zero(skb, sizeof(struct icmphdr));
2995 icmph->type = ICMP_ECHO;
2996 icmph->code = 0;
2997 }
2998 }
2999
3000 return skb;
3001}
3002
Jakub Kicinskia00302b62019-01-18 10:46:19 -08003003static int inet_rtm_valid_getroute_req(struct sk_buff *skb,
3004 const struct nlmsghdr *nlh,
3005 struct nlattr **tb,
3006 struct netlink_ext_ack *extack)
3007{
3008 struct rtmsg *rtm;
3009 int i, err;
3010
3011 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
3012 NL_SET_ERR_MSG(extack,
3013 "ipv4: Invalid header for route get request");
3014 return -EINVAL;
3015 }
3016
3017 if (!netlink_strict_get_check(skb))
Johannes Berg8cb08172019-04-26 14:07:28 +02003018 return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
3019 rtm_ipv4_policy, extack);
Jakub Kicinskia00302b62019-01-18 10:46:19 -08003020
3021 rtm = nlmsg_data(nlh);
3022 if ((rtm->rtm_src_len && rtm->rtm_src_len != 32) ||
3023 (rtm->rtm_dst_len && rtm->rtm_dst_len != 32) ||
3024 rtm->rtm_table || rtm->rtm_protocol ||
3025 rtm->rtm_scope || rtm->rtm_type) {
3026 NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for route get request");
3027 return -EINVAL;
3028 }
3029
3030 if (rtm->rtm_flags & ~(RTM_F_NOTIFY |
3031 RTM_F_LOOKUP_TABLE |
3032 RTM_F_FIB_MATCH)) {
3033 NL_SET_ERR_MSG(extack, "ipv4: Unsupported rtm_flags for route get request");
3034 return -EINVAL;
3035 }
3036
Johannes Berg8cb08172019-04-26 14:07:28 +02003037 err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
3038 rtm_ipv4_policy, extack);
Jakub Kicinskia00302b62019-01-18 10:46:19 -08003039 if (err)
3040 return err;
3041
3042 if ((tb[RTA_SRC] && !rtm->rtm_src_len) ||
3043 (tb[RTA_DST] && !rtm->rtm_dst_len)) {
3044 NL_SET_ERR_MSG(extack, "ipv4: rtm_src_len and rtm_dst_len must be 32 for IPv4");
3045 return -EINVAL;
3046 }
3047
3048 for (i = 0; i <= RTA_MAX; i++) {
3049 if (!tb[i])
3050 continue;
3051
3052 switch (i) {
3053 case RTA_IIF:
3054 case RTA_OIF:
3055 case RTA_SRC:
3056 case RTA_DST:
3057 case RTA_IP_PROTO:
3058 case RTA_SPORT:
3059 case RTA_DPORT:
3060 case RTA_MARK:
3061 case RTA_UID:
3062 break;
3063 default:
3064 NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in route get request");
3065 return -EINVAL;
3066 }
3067 }
3068
3069 return 0;
3070}
3071
David Ahernc21ef3e2017-04-16 09:48:24 -07003072static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
3073 struct netlink_ext_ack *extack)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003074{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09003075 struct net *net = sock_net(in_skb->sk);
Thomas Grafd889ce32006-08-17 18:15:44 -07003076 struct nlattr *tb[RTA_MAX+1];
Roopa Prabhu404eb772018-05-22 14:03:27 -07003077 u32 table_id = RT_TABLE_MAIN;
3078 __be16 sport = 0, dport = 0;
David Ahern3765d352017-05-25 10:42:36 -07003079 struct fib_result res = {};
Roopa Prabhu404eb772018-05-22 14:03:27 -07003080 u8 ip_proto = IPPROTO_UDP;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003081 struct rtable *rt = NULL;
Roopa Prabhu404eb772018-05-22 14:03:27 -07003082 struct sk_buff *skb;
3083 struct rtmsg *rtm;
Maciej Żenczykowskie8e3fbe2018-09-29 23:44:47 -07003084 struct flowi4 fl4 = {};
Al Viro9e12bb22006-09-26 21:25:20 -07003085 __be32 dst = 0;
3086 __be32 src = 0;
Roopa Prabhu404eb772018-05-22 14:03:27 -07003087 kuid_t uid;
Al Viro9e12bb22006-09-26 21:25:20 -07003088 u32 iif;
Thomas Grafd889ce32006-08-17 18:15:44 -07003089 int err;
Eric Dumazet963bfee2010-07-20 22:03:14 +00003090 int mark;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003091
Jakub Kicinskia00302b62019-01-18 10:46:19 -08003092 err = inet_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
Thomas Grafd889ce32006-08-17 18:15:44 -07003093 if (err < 0)
Roopa Prabhu404eb772018-05-22 14:03:27 -07003094 return err;
Thomas Grafd889ce32006-08-17 18:15:44 -07003095
3096 rtm = nlmsg_data(nlh);
Jiri Benc67b61f62015-03-29 16:59:26 +02003097 src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0;
3098 dst = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0;
Thomas Grafd889ce32006-08-17 18:15:44 -07003099 iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
Eric Dumazet963bfee2010-07-20 22:03:14 +00003100 mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0;
Lorenzo Colitti622ec2c2016-11-04 02:23:42 +09003101 if (tb[RTA_UID])
3102 uid = make_kuid(current_user_ns(), nla_get_u32(tb[RTA_UID]));
3103 else
3104 uid = (iif ? INVALID_UID : current_uid());
Linus Torvalds1da177e2005-04-16 15:20:36 -07003105
Roopa Prabhu404eb772018-05-22 14:03:27 -07003106 if (tb[RTA_IP_PROTO]) {
3107 err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO],
Hangbin Liu5e1a99e2019-02-27 16:15:29 +08003108 &ip_proto, AF_INET, extack);
Roopa Prabhu404eb772018-05-22 14:03:27 -07003109 if (err)
3110 return err;
3111 }
Florian Laryschbbadb9a2017-04-07 14:42:20 +02003112
Roopa Prabhu404eb772018-05-22 14:03:27 -07003113 if (tb[RTA_SPORT])
3114 sport = nla_get_be16(tb[RTA_SPORT]);
3115
3116 if (tb[RTA_DPORT])
3117 dport = nla_get_be16(tb[RTA_DPORT]);
3118
3119 skb = inet_rtm_getroute_build_skb(src, dst, ip_proto, sport, dport);
3120 if (!skb)
3121 return -ENOBUFS;
Florian Laryschbbadb9a2017-04-07 14:42:20 +02003122
David Millerd6c0a4f2012-07-01 02:02:59 +00003123 fl4.daddr = dst;
3124 fl4.saddr = src;
3125 fl4.flowi4_tos = rtm->rtm_tos;
3126 fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0;
3127 fl4.flowi4_mark = mark;
Lorenzo Colitti622ec2c2016-11-04 02:23:42 +09003128 fl4.flowi4_uid = uid;
Roopa Prabhu404eb772018-05-22 14:03:27 -07003129 if (sport)
3130 fl4.fl4_sport = sport;
3131 if (dport)
3132 fl4.fl4_dport = dport;
3133 fl4.flowi4_proto = ip_proto;
David Millerd6c0a4f2012-07-01 02:02:59 +00003134
David Ahern3765d352017-05-25 10:42:36 -07003135 rcu_read_lock();
3136
Linus Torvalds1da177e2005-04-16 15:20:36 -07003137 if (iif) {
Thomas Grafd889ce32006-08-17 18:15:44 -07003138 struct net_device *dev;
3139
David Ahern3765d352017-05-25 10:42:36 -07003140 dev = dev_get_by_index_rcu(net, iif);
Ian Morris51456b22015-04-03 09:17:26 +01003141 if (!dev) {
Thomas Grafd889ce32006-08-17 18:15:44 -07003142 err = -ENODEV;
Roopa Prabhu404eb772018-05-22 14:03:27 -07003143 goto errout_rcu;
Thomas Grafd889ce32006-08-17 18:15:44 -07003144 }
3145
Roopa Prabhu404eb772018-05-22 14:03:27 -07003146 fl4.flowi4_iif = iif; /* for rt_fill_info */
Linus Torvalds1da177e2005-04-16 15:20:36 -07003147 skb->dev = dev;
Eric Dumazet963bfee2010-07-20 22:03:14 +00003148 skb->mark = mark;
David Ahern3765d352017-05-25 10:42:36 -07003149 err = ip_route_input_rcu(skb, dst, src, rtm->rtm_tos,
3150 dev, &res);
Thomas Grafd889ce32006-08-17 18:15:44 -07003151
Eric Dumazet511c3f92009-06-02 05:14:27 +00003152 rt = skb_rtable(skb);
Changli Gaod8d1f302010-06-10 23:31:35 -07003153 if (err == 0 && rt->dst.error)
3154 err = -rt->dst.error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003155 } else {
Lorenzo Colitti6503a302018-01-11 18:36:26 +09003156 fl4.flowi4_iif = LOOPBACK_IFINDEX;
Ido Schimmel21f94772018-12-20 17:03:27 +00003157 skb->dev = net->loopback_dev;
David Ahern3765d352017-05-25 10:42:36 -07003158 rt = ip_route_output_key_hash_rcu(net, &fl4, &res, skb);
David S. Millerb23dd4f2011-03-02 14:31:35 -08003159 err = 0;
3160 if (IS_ERR(rt))
3161 err = PTR_ERR(rt);
Florian Westphal2c87d632017-08-14 00:52:58 +02003162 else
3163 skb_dst_set(skb, &rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003164 }
Thomas Grafd889ce32006-08-17 18:15:44 -07003165
Linus Torvalds1da177e2005-04-16 15:20:36 -07003166 if (err)
Roopa Prabhu404eb772018-05-22 14:03:27 -07003167 goto errout_rcu;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003168
Linus Torvalds1da177e2005-04-16 15:20:36 -07003169 if (rtm->rtm_flags & RTM_F_NOTIFY)
3170 rt->rt_flags |= RTCF_NOTIFY;
3171
David Ahernc36ba662015-09-02 13:58:36 -07003172 if (rtm->rtm_flags & RTM_F_LOOKUP_TABLE)
David Ahern68e813a2018-02-14 14:24:28 -08003173 table_id = res.table ? res.table->tb_id : 0;
David Ahernc36ba662015-09-02 13:58:36 -07003174
Roopa Prabhu404eb772018-05-22 14:03:27 -07003175 /* reset skb for netlink reply msg */
3176 skb_trim(skb, 0);
3177 skb_reset_network_header(skb);
3178 skb_reset_transport_header(skb);
3179 skb_reset_mac_header(skb);
3180
Roopa Prabhubc3aae22017-08-16 12:38:52 -07003181 if (rtm->rtm_flags & RTM_F_FIB_MATCH) {
3182 if (!res.fi) {
3183 err = fib_props[res.type].error;
3184 if (!err)
3185 err = -EHOSTUNREACH;
Roopa Prabhu404eb772018-05-22 14:03:27 -07003186 goto errout_rcu;
Roopa Prabhubc3aae22017-08-16 12:38:52 -07003187 }
Roopa Prabhub6179812017-05-25 10:42:39 -07003188 err = fib_dump_info(skb, NETLINK_CB(in_skb).portid,
3189 nlh->nlmsg_seq, RTM_NEWROUTE, table_id,
3190 rt->rt_type, res.prefix, res.prefixlen,
3191 fl4.flowi4_tos, res.fi, 0);
Roopa Prabhubc3aae22017-08-16 12:38:52 -07003192 } else {
Roopa Prabhu404eb772018-05-22 14:03:27 -07003193 err = rt_fill_info(net, dst, src, rt, table_id, &fl4, skb,
John Fastabende93fb3e2019-08-23 17:11:38 -07003194 NETLINK_CB(in_skb).portid,
3195 nlh->nlmsg_seq, 0);
Roopa Prabhubc3aae22017-08-16 12:38:52 -07003196 }
David S. Miller7b46a642015-01-18 23:36:08 -05003197 if (err < 0)
Roopa Prabhu404eb772018-05-22 14:03:27 -07003198 goto errout_rcu;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003199
David Ahern3765d352017-05-25 10:42:36 -07003200 rcu_read_unlock();
3201
Eric W. Biederman15e47302012-09-07 20:12:54 +00003202 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003203
Thomas Grafd889ce32006-08-17 18:15:44 -07003204errout_free:
Roopa Prabhu404eb772018-05-22 14:03:27 -07003205 return err;
3206errout_rcu:
David Ahern3765d352017-05-25 10:42:36 -07003207 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07003208 kfree_skb(skb);
Roopa Prabhu404eb772018-05-22 14:03:27 -07003209 goto errout_free;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003210}
3211
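/* Illustrative userspace counterpart (assumption: built against the
 * uapi headers; the helper name is hypothetical): the minimal netlink
 * request this handler serves, roughly what "ip route get 192.0.2.1"
 * sends.  rtm_dst_len must be 32 when RTA_DST is present, per the
 * strict validation above.
 */
#if 0
#include <linux/rtnetlink.h>
#include <sys/socket.h>
#include <string.h>
#include <unistd.h>

static int example_route_get(void)
{
	struct {
		struct nlmsghdr nlh;
		struct rtmsg rtm;
		struct rtattr rta;
		unsigned char dst[4];
	} req;
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

	if (fd < 0)
		return -1;

	memset(&req, 0, sizeof(req));
	req.nlh.nlmsg_len = sizeof(req);
	req.nlh.nlmsg_type = RTM_GETROUTE;
	req.nlh.nlmsg_flags = NLM_F_REQUEST;
	req.rtm.rtm_family = AF_INET;
	req.rtm.rtm_dst_len = 32;
	req.rta.rta_type = RTA_DST;
	req.rta.rta_len = RTA_LENGTH(4);
	memcpy(req.dst, "\xc0\x00\x02\x01", 4);	/* 192.0.2.1 */

	send(fd, &req, sizeof(req), 0);	/* reply is one RTM_NEWROUTE */
	close(fd);
	return 0;
}
#endif
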
Linus Torvalds1da177e2005-04-16 15:20:36 -07003212void ip_rt_multicast_event(struct in_device *in_dev)
3213{
Nicolas Dichtel4ccfe6d2012-09-07 00:45:29 +00003214 rt_cache_flush(dev_net(in_dev->dev));
Linus Torvalds1da177e2005-04-16 15:20:36 -07003215}
3216
3217#ifdef CONFIG_SYSCTL
Gao feng082c7ca2013-02-19 00:43:12 +00003218static int ip_rt_gc_interval __read_mostly = 60 * HZ;
3219static int ip_rt_gc_min_interval __read_mostly = HZ / 2;
3220static int ip_rt_gc_elasticity __read_mostly = 8;
Arnd Bergmann773daa32018-02-28 14:32:48 +01003221static int ip_min_valid_pmtu __read_mostly = IPV4_MIN_MTU;
Gao feng082c7ca2013-02-19 00:43:12 +00003222
Joe Perchesfe2c6332013-06-11 23:04:25 -07003223static int ipv4_sysctl_rtcache_flush(struct ctl_table *__ctl, int write,
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07003224 void __user *buffer,
Linus Torvalds1da177e2005-04-16 15:20:36 -07003225 size_t *lenp, loff_t *ppos)
3226{
Timo Teräs5aad1de2013-05-27 20:46:33 +00003227 struct net *net = (struct net *)__ctl->extra1;
3228
Linus Torvalds1da177e2005-04-16 15:20:36 -07003229 if (write) {
Timo Teräs5aad1de2013-05-27 20:46:33 +00003230 rt_cache_flush(net);
3231 fnhe_genid_bump(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003232 return 0;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09003233 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07003234
3235 return -EINVAL;
3236}
3237
Joe Perchesfe2c6332013-06-11 23:04:25 -07003238static struct ctl_table ipv4_route_table[] = {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09003239 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003240 .procname = "gc_thresh",
3241 .data = &ipv4_dst_ops.gc_thresh,
3242 .maxlen = sizeof(int),
3243 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08003244 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07003245 },
3246 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003247 .procname = "max_size",
3248 .data = &ip_rt_max_size,
3249 .maxlen = sizeof(int),
3250 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08003251 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07003252 },
3253 {
3254 /* Deprecated. Use gc_min_interval_ms */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09003255
Linus Torvalds1da177e2005-04-16 15:20:36 -07003256 .procname = "gc_min_interval",
3257 .data = &ip_rt_gc_min_interval,
3258 .maxlen = sizeof(int),
3259 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08003260 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07003261 },
3262 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003263 .procname = "gc_min_interval_ms",
3264 .data = &ip_rt_gc_min_interval,
3265 .maxlen = sizeof(int),
3266 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08003267 .proc_handler = proc_dointvec_ms_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07003268 },
3269 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003270 .procname = "gc_timeout",
3271 .data = &ip_rt_gc_timeout,
3272 .maxlen = sizeof(int),
3273 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08003274 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07003275 },
3276 {
Eric Dumazet9f28a2f2011-12-21 15:47:16 -05003277 .procname = "gc_interval",
3278 .data = &ip_rt_gc_interval,
3279 .maxlen = sizeof(int),
3280 .mode = 0644,
3281 .proc_handler = proc_dointvec_jiffies,
3282 },
3283 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003284 .procname = "redirect_load",
3285 .data = &ip_rt_redirect_load,
3286 .maxlen = sizeof(int),
3287 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08003288 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07003289 },
3290 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003291 .procname = "redirect_number",
3292 .data = &ip_rt_redirect_number,
3293 .maxlen = sizeof(int),
3294 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08003295 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07003296 },
3297 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003298 .procname = "redirect_silence",
3299 .data = &ip_rt_redirect_silence,
3300 .maxlen = sizeof(int),
3301 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08003302 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07003303 },
3304 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003305 .procname = "error_cost",
3306 .data = &ip_rt_error_cost,
3307 .maxlen = sizeof(int),
3308 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08003309 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07003310 },
3311 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003312 .procname = "error_burst",
3313 .data = &ip_rt_error_burst,
3314 .maxlen = sizeof(int),
3315 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08003316 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07003317 },
3318 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003319 .procname = "gc_elasticity",
3320 .data = &ip_rt_gc_elasticity,
3321 .maxlen = sizeof(int),
3322 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08003323 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07003324 },
3325 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003326 .procname = "mtu_expires",
3327 .data = &ip_rt_mtu_expires,
3328 .maxlen = sizeof(int),
3329 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08003330 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07003331 },
3332 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003333 .procname = "min_pmtu",
3334 .data = &ip_rt_min_pmtu,
3335 .maxlen = sizeof(int),
3336 .mode = 0644,
Sabrina Dubrocac7272c22018-02-26 16:13:43 +01003337 .proc_handler = proc_dointvec_minmax,
3338 .extra1 = &ip_min_valid_pmtu,
Linus Torvalds1da177e2005-04-16 15:20:36 -07003339 },
3340 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003341 .procname = "min_adv_mss",
3342 .data = &ip_rt_min_advmss,
3343 .maxlen = sizeof(int),
3344 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08003345 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07003346 },
Eric W. Biedermanf8572d82009-11-05 13:32:03 -08003347 { }
Linus Torvalds1da177e2005-04-16 15:20:36 -07003348};
Denis V. Lunev39a23e72008-07-05 19:02:33 -07003349
Christian Brauner5cdda5f2019-06-24 15:29:23 +02003350static const char ipv4_route_flush_procname[] = "flush";
3351
Denis V. Lunev39a23e72008-07-05 19:02:33 -07003352static struct ctl_table ipv4_route_flush_table[] = {
3353 {
Christian Brauner5cdda5f2019-06-24 15:29:23 +02003354 .procname = ipv4_route_flush_procname,
Denis V. Lunev39a23e72008-07-05 19:02:33 -07003355 .maxlen = sizeof(int),
3356 .mode = 0200,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08003357 .proc_handler = ipv4_sysctl_rtcache_flush,
Denis V. Lunev39a23e72008-07-05 19:02:33 -07003358 },
Eric W. Biedermanf8572d82009-11-05 13:32:03 -08003359 { },
Denis V. Lunev39a23e72008-07-05 19:02:33 -07003360};
3361
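/* Usage note (sketch, not part of the original file): both tables
 * above are registered under "net/ipv4/route", so the knobs appear as
 * files in /proc/sys/net/ipv4/route/.  For example, from a shell:
 *
 *	echo 1   > /proc/sys/net/ipv4/route/flush
 *	echo 552 > /proc/sys/net/ipv4/route/min_pmtu
 *
 * "flush" is write-only (mode 0200) and also bumps the fnhe genid,
 * invalidating cached next-hop exceptions; "min_pmtu" is clamped from
 * below by ip_min_valid_pmtu.
 */
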
3362static __net_init int sysctl_route_net_init(struct net *net)
3363{
3364 struct ctl_table *tbl;
3365
3366 tbl = ipv4_route_flush_table;
Octavian Purdila09ad9bc2009-11-25 15:14:13 -08003367 if (!net_eq(net, &init_net)) {
Denis V. Lunev39a23e72008-07-05 19:02:33 -07003368 tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL);
Ian Morris51456b22015-04-03 09:17:26 +01003369 if (!tbl)
Denis V. Lunev39a23e72008-07-05 19:02:33 -07003370 goto err_dup;
Eric W. Biederman464dc802012-11-16 03:02:59 +00003371
Christian Brauner5cdda5f2019-06-24 15:29:23 +02003372 /* Don't export non-whitelisted sysctls to unprivileged users */
3373 if (net->user_ns != &init_user_ns) {
3374 if (tbl[0].procname != ipv4_route_flush_procname)
3375 tbl[0].procname = NULL;
3376 }
Denis V. Lunev39a23e72008-07-05 19:02:33 -07003377 }
3378 tbl[0].extra1 = net;
3379
Eric W. Biedermanec8f23c2012-04-19 13:44:49 +00003380 net->ipv4.route_hdr = register_net_sysctl(net, "net/ipv4/route", tbl);
Ian Morris51456b22015-04-03 09:17:26 +01003381 if (!net->ipv4.route_hdr)
Denis V. Lunev39a23e72008-07-05 19:02:33 -07003382 goto err_reg;
3383 return 0;
3384
3385err_reg:
3386 if (tbl != ipv4_route_flush_table)
3387 kfree(tbl);
3388err_dup:
3389 return -ENOMEM;
3390}
3391
3392static __net_exit void sysctl_route_net_exit(struct net *net)
3393{
3394 struct ctl_table *tbl;
3395
3396 tbl = net->ipv4.route_hdr->ctl_table_arg;
3397 unregister_net_sysctl_table(net->ipv4.route_hdr);
3398 BUG_ON(tbl == ipv4_route_flush_table);
3399 kfree(tbl);
3400}
3401
3402static __net_initdata struct pernet_operations sysctl_route_ops = {
3403 .init = sysctl_route_net_init,
3404 .exit = sysctl_route_net_exit,
3405};
Linus Torvalds1da177e2005-04-16 15:20:36 -07003406#endif
3407
Neil Horman3ee94372010-05-08 01:57:52 -07003408static __net_init int rt_genid_init(struct net *net)
Denis V. Lunev9f5e97e2008-07-05 19:02:59 -07003409{
fan.duca4c3fc2013-07-30 08:33:53 +08003410 atomic_set(&net->ipv4.rt_genid, 0);
Timo Teräs5aad1de2013-05-27 20:46:33 +00003411 atomic_set(&net->fnhe_genid, 0);
Jason A. Donenfeld7aed9f72017-06-07 23:01:20 -04003412 atomic_set(&net->ipv4.dev_addr_genid, get_random_int());
Denis V. Lunev9f5e97e2008-07-05 19:02:59 -07003413 return 0;
3414}
3415
Neil Horman3ee94372010-05-08 01:57:52 -07003416static __net_initdata struct pernet_operations rt_genid_ops = {
3417 .init = rt_genid_init,
Denis V. Lunev9f5e97e2008-07-05 19:02:59 -07003418};
3419
David S. Millerc3426b42012-06-09 16:27:05 -07003420static int __net_init ipv4_inetpeer_init(struct net *net)
3421{
3422 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3423
3424 if (!bp)
3425 return -ENOMEM;
3426 inet_peer_base_init(bp);
3427 net->ipv4.peers = bp;
3428 return 0;
3429}
3430
3431static void __net_exit ipv4_inetpeer_exit(struct net *net)
3432{
3433 struct inet_peer_base *bp = net->ipv4.peers;
3434
3435 net->ipv4.peers = NULL;
David S. Miller56a6b242012-06-09 16:32:41 -07003436 inetpeer_invalidate_tree(bp);
David S. Millerc3426b42012-06-09 16:27:05 -07003437 kfree(bp);
3438}
3439
3440static __net_initdata struct pernet_operations ipv4_inetpeer_ops = {
3441 .init = ipv4_inetpeer_init,
3442 .exit = ipv4_inetpeer_exit,
3443};
Denis V. Lunev9f5e97e2008-07-05 19:02:59 -07003444
Patrick McHardyc7066f72011-01-14 13:36:42 +01003445#ifdef CONFIG_IP_ROUTE_CLASSID
Tejun Heo7d720c32010-02-16 15:20:26 +00003446struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
Patrick McHardyc7066f72011-01-14 13:36:42 +01003447#endif /* CONFIG_IP_ROUTE_CLASSID */
Linus Torvalds1da177e2005-04-16 15:20:36 -07003448
Linus Torvalds1da177e2005-04-16 15:20:36 -07003449int __init ip_rt_init(void)
3450{
Eric Dumazet5055c372015-01-14 15:17:06 -08003451 int cpu;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003452
Kees Cook6da2ec52018-06-12 13:55:00 -07003453 ip_idents = kmalloc_array(IP_IDENTS_SZ, sizeof(*ip_idents),
3454 GFP_KERNEL);
Eric Dumazet73f156a2014-06-02 05:26:03 -07003455 if (!ip_idents)
3456 panic("IP: failed to allocate ip_idents\n");
3457
3458 prandom_bytes(ip_idents, IP_IDENTS_SZ * sizeof(*ip_idents));
3459
Eric Dumazet355b590c2015-05-01 10:37:49 -07003460 ip_tstamps = kcalloc(IP_IDENTS_SZ, sizeof(*ip_tstamps), GFP_KERNEL);
3461 if (!ip_tstamps)
3462 panic("IP: failed to allocate ip_tstamps\n");
3463
Eric Dumazet5055c372015-01-14 15:17:06 -08003464 for_each_possible_cpu(cpu) {
3465 struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu);
3466
3467 INIT_LIST_HEAD(&ul->head);
3468 spin_lock_init(&ul->lock);
3469 }
Patrick McHardyc7066f72011-01-14 13:36:42 +01003470#ifdef CONFIG_IP_ROUTE_CLASSID
Ingo Molnar0dcec8c2009-02-25 14:07:33 +01003471 ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct));
Linus Torvalds1da177e2005-04-16 15:20:36 -07003472 if (!ip_rt_acct)
3473 panic("IP: failed to allocate ip_rt_acct\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07003474#endif
3475
Alexey Dobriyane5d679f332006-08-26 19:25:52 -07003476 ipv4_dst_ops.kmem_cachep =
3477 kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0,
Paul Mundt20c2df82007-07-20 10:11:58 +09003478 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003479
David S. Miller14e50e52007-05-24 18:17:54 -07003480 ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep;
3481
Eric Dumazetfc66f952010-10-08 06:37:34 +00003482 if (dst_entries_init(&ipv4_dst_ops) < 0)
3483 panic("IP: failed to allocate ipv4_dst_ops counter\n");
3484
3485 if (dst_entries_init(&ipv4_dst_blackhole_ops) < 0)
3486 panic("IP: failed to allocate ipv4_dst_blackhole_ops counter\n");
3487
David S. Miller89aef892012-07-17 11:00:09 -07003488 ipv4_dst_ops.gc_thresh = ~0;
3489 ip_rt_max_size = INT_MAX;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003490
Linus Torvalds1da177e2005-04-16 15:20:36 -07003491 devinet_init();
3492 ip_fib_init();
3493
Denis V. Lunev73b38712008-02-28 20:51:18 -08003494 if (ip_rt_proc_init())
Joe Perches058bd4d2012-03-11 18:36:11 +00003495 pr_err("Unable to create route proc files\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07003496#ifdef CONFIG_XFRM
3497 xfrm_init();
Steffen Klassert703fb942012-11-13 08:52:24 +01003498 xfrm4_init();
Linus Torvalds1da177e2005-04-16 15:20:36 -07003499#endif
Florian Westphal394f51a2017-08-15 16:34:44 +02003500 rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL,
3501 RTNL_FLAG_DOIT_UNLOCKED);
Thomas Graf63f34442007-03-22 11:55:17 -07003502
Denis V. Lunev39a23e72008-07-05 19:02:33 -07003503#ifdef CONFIG_SYSCTL
3504 register_pernet_subsys(&sysctl_route_ops);
3505#endif
Neil Horman3ee94372010-05-08 01:57:52 -07003506 register_pernet_subsys(&rt_genid_ops);
David S. Millerc3426b42012-06-09 16:27:05 -07003507 register_pernet_subsys(&ipv4_inetpeer_ops);
Tim Hansen1bcdca32017-10-04 15:59:49 -04003508 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003509}
3510
Al Viroa1bc6eb2008-07-30 06:32:52 -04003511#ifdef CONFIG_SYSCTL
Al Viroeeb61f72008-07-27 08:59:33 +01003512/*
3513 * We really need to sanitize the damn ipv4 init order; then all
3514 * this nonsense will go away.
3515 */
3516void __init ip_static_sysctl_init(void)
3517{
Eric W. Biederman4e5ca782012-04-19 13:32:39 +00003518 register_net_sysctl(&init_net, "net/ipv4/route", ipv4_route_table);
Al Viroeeb61f72008-07-27 08:59:33 +01003519}
Al Viroa1bc6eb2008-07-30 06:32:52 -04003520#endif