// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * INET         An implementation of the TCP/IP protocol suite for the LINUX
 *              operating system.  INET is implemented using the BSD Socket
 *              interface as the means of communication with the user level.
 *
 *              ROUTE - implementation of the IP router.
 *
 * Authors:     Ross Biro
 *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *              Alan Cox, <gw4pts@gw4pts.ampr.org>
 *              Linus Torvalds, <Linus.Torvalds@helsinki.fi>
 *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Fixes:
 *              Alan Cox        :       Verify area fixes.
 *              Alan Cox        :       cli() protects routing changes
 *              Rui Oliveira    :       ICMP routing table updates
 *              (rco@di.uminho.pt)      Routing table insertion and update
 *              Linus Torvalds  :       Rewrote bits to be sensible
 *              Alan Cox        :       Added BSD route gw semantics
 *              Alan Cox        :       Super /proc >4K
 *              Alan Cox        :       MTU in route table
 *              Alan Cox        :       MSS actually. Also added the window
 *                                      clamper.
 *              Sam Lantinga    :       Fixed route matching in rt_del()
 *              Alan Cox        :       Routing cache support.
 *              Alan Cox        :       Removed compatibility cruft.
 *              Alan Cox        :       RTF_REJECT support.
 *              Alan Cox        :       TCP irtt support.
 *              Jonathan Naylor :       Added Metric support.
 *              Miquel van Smoorenburg  :       BSD API fixes.
 *              Miquel van Smoorenburg  :       Metrics.
 *              Alan Cox        :       Use __u32 properly
 *              Alan Cox        :       Aligned routing errors more closely with BSD;
 *                                      our system is still very different.
 *              Alan Cox        :       Faster /proc handling
 *              Alexey Kuznetsov        :       Massive rework to support tree based routing,
 *                                      routing caches and better behaviour.
 *
 *              Olaf Erb        :       irtt wasn't being copied right.
 *              Bjorn Ekwall    :       Kerneld route support.
 *              Alan Cox        :       Multicast fixed (I hope)
 *              Pavel Krauz     :       Limited broadcast fixed
 *              Mike McLagan    :       Routing by source
 *              Alexey Kuznetsov        :       End of old history. Split to fib.c and
 *                                      route.c and rewritten from scratch.
 *              Andi Kleen      :       Load-limit warning messages.
 *              Vitaly E. Lavrov        :       Transparent proxy revived after year coma.
 *              Vitaly E. Lavrov        :       Race condition in ip_route_input_slow.
 *              Tobias Ringstrom        :       Uninitialized res.type in ip_route_output_slow.
 *              Vladimir V. Ivanov      :       IP rule info (flowid) is really useful.
 *              Marc Boucher    :       routing by fwmark
 *              Robert Olsson   :       Added rt_cache statistics
 *              Arnaldo C. Melo :       Convert proc stuff to seq_file
 *              Eric Dumazet    :       hashed spinlocks and rt_check_expire() fixes.
 *              Ilia Sotnikov   :       Ignore TOS on PMTUD and Redirect
 *              Ilia Sotnikov   :       Removed TOS from hash calculations
 */

#define pr_fmt(fmt) "IPv4: " fmt

#include <linux/module.h>
#include <linux/uaccess.h>
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/memblock.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/pkt_sched.h>
#include <linux/mroute.h>
#include <linux/netfilter_ipv4.h>
#include <linux/random.h>
#include <linux/rcupdate.h>
#include <linux/times.h>
#include <linux/slab.h>
#include <linux/jhash.h>
#include <net/dst.h>
#include <net/dst_metadata.h>
#include <net/net_namespace.h>
#include <net/protocol.h>
#include <net/ip.h>
#include <net/route.h>
#include <net/inetpeer.h>
#include <net/sock.h>
#include <net/ip_fib.h>
#include <net/nexthop.h>
#include <net/arp.h>
#include <net/tcp.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/lwtunnel.h>
#include <net/netevent.h>
#include <net/rtnetlink.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
#include <net/secure_seq.h>
#include <net/ip_tunnels.h>
#include <net/l3mdev.h>

#include "fib_lookup.h"

#define RT_FL_TOS(oldflp4) \
        ((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))

#define RT_GC_TIMEOUT (300*HZ)

static int ip_rt_max_size;
static int ip_rt_redirect_number __read_mostly = 9;
static int ip_rt_redirect_load __read_mostly = HZ / 50;
static int ip_rt_redirect_silence __read_mostly = ((HZ / 50) << (9 + 1));
static int ip_rt_error_cost __read_mostly = HZ;
static int ip_rt_error_burst __read_mostly = 5 * HZ;
static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ;
static u32 ip_rt_min_pmtu __read_mostly = 512 + 20 + 20;
static int ip_rt_min_advmss __read_mostly = 256;

static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT;

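/* Orientation note: the tunables above are exposed as sysctls under
 * /proc/sys/net/ipv4/route/ (min_pmtu, mtu_expires, redirect_number,
 * error_cost, ...); see the CONFIG_SYSCTL table further down in this
 * file for the authoritative list and value ranges.
 */
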
/*
 *      Interface to generic destination cache.
 */

INDIRECT_CALLABLE_SCOPE
struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int ipv4_default_advmss(const struct dst_entry *dst);
INDIRECT_CALLABLE_SCOPE
unsigned int ipv4_mtu(const struct dst_entry *dst);
static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
static void ipv4_link_failure(struct sk_buff *skb);
static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
                              struct sk_buff *skb, u32 mtu,
                              bool confirm_neigh);
static void ip_do_redirect(struct dst_entry *dst, struct sock *sk,
                           struct sk_buff *skb);
static void ipv4_dst_destroy(struct dst_entry *dst);

static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
{
        WARN_ON(1);
        return NULL;
}

static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
                                           struct sk_buff *skb,
                                           const void *daddr);
static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr);

static struct dst_ops ipv4_dst_ops = {
        .family          = AF_INET,
        .check           = ipv4_dst_check,
        .default_advmss  = ipv4_default_advmss,
        .mtu             = ipv4_mtu,
        .cow_metrics     = ipv4_cow_metrics,
        .destroy         = ipv4_dst_destroy,
        .negative_advice = ipv4_negative_advice,
        .link_failure    = ipv4_link_failure,
        .update_pmtu     = ip_rt_update_pmtu,
        .redirect        = ip_do_redirect,
        .local_out       = __ip_local_out,
        .neigh_lookup    = ipv4_neigh_lookup,
        .confirm_neigh   = ipv4_confirm_neigh,
};
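
/* ipv4_dst_ops is the per-family operations table consumed by the
 * generic dst layer: dst_mtu() dispatches to ->mtu, dst_check() to
 * ->check, and so on.  The INDIRECT_CALLABLE_SCOPE annotations above
 * exist so that hot callers can use the INDIRECT_CALL_*() helpers to
 * turn these indirect calls into direct ones on retpoline builds.
 */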

#define ECN_OR_COST(class)      TC_PRIO_##class

const __u8 ip_tos2prio[16] = {
        TC_PRIO_BESTEFFORT,
        ECN_OR_COST(BESTEFFORT),
        TC_PRIO_BESTEFFORT,
        ECN_OR_COST(BESTEFFORT),
        TC_PRIO_BULK,
        ECN_OR_COST(BULK),
        TC_PRIO_BULK,
        ECN_OR_COST(BULK),
        TC_PRIO_INTERACTIVE,
        ECN_OR_COST(INTERACTIVE),
        TC_PRIO_INTERACTIVE,
        ECN_OR_COST(INTERACTIVE),
        TC_PRIO_INTERACTIVE_BULK,
        ECN_OR_COST(INTERACTIVE_BULK),
        TC_PRIO_INTERACTIVE_BULK,
        ECN_OR_COST(INTERACTIVE_BULK)
};
EXPORT_SYMBOL(ip_tos2prio);
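
/* Usage sketch: callers map the TOS nibble onto a queueing priority by
 * indexing this table, as rt_tos2priority() in include/net/ip.h does:
 *
 *      static inline char rt_tos2priority(u8 tos)
 *      {
 *              return ip_tos2prio[IPTOS_TOS(tos) >> 1];
 *      }
 *
 * so each consecutive pair of entries covers one RFC 1349 TOS value.
 */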

static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
#define RT_CACHE_STAT_INC(field) raw_cpu_inc(rt_cache_stat.field)

#ifdef CONFIG_PROC_FS
static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
{
        if (*pos)
                return NULL;
        return SEQ_START_TOKEN;
}

static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
        ++*pos;
        return NULL;
}

static void rt_cache_seq_stop(struct seq_file *seq, void *v)
{
}

static int rt_cache_seq_show(struct seq_file *seq, void *v)
{
        if (v == SEQ_START_TOKEN)
                seq_printf(seq, "%-127s\n",
                           "Iface\tDestination\tGateway \tFlags\t\tRefCnt\tUse\t"
                           "Metric\tSource\t\tMTU\tWindow\tIRTT\tTOS\tHHRef\t"
                           "HHUptod\tSpecDst");
        return 0;
}

static const struct seq_operations rt_cache_seq_ops = {
        .start  = rt_cache_seq_start,
        .next   = rt_cache_seq_next,
        .stop   = rt_cache_seq_stop,
        .show   = rt_cache_seq_show,
};

static void *rt_cpu_seq_start(struct seq_file *seq, loff_t *pos)
{
        int cpu;

        if (*pos == 0)
                return SEQ_START_TOKEN;

        for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
                if (!cpu_possible(cpu))
                        continue;
                *pos = cpu+1;
                return &per_cpu(rt_cache_stat, cpu);
        }
        return NULL;
}

static void *rt_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
        int cpu;

        for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
                if (!cpu_possible(cpu))
                        continue;
                *pos = cpu+1;
                return &per_cpu(rt_cache_stat, cpu);
        }
        (*pos)++;
        return NULL;

}

static void rt_cpu_seq_stop(struct seq_file *seq, void *v)
{

}

static int rt_cpu_seq_show(struct seq_file *seq, void *v)
{
        struct rt_cache_stat *st = v;

        if (v == SEQ_START_TOKEN) {
                seq_printf(seq, "entries in_hit in_slow_tot in_slow_mc in_no_route in_brd in_martian_dst in_martian_src out_hit out_slow_tot out_slow_mc gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n");
                return 0;
        }

        seq_printf(seq,"%08x %08x %08x %08x %08x %08x %08x %08x "
                   " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n",
                   dst_entries_get_slow(&ipv4_dst_ops),
                   0, /* st->in_hit */
                   st->in_slow_tot,
                   st->in_slow_mc,
                   st->in_no_route,
                   st->in_brd,
                   st->in_martian_dst,
                   st->in_martian_src,

                   0, /* st->out_hit */
                   st->out_slow_tot,
                   st->out_slow_mc,

                   0, /* st->gc_total */
                   0, /* st->gc_ignored */
                   0, /* st->gc_goal_miss */
                   0, /* st->gc_dst_overflow */
                   0, /* st->in_hlist_search */
                   0  /* st->out_hlist_search */
                );
        return 0;
}

static const struct seq_operations rt_cpu_seq_ops = {
        .start  = rt_cpu_seq_start,
        .next   = rt_cpu_seq_next,
        .stop   = rt_cpu_seq_stop,
        .show   = rt_cpu_seq_show,
};

#ifdef CONFIG_IP_ROUTE_CLASSID
static int rt_acct_proc_show(struct seq_file *m, void *v)
{
        struct ip_rt_acct *dst, *src;
        unsigned int i, j;

        dst = kcalloc(256, sizeof(struct ip_rt_acct), GFP_KERNEL);
        if (!dst)
                return -ENOMEM;

        for_each_possible_cpu(i) {
                src = (struct ip_rt_acct *)per_cpu_ptr(ip_rt_acct, i);
                for (j = 0; j < 256; j++) {
                        dst[j].o_bytes   += src[j].o_bytes;
                        dst[j].o_packets += src[j].o_packets;
                        dst[j].i_bytes   += src[j].i_bytes;
                        dst[j].i_packets += src[j].i_packets;
                }
        }

        seq_write(m, dst, 256 * sizeof(struct ip_rt_acct));
        kfree(dst);
        return 0;
}
#endif

static int __net_init ip_rt_do_proc_init(struct net *net)
{
        struct proc_dir_entry *pde;

        pde = proc_create_seq("rt_cache", 0444, net->proc_net,
                              &rt_cache_seq_ops);
        if (!pde)
                goto err1;

        pde = proc_create_seq("rt_cache", 0444, net->proc_net_stat,
                              &rt_cpu_seq_ops);
        if (!pde)
                goto err2;

#ifdef CONFIG_IP_ROUTE_CLASSID
        pde = proc_create_single("rt_acct", 0, net->proc_net,
                                 rt_acct_proc_show);
        if (!pde)
                goto err3;
#endif
        return 0;

#ifdef CONFIG_IP_ROUTE_CLASSID
err3:
        remove_proc_entry("rt_cache", net->proc_net_stat);
#endif
err2:
        remove_proc_entry("rt_cache", net->proc_net);
err1:
        return -ENOMEM;
}

static void __net_exit ip_rt_do_proc_exit(struct net *net)
{
        remove_proc_entry("rt_cache", net->proc_net_stat);
        remove_proc_entry("rt_cache", net->proc_net);
#ifdef CONFIG_IP_ROUTE_CLASSID
        remove_proc_entry("rt_acct", net->proc_net);
#endif
}

static struct pernet_operations ip_rt_proc_ops __net_initdata = {
        .init = ip_rt_do_proc_init,
        .exit = ip_rt_do_proc_exit,
};

static int __init ip_rt_proc_init(void)
{
        return register_pernet_subsys(&ip_rt_proc_ops);
}

#else
static inline int ip_rt_proc_init(void)
{
        return 0;
}
#endif /* CONFIG_PROC_FS */

static inline bool rt_is_expired(const struct rtable *rth)
{
        return rth->rt_genid != rt_genid_ipv4(dev_net(rth->dst.dev));
}

void rt_cache_flush(struct net *net)
{
        rt_genid_bump_ipv4(net);
}
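
/* Note that flushing no longer walks a central cache: bumping the
 * per-netns generation id makes rt_is_expired() true for every rtable
 * created before the bump, so ipv4_dst_check() rejects stale entries
 * and they are replaced lazily on their next lookup.
 */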

static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
                                           struct sk_buff *skb,
                                           const void *daddr)
{
        const struct rtable *rt = container_of(dst, struct rtable, dst);
        struct net_device *dev = dst->dev;
        struct neighbour *n;

        rcu_read_lock_bh();

        if (likely(rt->rt_gw_family == AF_INET)) {
                n = ip_neigh_gw4(dev, rt->rt_gw4);
        } else if (rt->rt_gw_family == AF_INET6) {
                n = ip_neigh_gw6(dev, &rt->rt_gw6);
        } else {
                __be32 pkey;

                pkey = skb ? ip_hdr(skb)->daddr : *((__be32 *) daddr);
                n = ip_neigh_gw4(dev, pkey);
        }

        if (!IS_ERR(n) && !refcount_inc_not_zero(&n->refcnt))
                n = NULL;

        rcu_read_unlock_bh();

        return n;
}

static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr)
{
        const struct rtable *rt = container_of(dst, struct rtable, dst);
        struct net_device *dev = dst->dev;
        const __be32 *pkey = daddr;

        if (rt->rt_gw_family == AF_INET) {
                pkey = (const __be32 *)&rt->rt_gw4;
        } else if (rt->rt_gw_family == AF_INET6) {
                return __ipv6_confirm_neigh_stub(dev, &rt->rt_gw6);
        } else if (!daddr ||
                   (rt->rt_flags &
                    (RTCF_MULTICAST | RTCF_BROADCAST | RTCF_LOCAL))) {
                return;
        }
        __ipv4_confirm_neigh(dev, *(__force u32 *)pkey);
}

/* Hash tables of size 2048..262144 depending on RAM size.
 * Each bucket uses 8 bytes.
 */
static u32 ip_idents_mask __read_mostly;
static atomic_t *ip_idents __read_mostly;
static u32 *ip_tstamps __read_mostly;

/* In order to protect privacy, we add a perturbation to identifiers
 * if one generator is seldom used.  This makes it hard for an attacker
 * to infer how many packets were sent between two points in time.
 */
u32 ip_idents_reserve(u32 hash, int segs)
{
        u32 bucket, old, now = (u32)jiffies;
        atomic_t *p_id;
        u32 *p_tstamp;
        u32 delta = 0;

        bucket = hash & ip_idents_mask;
        p_tstamp = ip_tstamps + bucket;
        p_id = ip_idents + bucket;
        old = READ_ONCE(*p_tstamp);

        if (old != now && cmpxchg(p_tstamp, old, now) == old)
                delta = prandom_u32_max(now - old);

        /* If UBSAN reports an error here, please make sure your compiler
         * supports -fno-strict-overflow before reporting it: that warning
         * was a bug in UBSAN, and it has been fixed in GCC-8.
         */
        return atomic_add_return(segs + delta, p_id) - segs;
}
EXPORT_SYMBOL(ip_idents_reserve);
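
/* Worked example: if now - old jiffies elapsed since a bucket was last
 * used, the next reservation adds a uniformly random delta in
 * [0, now - old) on top of the requested segment count, so sampling the
 * same bucket twice bounds but does not reveal the number of packets
 * sent in between.
 */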

void __ip_select_ident(struct net *net, struct iphdr *iph, int segs)
{
        u32 hash, id;

        /* Note the following code is not safe, but this is okay. */
        if (unlikely(siphash_key_is_zero(&net->ipv4.ip_id_key)))
                get_random_bytes(&net->ipv4.ip_id_key,
                                 sizeof(net->ipv4.ip_id_key));

        hash = siphash_3u32((__force u32)iph->daddr,
                            (__force u32)iph->saddr,
                            iph->protocol,
                            &net->ipv4.ip_id_key);
        id = ip_idents_reserve(hash, segs);
        iph->id = htons(id);
}
EXPORT_SYMBOL(__ip_select_ident);

static void __build_flow_key(const struct net *net, struct flowi4 *fl4,
                             const struct sock *sk,
                             const struct iphdr *iph,
                             int oif, u8 tos,
                             u8 prot, u32 mark, int flow_flags)
{
        if (sk) {
                const struct inet_sock *inet = inet_sk(sk);

                oif = sk->sk_bound_dev_if;
                mark = sk->sk_mark;
                tos = RT_CONN_FLAGS(sk);
                prot = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol;
        }
        flowi4_init_output(fl4, oif, mark, tos,
                           RT_SCOPE_UNIVERSE, prot,
                           flow_flags,
                           iph->daddr, iph->saddr, 0, 0,
                           sock_net_uid(net, sk));
}

static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb,
                               const struct sock *sk)
{
        const struct net *net = dev_net(skb->dev);
        const struct iphdr *iph = ip_hdr(skb);
        int oif = skb->dev->ifindex;
        u8 tos = RT_TOS(iph->tos);
        u8 prot = iph->protocol;
        u32 mark = skb->mark;

        __build_flow_key(net, fl4, sk, iph, oif, tos, prot, mark, 0);
}

static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
{
        const struct inet_sock *inet = inet_sk(sk);
        const struct ip_options_rcu *inet_opt;
        __be32 daddr = inet->inet_daddr;

        rcu_read_lock();
        inet_opt = rcu_dereference(inet->inet_opt);
        if (inet_opt && inet_opt->opt.srr)
                daddr = inet_opt->opt.faddr;
        flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
                           RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
                           inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
                           inet_sk_flowi_flags(sk),
                           daddr, inet->inet_saddr, 0, 0, sk->sk_uid);
        rcu_read_unlock();
}

static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk,
                                 const struct sk_buff *skb)
{
        if (skb)
                build_skb_flow_key(fl4, skb, sk);
        else
                build_sk_flow_key(fl4, sk);
}

static DEFINE_SPINLOCK(fnhe_lock);

static void fnhe_flush_routes(struct fib_nh_exception *fnhe)
{
        struct rtable *rt;

        rt = rcu_dereference(fnhe->fnhe_rth_input);
        if (rt) {
                RCU_INIT_POINTER(fnhe->fnhe_rth_input, NULL);
                dst_dev_put(&rt->dst);
                dst_release(&rt->dst);
        }
        rt = rcu_dereference(fnhe->fnhe_rth_output);
        if (rt) {
                RCU_INIT_POINTER(fnhe->fnhe_rth_output, NULL);
                dst_dev_put(&rt->dst);
                dst_release(&rt->dst);
        }
}

static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
{
        struct fib_nh_exception *fnhe, *oldest;

        oldest = rcu_dereference(hash->chain);
        for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe;
             fnhe = rcu_dereference(fnhe->fnhe_next)) {
                if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp))
                        oldest = fnhe;
        }
        fnhe_flush_routes(oldest);
        return oldest;
}

static inline u32 fnhe_hashfun(__be32 daddr)
{
        static u32 fnhe_hashrnd __read_mostly;
        u32 hval;

        net_get_random_once(&fnhe_hashrnd, sizeof(fnhe_hashrnd));
        hval = jhash_1word((__force u32)daddr, fnhe_hashrnd);
        return hash_32(hval, FNHE_HASH_SHIFT);
}
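
/* Exceptions learned from ICMP (redirects, path MTU) are kept in a
 * small per-nexthop hash keyed by destination address only;
 * FNHE_HASH_SHIFT comes from include/net/ip_fib.h (11 at the time of
 * writing, i.e. 2048 buckets per nexthop).
 */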

static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe)
{
        rt->rt_pmtu = fnhe->fnhe_pmtu;
        rt->rt_mtu_locked = fnhe->fnhe_mtu_locked;
        rt->dst.expires = fnhe->fnhe_expires;

        if (fnhe->fnhe_gw) {
                rt->rt_flags |= RTCF_REDIRECTED;
                rt->rt_uses_gateway = 1;
                rt->rt_gw_family = AF_INET;
                rt->rt_gw4 = fnhe->fnhe_gw;
        }
}

static void update_or_create_fnhe(struct fib_nh_common *nhc, __be32 daddr,
                                  __be32 gw, u32 pmtu, bool lock,
                                  unsigned long expires)
{
        struct fnhe_hash_bucket *hash;
        struct fib_nh_exception *fnhe;
        struct rtable *rt;
        u32 genid, hval;
        unsigned int i;
        int depth;

        genid = fnhe_genid(dev_net(nhc->nhc_dev));
        hval = fnhe_hashfun(daddr);

        spin_lock_bh(&fnhe_lock);

        hash = rcu_dereference(nhc->nhc_exceptions);
        if (!hash) {
                hash = kcalloc(FNHE_HASH_SIZE, sizeof(*hash), GFP_ATOMIC);
                if (!hash)
                        goto out_unlock;
                rcu_assign_pointer(nhc->nhc_exceptions, hash);
        }

        hash += hval;

        depth = 0;
        for (fnhe = rcu_dereference(hash->chain); fnhe;
             fnhe = rcu_dereference(fnhe->fnhe_next)) {
                if (fnhe->fnhe_daddr == daddr)
                        break;
                depth++;
        }

        if (fnhe) {
                if (fnhe->fnhe_genid != genid)
                        fnhe->fnhe_genid = genid;
                if (gw)
                        fnhe->fnhe_gw = gw;
                if (pmtu) {
                        fnhe->fnhe_pmtu = pmtu;
                        fnhe->fnhe_mtu_locked = lock;
                }
                fnhe->fnhe_expires = max(1UL, expires);
                /* Update all cached dsts too */
                rt = rcu_dereference(fnhe->fnhe_rth_input);
                if (rt)
                        fill_route_from_fnhe(rt, fnhe);
                rt = rcu_dereference(fnhe->fnhe_rth_output);
                if (rt)
                        fill_route_from_fnhe(rt, fnhe);
        } else {
                if (depth > FNHE_RECLAIM_DEPTH)
                        fnhe = fnhe_oldest(hash);
                else {
                        fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC);
                        if (!fnhe)
                                goto out_unlock;

                        fnhe->fnhe_next = hash->chain;
                        rcu_assign_pointer(hash->chain, fnhe);
                }
                fnhe->fnhe_genid = genid;
                fnhe->fnhe_daddr = daddr;
                fnhe->fnhe_gw = gw;
                fnhe->fnhe_pmtu = pmtu;
                fnhe->fnhe_mtu_locked = lock;
                fnhe->fnhe_expires = max(1UL, expires);

                /* Exception created; mark the cached routes for the nexthop
                 * stale, so anyone caching it rechecks if this exception
                 * applies to them.
                 */
                rt = rcu_dereference(nhc->nhc_rth_input);
                if (rt)
                        rt->dst.obsolete = DST_OBSOLETE_KILL;

                for_each_possible_cpu(i) {
                        struct rtable __rcu **prt;

                        prt = per_cpu_ptr(nhc->nhc_pcpu_rth_output, i);
                        rt = rcu_dereference(*prt);
                        if (rt)
                                rt->dst.obsolete = DST_OBSOLETE_KILL;
                }
        }

        fnhe->fnhe_stamp = jiffies;

out_unlock:
        spin_unlock_bh(&fnhe_lock);
}
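
/* Summary: one exception per (nexthop, daddr) records the learned
 * gateway and/or PMTU with an expiry.  Chains deeper than
 * FNHE_RECLAIM_DEPTH recycle their oldest entry instead of growing,
 * and cached per-nexthop routes are marked DST_OBSOLETE_KILL so the
 * next lookup revalidates against the new exception.
 */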

static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4,
                             bool kill_route)
{
        __be32 new_gw = icmp_hdr(skb)->un.gateway;
        __be32 old_gw = ip_hdr(skb)->saddr;
        struct net_device *dev = skb->dev;
        struct in_device *in_dev;
        struct fib_result res;
        struct neighbour *n;
        struct net *net;

        switch (icmp_hdr(skb)->code & 7) {
        case ICMP_REDIR_NET:
        case ICMP_REDIR_NETTOS:
        case ICMP_REDIR_HOST:
        case ICMP_REDIR_HOSTTOS:
                break;

        default:
                return;
        }

        if (rt->rt_gw_family != AF_INET || rt->rt_gw4 != old_gw)
                return;

        in_dev = __in_dev_get_rcu(dev);
        if (!in_dev)
                return;

        net = dev_net(dev);
        if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) ||
            ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw) ||
            ipv4_is_zeronet(new_gw))
                goto reject_redirect;

        if (!IN_DEV_SHARED_MEDIA(in_dev)) {
                if (!inet_addr_onlink(in_dev, new_gw, old_gw))
                        goto reject_redirect;
                if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev))
                        goto reject_redirect;
        } else {
                if (inet_addr_type(net, new_gw) != RTN_UNICAST)
                        goto reject_redirect;
        }

        n = __ipv4_neigh_lookup(rt->dst.dev, new_gw);
        if (!n)
                n = neigh_create(&arp_tbl, &new_gw, rt->dst.dev);
        if (!IS_ERR(n)) {
                if (!(n->nud_state & NUD_VALID)) {
                        neigh_event_send(n, NULL);
                } else {
                        if (fib_lookup(net, fl4, &res, 0) == 0) {
                                struct fib_nh_common *nhc;

                                fib_select_path(net, &res, fl4, skb);
                                nhc = FIB_RES_NHC(res);
                                update_or_create_fnhe(nhc, fl4->daddr, new_gw,
                                                      0, false,
                                                      jiffies + ip_rt_gc_timeout);
                        }
                        if (kill_route)
                                rt->dst.obsolete = DST_OBSOLETE_KILL;
                        call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n);
                }
                neigh_release(n);
        }
        return;

reject_redirect:
#ifdef CONFIG_IP_ROUTE_VERBOSE
        if (IN_DEV_LOG_MARTIANS(in_dev)) {
                const struct iphdr *iph = (const struct iphdr *) skb->data;
                __be32 daddr = iph->daddr;
                __be32 saddr = iph->saddr;

                net_info_ratelimited("Redirect from %pI4 on %s about %pI4 ignored\n"
                                     "  Advised path = %pI4 -> %pI4\n",
                                     &old_gw, dev->name, &new_gw,
                                     &saddr, &daddr);
        }
#endif
        ;
}

static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
{
        struct rtable *rt;
        struct flowi4 fl4;
        const struct iphdr *iph = (const struct iphdr *) skb->data;
        struct net *net = dev_net(skb->dev);
        int oif = skb->dev->ifindex;
        u8 tos = RT_TOS(iph->tos);
        u8 prot = iph->protocol;
        u32 mark = skb->mark;

        rt = (struct rtable *) dst;

        __build_flow_key(net, &fl4, sk, iph, oif, tos, prot, mark, 0);
        __ip_do_redirect(rt, skb, &fl4, true);
}

static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
{
        struct rtable *rt = (struct rtable *)dst;
        struct dst_entry *ret = dst;

        if (rt) {
                if (dst->obsolete > 0) {
                        ip_rt_put(rt);
                        ret = NULL;
                } else if ((rt->rt_flags & RTCF_REDIRECTED) ||
                           rt->dst.expires) {
                        ip_rt_put(rt);
                        ret = NULL;
                }
        }
        return ret;
}

/*
 * Algorithm:
 *      1. The first ip_rt_redirect_number redirects are sent
 *         with exponential backoff, then we stop sending them at all,
 *         assuming that the host ignores our redirects.
 *      2. If we did not see packets requiring redirects
 *         during ip_rt_redirect_silence, we assume that the host
 *         forgot the redirected route and start to send redirects again.
 *
 * This algorithm is much cheaper and more intelligent than dumb load limiting
 * in icmp.c.
 *
 * NOTE. Do not forget to inhibit load limiting for redirects (redundant)
 * and "frag. need" (breaks PMTU discovery) in icmp.c.
 */
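
/* Worked example with the defaults above: ip_rt_redirect_load is HZ/50,
 * so after the k-th redirect to a peer the next one is held back until
 * rate_last + (HZ/50 << k).  Once n_redirects reaches
 * ip_rt_redirect_number (9) we stay silent, resuming only after
 * ip_rt_redirect_silence ((HZ/50) << 10, about 20 seconds) has passed.
 */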

void ip_rt_send_redirect(struct sk_buff *skb)
{
        struct rtable *rt = skb_rtable(skb);
        struct in_device *in_dev;
        struct inet_peer *peer;
        struct net *net;
        int log_martians;
        int vif;

        rcu_read_lock();
        in_dev = __in_dev_get_rcu(rt->dst.dev);
        if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) {
                rcu_read_unlock();
                return;
        }
        log_martians = IN_DEV_LOG_MARTIANS(in_dev);
        vif = l3mdev_master_ifindex_rcu(rt->dst.dev);
        rcu_read_unlock();

        net = dev_net(rt->dst.dev);
        peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, vif, 1);
        if (!peer) {
                icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST,
                          rt_nexthop(rt, ip_hdr(skb)->daddr));
                return;
        }

        /* No redirected packets during ip_rt_redirect_silence;
         * reset the algorithm.
         */
        if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence)) {
                peer->rate_tokens = 0;
                peer->n_redirects = 0;
        }

        /* Too many ignored redirects; do not send anything and
         * set peer->rate_last to the last seen redirected packet.
         */
        if (peer->n_redirects >= ip_rt_redirect_number) {
                peer->rate_last = jiffies;
                goto out_put_peer;
        }

        /* Check for load limit; set rate_last to the latest sent
         * redirect.
         */
        if (peer->n_redirects == 0 ||
            time_after(jiffies,
                       (peer->rate_last +
                        (ip_rt_redirect_load << peer->n_redirects)))) {
                __be32 gw = rt_nexthop(rt, ip_hdr(skb)->daddr);

                icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw);
                peer->rate_last = jiffies;
                ++peer->n_redirects;
#ifdef CONFIG_IP_ROUTE_VERBOSE
                if (log_martians &&
                    peer->n_redirects == ip_rt_redirect_number)
                        net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n",
                                             &ip_hdr(skb)->saddr, inet_iif(skb),
                                             &ip_hdr(skb)->daddr, &gw);
#endif
        }
out_put_peer:
        inet_putpeer(peer);
}

static int ip_error(struct sk_buff *skb)
{
        struct rtable *rt = skb_rtable(skb);
        struct net_device *dev = skb->dev;
        struct in_device *in_dev;
        struct inet_peer *peer;
        unsigned long now;
        struct net *net;
        bool send;
        int code;

        if (netif_is_l3_master(skb->dev)) {
                dev = __dev_get_by_index(dev_net(skb->dev), IPCB(skb)->iif);
                if (!dev)
                        goto out;
        }

        in_dev = __in_dev_get_rcu(dev);

        /* IP on this device is disabled. */
        if (!in_dev)
                goto out;

        net = dev_net(rt->dst.dev);
        if (!IN_DEV_FORWARD(in_dev)) {
                switch (rt->dst.error) {
                case EHOSTUNREACH:
                        __IP_INC_STATS(net, IPSTATS_MIB_INADDRERRORS);
                        break;

                case ENETUNREACH:
                        __IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES);
                        break;
                }
                goto out;
        }

        switch (rt->dst.error) {
        case EINVAL:
        default:
                goto out;
        case EHOSTUNREACH:
                code = ICMP_HOST_UNREACH;
                break;
        case ENETUNREACH:
                code = ICMP_NET_UNREACH;
                __IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES);
                break;
        case EACCES:
                code = ICMP_PKT_FILTERED;
                break;
        }

        peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr,
                               l3mdev_master_ifindex(skb->dev), 1);

        send = true;
        if (peer) {
                now = jiffies;
                peer->rate_tokens += now - peer->rate_last;
                if (peer->rate_tokens > ip_rt_error_burst)
                        peer->rate_tokens = ip_rt_error_burst;
                peer->rate_last = now;
                if (peer->rate_tokens >= ip_rt_error_cost)
                        peer->rate_tokens -= ip_rt_error_cost;
                else
                        send = false;
                inet_putpeer(peer);
        }
        if (send)
                icmp_send(skb, ICMP_DEST_UNREACH, code, 0);

out:    kfree_skb(skb);
        return 0;
}
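
/* The limiter above is a token bucket: tokens accrue at one per jiffy
 * since rate_last, capped at ip_rt_error_burst (5 * HZ), and each ICMP
 * error spends ip_rt_error_cost (HZ) of them; with the defaults that
 * allows bursts of five errors and a sustained rate of one per second.
 */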

static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
{
        struct dst_entry *dst = &rt->dst;
        struct net *net = dev_net(dst->dev);
        struct fib_result res;
        bool lock = false;
        u32 old_mtu;

        if (ip_mtu_locked(dst))
                return;

        old_mtu = ipv4_mtu(dst);
        if (old_mtu < mtu)
                return;

        if (mtu < ip_rt_min_pmtu) {
                lock = true;
                mtu = min(old_mtu, ip_rt_min_pmtu);
        }

        if (rt->rt_pmtu == mtu && !lock &&
            time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2))
                return;

        rcu_read_lock();
        if (fib_lookup(net, fl4, &res, 0) == 0) {
                struct fib_nh_common *nhc;

                fib_select_path(net, &res, fl4, NULL);
                nhc = FIB_RES_NHC(res);
                update_or_create_fnhe(nhc, fl4->daddr, 0, mtu, lock,
                                      jiffies + ip_rt_mtu_expires);
        }
        rcu_read_unlock();
}
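
/* Example of the clamp above: with the default ip_rt_min_pmtu of
 * 512 + 20 + 20 = 552, an ICMP "fragmentation needed" advertising,
 * say, 68 does not push the path MTU below 552; the learned value is
 * clamped to min(old_mtu, 552) and the resulting route is locked so
 * ip_mtu_locked() short-circuits further reductions.
 */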
1034
David S. Miller4895c772012-07-17 04:19:00 -07001035static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
Hangbin Liubd085ef2019-12-22 10:51:09 +08001036 struct sk_buff *skb, u32 mtu,
1037 bool confirm_neigh)
David S. Miller4895c772012-07-17 04:19:00 -07001038{
1039 struct rtable *rt = (struct rtable *) dst;
1040 struct flowi4 fl4;
1041
1042 ip_rt_build_flow_key(&fl4, sk, skb);
Stefano Briviodf23bb12020-08-04 07:53:42 +02001043
1044 /* Don't make lookup fail for bridged encapsulations */
1045 if (skb && netif_is_any_bridge_port(skb->dev))
1046 fl4.flowi4_oif = 0;
1047
Steffen Klassertd851c122012-10-07 22:47:25 +00001048 __ip_rt_update_pmtu(rt, &fl4, mtu);
David S. Miller4895c772012-07-17 04:19:00 -07001049}
1050
David S. Miller36393392012-06-14 22:21:46 -07001051void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
Maciej Żenczykowskid888f392018-09-25 20:56:26 -07001052 int oif, u8 protocol)
David S. Miller36393392012-06-14 22:21:46 -07001053{
Miaohe Lin5af68892020-08-29 05:21:30 -04001054 const struct iphdr *iph = (const struct iphdr *)skb->data;
David S. Miller36393392012-06-14 22:21:46 -07001055 struct flowi4 fl4;
1056 struct rtable *rt;
Maciej Żenczykowskid888f392018-09-25 20:56:26 -07001057 u32 mark = IP4_REPLY_MARK(net, skb->mark);
Lorenzo Colitti1b3c61d2014-05-13 10:17:34 -07001058
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09001059 __build_flow_key(net, &fl4, NULL, iph, oif,
Maciej Żenczykowskid888f392018-09-25 20:56:26 -07001060 RT_TOS(iph->tos), protocol, mark, 0);
David S. Miller36393392012-06-14 22:21:46 -07001061 rt = __ip_route_output_key(net, &fl4);
1062 if (!IS_ERR(rt)) {
David S. Miller4895c772012-07-17 04:19:00 -07001063 __ip_rt_update_pmtu(rt, &fl4, mtu);
David S. Miller36393392012-06-14 22:21:46 -07001064 ip_rt_put(rt);
1065 }
1066}
1067EXPORT_SYMBOL_GPL(ipv4_update_pmtu);
1068
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001069static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
David S. Miller36393392012-06-14 22:21:46 -07001070{
Miaohe Lin343d8c62020-08-25 08:32:11 -04001071 const struct iphdr *iph = (const struct iphdr *)skb->data;
David S. Miller4895c772012-07-17 04:19:00 -07001072 struct flowi4 fl4;
1073 struct rtable *rt;
David S. Miller36393392012-06-14 22:21:46 -07001074
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09001075 __build_flow_key(sock_net(sk), &fl4, sk, iph, 0, 0, 0, 0, 0);
Lorenzo Colitti1b3c61d2014-05-13 10:17:34 -07001076
1077 if (!fl4.flowi4_mark)
1078 fl4.flowi4_mark = IP4_REPLY_MARK(sock_net(sk), skb->mark);
1079
David S. Miller4895c772012-07-17 04:19:00 -07001080 rt = __ip_route_output_key(sock_net(sk), &fl4);
1081 if (!IS_ERR(rt)) {
1082 __ip_rt_update_pmtu(rt, &fl4, mtu);
1083 ip_rt_put(rt);
1084 }
David S. Miller36393392012-06-14 22:21:46 -07001085}
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001086
1087void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
1088{
Miaohe Lin5af68892020-08-29 05:21:30 -04001089 const struct iphdr *iph = (const struct iphdr *)skb->data;
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001090 struct flowi4 fl4;
1091 struct rtable *rt;
Eric Dumazet7f502362014-06-30 01:26:23 -07001092 struct dst_entry *odst = NULL;
Steffen Klassertb44108d2013-01-22 00:01:28 +00001093 bool new = false;
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09001094 struct net *net = sock_net(sk);
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001095
1096 bh_lock_sock(sk);
Hannes Frederic Sowa482fc602013-11-05 02:24:17 +01001097
1098 if (!ip_sk_accept_pmtu(sk))
1099 goto out;
1100
Eric Dumazet7f502362014-06-30 01:26:23 -07001101 odst = sk_dst_get(sk);
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001102
Eric Dumazet7f502362014-06-30 01:26:23 -07001103 if (sock_owned_by_user(sk) || !odst) {
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001104 __ipv4_sk_update_pmtu(skb, sk, mtu);
1105 goto out;
1106 }
1107
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09001108 __build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0);
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001109
Eric Dumazet7f502362014-06-30 01:26:23 -07001110 rt = (struct rtable *)odst;
Ian Morris51456b22015-04-03 09:17:26 +01001111 if (odst->obsolete && !odst->ops->check(odst, 0)) {
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001112 rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
1113 if (IS_ERR(rt))
1114 goto out;
Steffen Klassertb44108d2013-01-22 00:01:28 +00001115
1116 new = true;
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001117 }
1118
Miaohe Lin343d8c62020-08-25 08:32:11 -04001119 __ip_rt_update_pmtu((struct rtable *)xfrm_dst_path(&rt->dst), &fl4, mtu);
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001120
Eric Dumazet7f502362014-06-30 01:26:23 -07001121 if (!dst_check(&rt->dst, 0)) {
Steffen Klassertb44108d2013-01-22 00:01:28 +00001122 if (new)
1123 dst_release(&rt->dst);
1124
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001125 rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
1126 if (IS_ERR(rt))
1127 goto out;
1128
Steffen Klassertb44108d2013-01-22 00:01:28 +00001129 new = true;
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001130 }
1131
Steffen Klassertb44108d2013-01-22 00:01:28 +00001132 if (new)
Eric Dumazet7f502362014-06-30 01:26:23 -07001133 sk_dst_set(sk, &rt->dst);
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001134
1135out:
1136 bh_unlock_sock(sk);
Eric Dumazet7f502362014-06-30 01:26:23 -07001137 dst_release(odst);
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001138}
David S. Miller36393392012-06-14 22:21:46 -07001139EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu);
David S. Millerf39925d2011-02-09 22:00:16 -08001140
David S. Millerb42597e2012-07-11 21:25:45 -07001141void ipv4_redirect(struct sk_buff *skb, struct net *net,
Maciej Żenczykowski1042caa2018-09-25 20:56:27 -07001142 int oif, u8 protocol)
David S. Millerb42597e2012-07-11 21:25:45 -07001143{
Miaohe Lin5af68892020-08-29 05:21:30 -04001144 const struct iphdr *iph = (const struct iphdr *)skb->data;
David S. Millerb42597e2012-07-11 21:25:45 -07001145 struct flowi4 fl4;
1146 struct rtable *rt;
1147
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09001148 __build_flow_key(net, &fl4, NULL, iph, oif,
Maciej Żenczykowski1042caa2018-09-25 20:56:27 -07001149 RT_TOS(iph->tos), protocol, 0, 0);
David S. Millerb42597e2012-07-11 21:25:45 -07001150 rt = __ip_route_output_key(net, &fl4);
1151 if (!IS_ERR(rt)) {
David S. Millerceb33202012-07-17 11:31:28 -07001152 __ip_do_redirect(rt, skb, &fl4, false);
David S. Millerb42597e2012-07-11 21:25:45 -07001153 ip_rt_put(rt);
1154 }
1155}
1156EXPORT_SYMBOL_GPL(ipv4_redirect);
1157
1158void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk)
1159{
Miaohe Lin343d8c62020-08-25 08:32:11 -04001160 const struct iphdr *iph = (const struct iphdr *)skb->data;
David S. Miller4895c772012-07-17 04:19:00 -07001161 struct flowi4 fl4;
1162 struct rtable *rt;
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09001163 struct net *net = sock_net(sk);
David S. Millerb42597e2012-07-11 21:25:45 -07001164
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09001165 __build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0);
1166 rt = __ip_route_output_key(net, &fl4);
David S. Miller4895c772012-07-17 04:19:00 -07001167 if (!IS_ERR(rt)) {
David S. Millerceb33202012-07-17 11:31:28 -07001168 __ip_do_redirect(rt, skb, &fl4, false);
David S. Miller4895c772012-07-17 04:19:00 -07001169 ip_rt_put(rt);
1170 }
David S. Millerb42597e2012-07-11 21:25:45 -07001171}
1172EXPORT_SYMBOL_GPL(ipv4_sk_redirect);
1173
Brian Vazquezbbd807d2021-02-01 17:41:32 +00001174INDIRECT_CALLABLE_SCOPE struct dst_entry *ipv4_dst_check(struct dst_entry *dst,
1175 u32 cookie)
David S. Millerefbc368d2011-12-01 13:38:59 -05001176{
1177 struct rtable *rt = (struct rtable *) dst;
1178
David S. Millerceb33202012-07-17 11:31:28 -07001179 /* All IPv4 dsts are created with ->obsolete set to the value
1180 * DST_OBSOLETE_FORCE_CHK, which forces validation calls down
1181 * into this function in all cases.
1182 *
Timo Teräs387aa652013-05-27 20:46:31 +00001183 * When a PMTU/redirect information update invalidates a route,
1184 * this is indicated by setting obsolete to DST_OBSOLETE_KILL or
Julian Wiedmann02afc7a2019-03-20 20:02:56 +01001185 * DST_OBSOLETE_DEAD.
David S. Millerceb33202012-07-17 11:31:28 -07001186 */
Timo Teräs387aa652013-05-27 20:46:31 +00001187 if (dst->obsolete != DST_OBSOLETE_FORCE_CHK || rt_is_expired(rt))
David S. Millerefbc368d2011-12-01 13:38:59 -05001188 return NULL;
Timo Teräsd11a4dc2010-03-18 23:20:20 +00001189 return dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001190}
Brian Vazquez9c979212021-02-04 18:18:39 +00001191EXPORT_INDIRECT_CALLABLE(ipv4_dst_check);
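/* Illustrative sketch, not part of this file: holders of a cached dst
 * revalidate it through dst_check(), which dispatches here for IPv4 and
 * yields NULL once a PMTU update or redirect has obsoleted the entry,
 * forcing the caller back to a fresh route lookup:
 *
 *	struct dst_entry *dst = sk_dst_get(sk);
 *
 *	if (dst && !dst_check(dst, 0)) {
 *		dst_release(dst);
 *		dst = NULL;
 *	}
 */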
Linus Torvalds1da177e2005-04-16 15:20:36 -07001192
Eric Dumazet20ff83f2019-04-24 08:04:05 -07001193static void ipv4_send_dest_unreach(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001194{
Stephen Suryaputraed0de452019-04-12 16:19:27 -04001195 struct ip_options opt;
Eric Dumazetc543cb42019-04-13 17:32:21 -07001196 int res;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001197
Stephen Suryaputraed0de452019-04-12 16:19:27 -04001198 /* Recompile IP options since IPCB may not be valid anymore.
Eric Dumazet20ff83f2019-04-24 08:04:05 -07001199 * Also check that we have a reasonable IPv4 header.
Stephen Suryaputraed0de452019-04-12 16:19:27 -04001200 */
Eric Dumazet20ff83f2019-04-24 08:04:05 -07001201 if (!pskb_network_may_pull(skb, sizeof(struct iphdr)) ||
1202 ip_hdr(skb)->version != 4 || ip_hdr(skb)->ihl < 5)
Stephen Suryaputraed0de452019-04-12 16:19:27 -04001203 return;
1204
Eric Dumazet20ff83f2019-04-24 08:04:05 -07001205 memset(&opt, 0, sizeof(opt));
1206 if (ip_hdr(skb)->ihl > 5) {
1207 if (!pskb_network_may_pull(skb, ip_hdr(skb)->ihl * 4))
1208 return;
1209 opt.optlen = ip_hdr(skb)->ihl * 4 - sizeof(struct iphdr);
1210
1211 rcu_read_lock();
1212 res = __ip_options_compile(dev_net(skb->dev), &opt, skb, NULL);
1213 rcu_read_unlock();
1214
1215 if (res)
1216 return;
1217 }
Stephen Suryaputraed0de452019-04-12 16:19:27 -04001218 __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0, &opt);
Eric Dumazet20ff83f2019-04-24 08:04:05 -07001219}
1220
1221static void ipv4_link_failure(struct sk_buff *skb)
1222{
1223 struct rtable *rt;
1224
1225 ipv4_send_dest_unreach(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001226
Eric Dumazet511c3f92009-06-02 05:14:27 +00001227 rt = skb_rtable(skb);
David S. Miller59436342012-07-10 06:58:42 -07001228 if (rt)
1229 dst_set_expires(&rt->dst, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001230}
1231
Eric W. Biedermanede20592015-10-07 16:48:47 -05001232static int ip_rt_bug(struct net *net, struct sock *sk, struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001233{
Joe Perches91df42b2012-05-15 14:11:54 +00001234 pr_debug("%s: %pI4 -> %pI4, %s\n",
1235 __func__, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr,
1236 skb->dev ? skb->dev->name : "?");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001237 kfree_skb(skb);
Dave Jonesc378a9c2011-05-21 07:16:42 +00001238 WARN_ON(1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001239 return 0;
1240}
1241
1242/*
Shubhankar Kuranagatti6ad08602021-03-12 13:00:05 +05301243 * We do not cache the source address of the outgoing interface,
1244 * because it is used only by the IP RR, TS and SRR options,
1245 * so it is out of the fast path.
1246 *
1247 * BTW remember: "addr" is allowed to be unaligned
1248 * in IP options!
Linus Torvalds1da177e2005-04-16 15:20:36 -07001249 */
1250
David S. Miller8e363602011-05-13 17:29:41 -04001251void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001252{
Al Viroa61ced52006-09-26 21:27:54 -07001253 __be32 src;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001254
David S. Millerc7537962010-11-11 17:07:48 -08001255 if (rt_is_output_route(rt))
David S. Millerc5be24f2011-05-13 18:01:21 -04001256 src = ip_hdr(skb)->saddr;
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001257 else {
David S. Miller8e363602011-05-13 17:29:41 -04001258 struct fib_result res;
Maciej Żenczykowskie351bb62018-09-29 23:44:46 -07001259 struct iphdr *iph = ip_hdr(skb);
1260 struct flowi4 fl4 = {
1261 .daddr = iph->daddr,
1262 .saddr = iph->saddr,
1263 .flowi4_tos = RT_TOS(iph->tos),
1264 .flowi4_oif = rt->dst.dev->ifindex,
1265 .flowi4_iif = skb->dev->ifindex,
1266 .flowi4_mark = skb->mark,
1267 };
David S. Miller5e2b61f2011-03-04 21:47:09 -08001268
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001269 rcu_read_lock();
Andy Gospodarek0eeb0752015-06-23 13:45:37 -04001270 if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res, 0) == 0)
David Aherneba618a2019-04-02 14:11:55 -07001271 src = fib_result_prefsrc(dev_net(rt->dst.dev), &res);
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001272 else
David S. Millerf8126f12012-07-13 05:03:45 -07001273 src = inet_select_addr(rt->dst.dev,
1274 rt_nexthop(rt, iph->daddr),
1275 RT_SCOPE_UNIVERSE);
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001276 rcu_read_unlock();
1277 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001278 memcpy(addr, &src, 4);
1279}
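/* Illustrative sketch, not part of this file: the IP options code is the
 * typical caller, e.g. stamping a Record Route slot while building the
 * header (offsets follow the RR option layout):
 *
 *	if (opt->rr_needaddr)
 *		ip_rt_get_source(iph + opt->rr + iph[opt->rr + 2] - 5, skb, rt);
 */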
1280
Patrick McHardyc7066f72011-01-14 13:36:42 +01001281#ifdef CONFIG_IP_ROUTE_CLASSID
Linus Torvalds1da177e2005-04-16 15:20:36 -07001282static void set_class_tag(struct rtable *rt, u32 tag)
1283{
Changli Gaod8d1f302010-06-10 23:31:35 -07001284 if (!(rt->dst.tclassid & 0xFFFF))
1285 rt->dst.tclassid |= tag & 0xFFFF;
1286 if (!(rt->dst.tclassid & 0xFFFF0000))
1287 rt->dst.tclassid |= tag & 0xFFFF0000;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001288}
1289#endif
1290
David S. Miller0dbaee32010-12-13 12:52:14 -08001291static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
1292{
Gao Feng7ed14d92017-04-12 12:34:03 +08001293 unsigned int header_size = sizeof(struct tcphdr) + sizeof(struct iphdr);
Eric Dumazet164a5e72017-10-18 17:02:03 -07001294 unsigned int advmss = max_t(unsigned int, ipv4_mtu(dst) - header_size,
Gao Feng7ed14d92017-04-12 12:34:03 +08001295 ip_rt_min_advmss);
David S. Miller0dbaee32010-12-13 12:52:14 -08001296
Gao Feng7ed14d92017-04-12 12:34:03 +08001297 return min(advmss, IPV4_MAX_PMTU - header_size);
David S. Miller0dbaee32010-12-13 12:52:14 -08001298}
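/* Worked example with default settings: for a 1500 byte path MTU,
 * header_size = 20 (TCP) + 20 (IP) = 40, so the advertised MSS is
 * max(1500 - 40, ip_rt_min_advmss) = 1460, comfortably below the
 * IPV4_MAX_PMTU - 40 ceiling.
 */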
1299
Brian Vazquezf67fbea2021-02-01 17:41:31 +00001300INDIRECT_CALLABLE_SCOPE unsigned int ipv4_mtu(const struct dst_entry *dst)
David S. Millerd33e4552010-12-14 13:01:14 -08001301{
Miaohe Lin5af68892020-08-29 05:21:30 -04001302 const struct rtable *rt = (const struct rtable *)dst;
David S. Miller59436342012-07-10 06:58:42 -07001303 unsigned int mtu = rt->rt_pmtu;
1304
Alexander Duyck98d75c32012-08-27 06:30:01 +00001305 if (!mtu || time_after_eq(jiffies, rt->dst.expires))
David S. Miller59436342012-07-10 06:58:42 -07001306 mtu = dst_metric_raw(dst, RTAX_MTU);
Steffen Klassert618f9bc2011-11-23 02:13:31 +00001307
Steffen Klassert38d523e2013-01-16 20:55:01 +00001308 if (mtu)
Vadim Fedorenkofade5642021-06-25 19:21:39 +03001309 goto out;
Steffen Klassert618f9bc2011-11-23 02:13:31 +00001310
Eric Dumazetc780a042017-08-16 11:09:12 -07001311 mtu = READ_ONCE(dst->dev->mtu);
David S. Millerd33e4552010-12-14 13:01:14 -08001312
Sabrina Dubrocad52e5a72018-03-14 10:21:14 +01001313 if (unlikely(ip_mtu_locked(dst))) {
David Ahern77d5bc72019-09-17 10:39:49 -07001314 if (rt->rt_uses_gateway && mtu > 576)
David S. Millerd33e4552010-12-14 13:01:14 -08001315 mtu = 576;
1316 }
1317
Vadim Fedorenkofade5642021-06-25 19:21:39 +03001318out:
Roopa Prabhu14972cb2016-08-24 20:10:43 -07001319 mtu = min_t(unsigned int, mtu, IP_MAX_MTU);
1320
1321 return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
David S. Millerd33e4552010-12-14 13:01:14 -08001322}
Brian Vazquez9c979212021-02-04 18:18:39 +00001323EXPORT_INDIRECT_CALLABLE(ipv4_mtu);
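/* Summary of the fallback order above: a still-valid rt_pmtu (learned from
 * PMTU discovery) wins; otherwise the RTAX_MTU metric is used if set;
 * otherwise the device MTU, clamped down to 576 when the metric is locked
 * on a gatewayed route, and always capped at IP_MAX_MTU.
 */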
David S. Millerd33e4552010-12-14 13:01:14 -08001324
David Aherna5995e72019-04-30 07:45:50 -07001325static void ip_del_fnhe(struct fib_nh_common *nhc, __be32 daddr)
Julian Anastasov94720e32018-05-02 09:41:19 +03001326{
1327 struct fnhe_hash_bucket *hash;
1328 struct fib_nh_exception *fnhe, __rcu **fnhe_p;
1329 u32 hval = fnhe_hashfun(daddr);
1330
1331 spin_lock_bh(&fnhe_lock);
1332
David Aherna5995e72019-04-30 07:45:50 -07001333 hash = rcu_dereference_protected(nhc->nhc_exceptions,
Julian Anastasov94720e32018-05-02 09:41:19 +03001334 lockdep_is_held(&fnhe_lock));
1335 hash += hval;
1336
1337 fnhe_p = &hash->chain;
1338 fnhe = rcu_dereference_protected(*fnhe_p, lockdep_is_held(&fnhe_lock));
1339 while (fnhe) {
1340 if (fnhe->fnhe_daddr == daddr) {
1341 rcu_assign_pointer(*fnhe_p, rcu_dereference_protected(
1342 fnhe->fnhe_next, lockdep_is_held(&fnhe_lock)));
Xin Longee60ad22019-03-08 14:50:54 +08001343 /* set fnhe_daddr to 0 to ensure it won't bind with
1344 * new dsts in rt_bind_exception().
1345 */
1346 fnhe->fnhe_daddr = 0;
Julian Anastasov94720e32018-05-02 09:41:19 +03001347 fnhe_flush_routes(fnhe);
1348 kfree_rcu(fnhe, rcu);
1349 break;
1350 }
1351 fnhe_p = &fnhe->fnhe_next;
1352 fnhe = rcu_dereference_protected(fnhe->fnhe_next,
1353 lockdep_is_held(&fnhe_lock));
1354 }
1355
1356 spin_unlock_bh(&fnhe_lock);
1357}
1358
David Aherna5995e72019-04-30 07:45:50 -07001359static struct fib_nh_exception *find_exception(struct fib_nh_common *nhc,
1360 __be32 daddr)
David S. Miller4895c772012-07-17 04:19:00 -07001361{
David Aherna5995e72019-04-30 07:45:50 -07001362 struct fnhe_hash_bucket *hash = rcu_dereference(nhc->nhc_exceptions);
David S. Miller4895c772012-07-17 04:19:00 -07001363 struct fib_nh_exception *fnhe;
1364 u32 hval;
1365
David S. Millerf2bb4be2012-07-17 12:20:47 -07001366 if (!hash)
1367 return NULL;
1368
David S. Millerd3a25c92012-07-17 13:23:08 -07001369 hval = fnhe_hashfun(daddr);
David S. Miller4895c772012-07-17 04:19:00 -07001370
1371 for (fnhe = rcu_dereference(hash[hval].chain); fnhe;
1372 fnhe = rcu_dereference(fnhe->fnhe_next)) {
Julian Anastasov94720e32018-05-02 09:41:19 +03001373 if (fnhe->fnhe_daddr == daddr) {
1374 if (fnhe->fnhe_expires &&
1375 time_after(jiffies, fnhe->fnhe_expires)) {
David Aherna5995e72019-04-30 07:45:50 -07001376 ip_del_fnhe(nhc, daddr);
Julian Anastasov94720e32018-05-02 09:41:19 +03001377 break;
1378 }
David S. Millerf2bb4be2012-07-17 12:20:47 -07001379 return fnhe;
Julian Anastasov94720e32018-05-02 09:41:19 +03001380 }
David S. Millerf2bb4be2012-07-17 12:20:47 -07001381 }
1382 return NULL;
1383}
David S. Miller4895c772012-07-17 04:19:00 -07001384
David Ahern50d889b2018-05-21 09:08:13 -07001385/* MTU selection:
1386 * 1. mtu on route is locked - use it
1387 * 2. mtu from nexthop exception
1388 * 3. mtu from egress device
1389 */
1390
1391u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr)
1392{
David Aherneba618a2019-04-02 14:11:55 -07001393 struct fib_nh_common *nhc = res->nhc;
1394 struct net_device *dev = nhc->nhc_dev;
David Ahern50d889b2018-05-21 09:08:13 -07001395 struct fib_info *fi = res->fi;
David Ahern50d889b2018-05-21 09:08:13 -07001396 u32 mtu = 0;
1397
1398 if (dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu ||
1399 fi->fib_metrics->metrics[RTAX_LOCK - 1] & (1 << RTAX_MTU))
1400 mtu = fi->fib_mtu;
1401
1402 if (likely(!mtu)) {
1403 struct fib_nh_exception *fnhe;
1404
David Aherna5995e72019-04-30 07:45:50 -07001405 fnhe = find_exception(nhc, daddr);
David Ahern50d889b2018-05-21 09:08:13 -07001406 if (fnhe && !time_after_eq(jiffies, fnhe->fnhe_expires))
1407 mtu = fnhe->fnhe_pmtu;
1408 }
1409
1410 if (likely(!mtu))
1411 mtu = min(READ_ONCE(dev->mtu), IP_MAX_MTU);
1412
David Aherneba618a2019-04-02 14:11:55 -07001413 return mtu - lwtunnel_headroom(nhc->nhc_lwtstate, mtu);
David Ahern50d889b2018-05-21 09:08:13 -07001414}
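/* Illustrative sketch, not part of this file: callers that route without
 * instantiating a full rtable (e.g. BPF or tunnel fast paths) can size
 * packets from a bare fib_result:
 *
 *	struct fib_result res;
 *	u32 mtu = 0;
 *
 *	if (!fib_lookup(net, &fl4, &res, 0))
 *		mtu = ip_mtu_from_fib_result(&res, fl4.daddr);
 */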
1415
David S. Millercaacf052012-07-31 15:06:50 -07001416static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
Wei Wanga4c2fd72017-06-17 10:42:42 -07001417 __be32 daddr, const bool do_cache)
David S. Millerf2bb4be2012-07-17 12:20:47 -07001418{
David S. Millercaacf052012-07-31 15:06:50 -07001419 bool ret = false;
1420
David S. Millerc5038a82012-07-31 15:02:02 -07001421 spin_lock_bh(&fnhe_lock);
Julian Anastasovaee06da2012-07-18 10:15:35 +00001422
David S. Millerc5038a82012-07-31 15:02:02 -07001423 if (daddr == fnhe->fnhe_daddr) {
Timo Teräs2ffae992013-06-27 10:27:05 +03001424 struct rtable __rcu **porig;
1425 struct rtable *orig;
Timo Teräs5aad1de2013-05-27 20:46:33 +00001426 int genid = fnhe_genid(dev_net(rt->dst.dev));
Timo Teräs2ffae992013-06-27 10:27:05 +03001427
1428 if (rt_is_input_route(rt))
1429 porig = &fnhe->fnhe_rth_input;
1430 else
1431 porig = &fnhe->fnhe_rth_output;
1432 orig = rcu_dereference(*porig);
Timo Teräs5aad1de2013-05-27 20:46:33 +00001433
1434 if (fnhe->fnhe_genid != genid) {
1435 fnhe->fnhe_genid = genid;
Steffen Klassert13d82bf2012-10-17 21:17:44 +00001436 fnhe->fnhe_gw = 0;
1437 fnhe->fnhe_pmtu = 0;
1438 fnhe->fnhe_expires = 0;
Hangbin Liu0e8411e42018-05-09 18:06:44 +08001439 fnhe->fnhe_mtu_locked = false;
Timo Teräs2ffae992013-06-27 10:27:05 +03001440 fnhe_flush_routes(fnhe);
1441 orig = NULL;
Steffen Klassert13d82bf2012-10-17 21:17:44 +00001442 }
Timo Teräs387aa652013-05-27 20:46:31 +00001443 fill_route_from_fnhe(rt, fnhe);
David Ahern1550c172019-04-05 16:30:27 -07001444 if (!rt->rt_gw4) {
1445 rt->rt_gw4 = daddr;
1446 rt->rt_gw_family = AF_INET;
1447 }
David S. Millerf2bb4be2012-07-17 12:20:47 -07001448
Wei Wanga4c2fd72017-06-17 10:42:42 -07001449 if (do_cache) {
Wei Wang08301062017-06-17 10:42:29 -07001450 dst_hold(&rt->dst);
Timo Teräs2ffae992013-06-27 10:27:05 +03001451 rcu_assign_pointer(*porig, rt);
Wei Wang08301062017-06-17 10:42:29 -07001452 if (orig) {
Wei Wang95c47f92017-06-17 10:42:30 -07001453 dst_dev_put(&orig->dst);
Wei Wang08301062017-06-17 10:42:29 -07001454 dst_release(&orig->dst);
Wei Wang08301062017-06-17 10:42:29 -07001455 }
Timo Teräs2ffae992013-06-27 10:27:05 +03001456 ret = true;
1457 }
David S. Millerc5038a82012-07-31 15:02:02 -07001458
1459 fnhe->fnhe_stamp = jiffies;
David S. Millerc5038a82012-07-31 15:02:02 -07001460 }
1461 spin_unlock_bh(&fnhe_lock);
David S. Millercaacf052012-07-31 15:06:50 -07001462
1463 return ret;
Eric Dumazet54764bb2012-07-31 01:08:23 +00001464}
1465
David Ahern87063a1f2019-04-30 07:45:49 -07001466static bool rt_cache_route(struct fib_nh_common *nhc, struct rtable *rt)
David S. Millerf2bb4be2012-07-17 12:20:47 -07001467{
Eric Dumazetd26b3a72012-07-31 05:45:30 +00001468 struct rtable *orig, *prev, **p;
David S. Millercaacf052012-07-31 15:06:50 -07001469 bool ret = true;
David S. Millerf2bb4be2012-07-17 12:20:47 -07001470
Eric Dumazetd26b3a72012-07-31 05:45:30 +00001471 if (rt_is_input_route(rt)) {
David Ahern0f457a32019-04-30 07:45:48 -07001472 p = (struct rtable **)&nhc->nhc_rth_input;
Eric Dumazetd26b3a72012-07-31 05:45:30 +00001473 } else {
David Ahern0f457a32019-04-30 07:45:48 -07001474 p = (struct rtable **)raw_cpu_ptr(nhc->nhc_pcpu_rth_output);
Eric Dumazetd26b3a72012-07-31 05:45:30 +00001475 }
David S. Millerf2bb4be2012-07-17 12:20:47 -07001476 orig = *p;
1477
Wei Wang08301062017-06-17 10:42:29 -07001478 /* hold dst before doing cmpxchg() to avoid race condition
1479 * on this dst
1480 */
1481 dst_hold(&rt->dst);
David S. Millerf2bb4be2012-07-17 12:20:47 -07001482 prev = cmpxchg(p, orig, rt);
1483 if (prev == orig) {
Wei Wang08301062017-06-17 10:42:29 -07001484 if (orig) {
Wei Wang5018c592019-10-16 12:03:15 -07001485 rt_add_uncached_list(orig);
Wei Wang08301062017-06-17 10:42:29 -07001486 dst_release(&orig->dst);
Wei Wang08301062017-06-17 10:42:29 -07001487 }
1488 } else {
1489 dst_release(&rt->dst);
David S. Millercaacf052012-07-31 15:06:50 -07001490 ret = false;
Wei Wang08301062017-06-17 10:42:29 -07001491 }
David S. Millercaacf052012-07-31 15:06:50 -07001492
1493 return ret;
1494}
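/* The cmpxchg() above is a lock-free publish: the new route is installed
 * only if *p still holds the value read into "orig" a moment earlier. If
 * two CPUs race, exactly one cmpxchg() succeeds; the loser simply drops
 * the reference it took, so no dst is leaked and none is lost.
 */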
1495
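/* Routes that could not be cached in a FIB nexthop sit on per-CPU lists,
 * so that rt_flush_dev() can still find them and retarget their device
 * reference when the underlying device is unregistered.
 */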
Eric Dumazet5055c372015-01-14 15:17:06 -08001496struct uncached_list {
1497 spinlock_t lock;
1498 struct list_head head;
1499};
1500
1501static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt_uncached_list);
David S. Millercaacf052012-07-31 15:06:50 -07001502
Xin Long510c3212018-02-14 19:06:02 +08001503void rt_add_uncached_list(struct rtable *rt)
David S. Millercaacf052012-07-31 15:06:50 -07001504{
Eric Dumazet5055c372015-01-14 15:17:06 -08001505 struct uncached_list *ul = raw_cpu_ptr(&rt_uncached_list);
1506
1507 rt->rt_uncached_list = ul;
1508
1509 spin_lock_bh(&ul->lock);
1510 list_add_tail(&rt->rt_uncached, &ul->head);
1511 spin_unlock_bh(&ul->lock);
David S. Millercaacf052012-07-31 15:06:50 -07001512}
1513
Xin Long510c3212018-02-14 19:06:02 +08001514void rt_del_uncached_list(struct rtable *rt)
David S. Millercaacf052012-07-31 15:06:50 -07001515{
Eric Dumazet78df76a2012-08-24 05:40:47 +00001516 if (!list_empty(&rt->rt_uncached)) {
Eric Dumazet5055c372015-01-14 15:17:06 -08001517 struct uncached_list *ul = rt->rt_uncached_list;
1518
1519 spin_lock_bh(&ul->lock);
David S. Millercaacf052012-07-31 15:06:50 -07001520 list_del(&rt->rt_uncached);
Eric Dumazet5055c372015-01-14 15:17:06 -08001521 spin_unlock_bh(&ul->lock);
David S. Millercaacf052012-07-31 15:06:50 -07001522 }
1523}
1524
Xin Long510c3212018-02-14 19:06:02 +08001525static void ipv4_dst_destroy(struct dst_entry *dst)
1526{
Xin Long510c3212018-02-14 19:06:02 +08001527 struct rtable *rt = (struct rtable *)dst;
1528
David Ahern1620a332018-10-04 20:07:54 -07001529 ip_dst_metrics_put(dst);
Xin Long510c3212018-02-14 19:06:02 +08001530 rt_del_uncached_list(rt);
1531}
1532
David S. Millercaacf052012-07-31 15:06:50 -07001533void rt_flush_dev(struct net_device *dev)
1534{
Eric Dumazet5055c372015-01-14 15:17:06 -08001535 struct rtable *rt;
1536 int cpu;
David S. Millercaacf052012-07-31 15:06:50 -07001537
Eric Dumazet5055c372015-01-14 15:17:06 -08001538 for_each_possible_cpu(cpu) {
1539 struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu);
1540
1541 spin_lock_bh(&ul->lock);
1542 list_for_each_entry(rt, &ul->head, rt_uncached) {
David S. Millercaacf052012-07-31 15:06:50 -07001543 if (rt->dst.dev != dev)
1544 continue;
Mahesh Bandewar8d7017f2019-07-01 14:38:57 -07001545 rt->dst.dev = blackhole_netdev;
David S. Millercaacf052012-07-31 15:06:50 -07001546 dev_hold(rt->dst.dev);
1547 dev_put(dev);
1548 }
Eric Dumazet5055c372015-01-14 15:17:06 -08001549 spin_unlock_bh(&ul->lock);
David S. Miller4895c772012-07-17 04:19:00 -07001550 }
1551}
1552
Eric Dumazet4331deb2012-07-25 05:11:23 +00001553static bool rt_cache_valid(const struct rtable *rt)
David S. Millerd2d68ba92012-07-17 12:58:50 -07001554{
Eric Dumazet4331deb2012-07-25 05:11:23 +00001555 return rt &&
1556 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1557 !rt_is_expired(rt);
David S. Millerd2d68ba92012-07-17 12:58:50 -07001558}
1559
David S. Millerf2bb4be2012-07-17 12:20:47 -07001560static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
David S. Miller5e2b61f2011-03-04 21:47:09 -08001561 const struct fib_result *res,
David S. Millerf2bb4be2012-07-17 12:20:47 -07001562 struct fib_nh_exception *fnhe,
Wei Wanga4c2fd72017-06-17 10:42:42 -07001563 struct fib_info *fi, u16 type, u32 itag,
1564 const bool do_cache)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001565{
David S. Millercaacf052012-07-31 15:06:50 -07001566 bool cached = false;
1567
Linus Torvalds1da177e2005-04-16 15:20:36 -07001568 if (fi) {
David Aherneba618a2019-04-02 14:11:55 -07001569 struct fib_nh_common *nhc = FIB_RES_NHC(*res);
David S. Miller4895c772012-07-17 04:19:00 -07001570
David Ahern0f5f7d72019-04-05 16:30:29 -07001571 if (nhc->nhc_gw_family && nhc->nhc_scope == RT_SCOPE_LINK) {
David Ahern77d5bc72019-09-17 10:39:49 -07001572 rt->rt_uses_gateway = 1;
David Ahern0f5f7d72019-04-05 16:30:29 -07001573 rt->rt_gw_family = nhc->nhc_gw_family;
1574 /* only INET and INET6 are supported */
1575 if (likely(nhc->nhc_gw_family == AF_INET))
1576 rt->rt_gw4 = nhc->nhc_gw.ipv4;
1577 else
1578 rt->rt_gw6 = nhc->nhc_gw.ipv6;
Julian Anastasov155e8332012-10-08 11:41:18 +00001579 }
David Ahern0f5f7d72019-04-05 16:30:29 -07001580
David Aherne1255ed2018-10-04 20:07:53 -07001581 ip_dst_init_metrics(&rt->dst, fi->fib_metrics);
1582
Patrick McHardyc7066f72011-01-14 13:36:42 +01001583#ifdef CONFIG_IP_ROUTE_CLASSID
David Aherndcb1ecb2019-06-03 20:19:50 -07001584 if (nhc->nhc_family == AF_INET) {
David Ahern87063a1f2019-04-30 07:45:49 -07001585 struct fib_nh *nh;
1586
1587 nh = container_of(nhc, struct fib_nh, nh_common);
1588 rt->dst.tclassid = nh->nh_tclassid;
1589 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001590#endif
David Ahern87063a1f2019-04-30 07:45:49 -07001591 rt->dst.lwtstate = lwtstate_get(nhc->nhc_lwtstate);
David S. Millerc5038a82012-07-31 15:02:02 -07001592 if (unlikely(fnhe))
Wei Wanga4c2fd72017-06-17 10:42:42 -07001593 cached = rt_bind_exception(rt, fnhe, daddr, do_cache);
1594 else if (do_cache)
David Ahern87063a1f2019-04-30 07:45:49 -07001595 cached = rt_cache_route(nhc, rt);
Julian Anastasov155e8332012-10-08 11:41:18 +00001596 if (unlikely(!cached)) {
1597 /* Routes we intend to cache in the nexthop exception or
1598 * FIB nexthop have the DST_NOCACHE bit clear.
1599 * However, if we are unsuccessful at storing this
1600 * route in the cache, we really need to set it.
1601 */
David Ahern1550c172019-04-05 16:30:27 -07001602 if (!rt->rt_gw4) {
1603 rt->rt_gw_family = AF_INET;
1604 rt->rt_gw4 = daddr;
1605 }
Julian Anastasov155e8332012-10-08 11:41:18 +00001606 rt_add_uncached_list(rt);
1607 }
1608 } else
David S. Millercaacf052012-07-31 15:06:50 -07001609 rt_add_uncached_list(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001610
Patrick McHardyc7066f72011-01-14 13:36:42 +01001611#ifdef CONFIG_IP_ROUTE_CLASSID
Linus Torvalds1da177e2005-04-16 15:20:36 -07001612#ifdef CONFIG_IP_MULTIPLE_TABLES
David S. Miller85b91b02012-07-13 08:21:29 -07001613 set_class_tag(rt, res->tclassid);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001614#endif
1615 set_class_tag(rt, itag);
1616#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001617}
1618
David Ahern9ab179d2016-04-07 11:10:06 -07001619struct rtable *rt_dst_alloc(struct net_device *dev,
1620 unsigned int flags, u16 type,
David Laightaf13b3c2020-03-23 14:31:19 +00001621 bool nopolicy, bool noxfrm)
David S. Miller0c4dcd52011-02-17 15:42:37 -08001622{
David Ahernd08c4f32015-09-02 13:58:34 -07001623 struct rtable *rt;
1624
1625 rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
David Ahernd08c4f32015-09-02 13:58:34 -07001626 (nopolicy ? DST_NOPOLICY : 0) |
Wei Wangb2a9c0e2017-06-17 10:42:41 -07001627 (noxfrm ? DST_NOXFRM : 0));
David Ahernd08c4f32015-09-02 13:58:34 -07001628
1629 if (rt) {
1630 rt->rt_genid = rt_genid_ipv4(dev_net(dev));
1631 rt->rt_flags = flags;
1632 rt->rt_type = type;
1633 rt->rt_is_input = 0;
1634 rt->rt_iif = 0;
1635 rt->rt_pmtu = 0;
Sabrina Dubrocad52e5a72018-03-14 10:21:14 +01001636 rt->rt_mtu_locked = 0;
David Ahern77d5bc72019-09-17 10:39:49 -07001637 rt->rt_uses_gateway = 0;
David Ahern1550c172019-04-05 16:30:27 -07001638 rt->rt_gw_family = 0;
1639 rt->rt_gw4 = 0;
David Ahernd08c4f32015-09-02 13:58:34 -07001640 INIT_LIST_HEAD(&rt->rt_uncached);
1641
1642 rt->dst.output = ip_output;
1643 if (flags & RTCF_LOCAL)
1644 rt->dst.input = ip_local_deliver;
1645 }
1646
1647 return rt;
David S. Miller0c4dcd52011-02-17 15:42:37 -08001648}
David Ahern9ab179d2016-04-07 11:10:06 -07001649EXPORT_SYMBOL(rt_dst_alloc);
David S. Miller0c4dcd52011-02-17 15:42:37 -08001650
Stephen Suryaputra5b18f122019-06-26 02:21:16 -04001651struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt)
1652{
1653 struct rtable *new_rt;
1654
1655 new_rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
1656 rt->dst.flags);
1657
1658 if (new_rt) {
1659 new_rt->rt_genid = rt_genid_ipv4(dev_net(dev));
1660 new_rt->rt_flags = rt->rt_flags;
1661 new_rt->rt_type = rt->rt_type;
1662 new_rt->rt_is_input = rt->rt_is_input;
1663 new_rt->rt_iif = rt->rt_iif;
1664 new_rt->rt_pmtu = rt->rt_pmtu;
1665 new_rt->rt_mtu_locked = rt->rt_mtu_locked;
1666 new_rt->rt_gw_family = rt->rt_gw_family;
1667 if (rt->rt_gw_family == AF_INET)
1668 new_rt->rt_gw4 = rt->rt_gw4;
1669 else if (rt->rt_gw_family == AF_INET6)
1670 new_rt->rt_gw6 = rt->rt_gw6;
1671 INIT_LIST_HEAD(&new_rt->rt_uncached);
1672
Stephen Suryaputra5b18f122019-06-26 02:21:16 -04001673 new_rt->dst.input = rt->dst.input;
1674 new_rt->dst.output = rt->dst.output;
1675 new_rt->dst.error = rt->dst.error;
1676 new_rt->dst.lastuse = jiffies;
1677 new_rt->dst.lwtstate = lwtstate_get(rt->dst.lwtstate);
1678 }
1679 return new_rt;
1680}
1681EXPORT_SYMBOL(rt_dst_clone);
1682
Eric Dumazet96d36222010-06-02 19:21:31 +00001683/* called in rcu_read_lock() section */
Paolo Abenibc044e82017-09-28 15:51:37 +02001684int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1685 u8 tos, struct net_device *dev,
1686 struct in_device *in_dev, u32 *itag)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001687{
Eric Dumazetb5f7e752010-06-02 12:05:27 +00001688 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001689
1690 /* Primary sanity checks. */
Ian Morris51456b22015-04-03 09:17:26 +01001691 if (!in_dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001692 return -EINVAL;
1693
Jan Engelhardt1e637c72008-01-21 03:18:08 -08001694 if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
Thomas Grafd0daebc32012-06-12 00:44:01 +00001695 skb->protocol != htons(ETH_P_IP))
Paolo Abenibc044e82017-09-28 15:51:37 +02001696 return -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001697
Alexander Duyck75fea732015-09-28 11:10:38 -07001698 if (ipv4_is_loopback(saddr) && !IN_DEV_ROUTE_LOCALNET(in_dev))
Paolo Abenibc044e82017-09-28 15:51:37 +02001699 return -EINVAL;
Thomas Grafd0daebc32012-06-12 00:44:01 +00001700
Joe Perchesf97c1e02007-12-16 13:45:43 -08001701 if (ipv4_is_zeronet(saddr)) {
Edward Chron1d2f4eb2019-01-31 15:00:40 -08001702 if (!ipv4_is_local_multicast(daddr) &&
1703 ip_hdr(skb)->protocol != IPPROTO_IGMP)
Paolo Abenibc044e82017-09-28 15:51:37 +02001704 return -EINVAL;
Eric Dumazetb5f7e752010-06-02 12:05:27 +00001705 } else {
David S. Miller9e56e382012-06-28 18:54:02 -07001706 err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
Paolo Abenibc044e82017-09-28 15:51:37 +02001707 in_dev, itag);
Eric Dumazetb5f7e752010-06-02 12:05:27 +00001708 if (err < 0)
Paolo Abenibc044e82017-09-28 15:51:37 +02001709 return err;
Eric Dumazetb5f7e752010-06-02 12:05:27 +00001710 }
Paolo Abenibc044e82017-09-28 15:51:37 +02001711 return 0;
1712}
1713
1714/* called in rcu_read_lock() section */
1715static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1716 u8 tos, struct net_device *dev, int our)
1717{
1718 struct in_device *in_dev = __in_dev_get_rcu(dev);
1719 unsigned int flags = RTCF_MULTICAST;
1720 struct rtable *rth;
1721 u32 itag = 0;
1722 int err;
1723
1724 err = ip_mc_validate_source(skb, daddr, saddr, tos, dev, in_dev, &itag);
1725 if (err)
1726 return err;
1727
David Ahernd08c4f32015-09-02 13:58:34 -07001728 if (our)
1729 flags |= RTCF_LOCAL;
1730
1731 rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST,
Vincent Bernat62679a82020-11-07 20:35:15 +01001732 IN_DEV_ORCONF(in_dev, NOPOLICY), false);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001733 if (!rth)
Paolo Abenibc044e82017-09-28 15:51:37 +02001734 return -ENOBUFS;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001735
Patrick McHardyc7066f72011-01-14 13:36:42 +01001736#ifdef CONFIG_IP_ROUTE_CLASSID
Changli Gaod8d1f302010-06-10 23:31:35 -07001737 rth->dst.tclassid = itag;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001738#endif
David S. Millercf911662011-04-28 14:31:47 -07001739 rth->dst.output = ip_rt_bug;
David S. Miller9917e1e82012-07-17 14:44:26 -07001740 rth->rt_is_input = 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001741
1742#ifdef CONFIG_IP_MROUTE
Joe Perchesf97c1e02007-12-16 13:45:43 -08001743 if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev))
Changli Gaod8d1f302010-06-10 23:31:35 -07001744 rth->dst.input = ip_mr_input;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001745#endif
1746 RT_CACHE_STAT_INC(in_slow_mc);
1747
David S. Miller89aef892012-07-17 11:00:09 -07001748 skb_dst_set(skb, &rth->dst);
1749 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001750}
1751
1752
1753static void ip_handle_martian_source(struct net_device *dev,
1754 struct in_device *in_dev,
1755 struct sk_buff *skb,
Al Viro9e12bb22006-09-26 21:25:20 -07001756 __be32 daddr,
1757 __be32 saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001758{
1759 RT_CACHE_STAT_INC(in_martian_src);
1760#ifdef CONFIG_IP_ROUTE_VERBOSE
1761 if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) {
1762 /*
1763 * RFC1812 recommendation, if source is martian,
1764 * the only hint is MAC header.
1765 */
Joe Perches058bd4d2012-03-11 18:36:11 +00001766 pr_warn("martian source %pI4 from %pI4, on dev %s\n",
Harvey Harrison673d57e2008-10-31 00:53:57 -07001767 &daddr, &saddr, dev->name);
Arnaldo Carvalho de Melo98e399f2007-03-19 15:33:04 -07001768 if (dev->hard_header_len && skb_mac_header_was_set(skb)) {
Joe Perches058bd4d2012-03-11 18:36:11 +00001769 print_hex_dump(KERN_WARNING, "ll header: ",
1770 DUMP_PREFIX_OFFSET, 16, 1,
1771 skb_mac_header(skb),
David S. Millerb2c85102018-11-20 10:15:36 -08001772 dev->hard_header_len, false);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001773 }
1774 }
1775#endif
1776}
1777
Eric Dumazet47360222010-06-03 04:13:21 +00001778/* called in rcu_read_lock() section */
Stephen Hemminger5969f712008-04-10 01:52:09 -07001779static int __mkroute_input(struct sk_buff *skb,
David S. Miller982721f2011-02-16 21:44:24 -08001780 const struct fib_result *res,
Stephen Hemminger5969f712008-04-10 01:52:09 -07001781 struct in_device *in_dev,
David S. Millerc6cffba2012-07-26 11:14:38 +00001782 __be32 daddr, __be32 saddr, u32 tos)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001783{
David Aherneba618a2019-04-02 14:11:55 -07001784 struct fib_nh_common *nhc = FIB_RES_NHC(*res);
1785 struct net_device *dev = nhc->nhc_dev;
Timo Teräs2ffae992013-06-27 10:27:05 +03001786 struct fib_nh_exception *fnhe;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001787 struct rtable *rth;
1788 int err;
1789 struct in_device *out_dev;
David S. Millerd2d68ba92012-07-17 12:58:50 -07001790 bool do_cache;
Li RongQingfbdc0ad2014-05-22 16:36:55 +08001791 u32 itag = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001792
1793 /* get a working reference to the output device */
David Aherneba618a2019-04-02 14:11:55 -07001794 out_dev = __in_dev_get_rcu(dev);
Ian Morris51456b22015-04-03 09:17:26 +01001795 if (!out_dev) {
Joe Perchese87cc472012-05-13 21:56:26 +00001796 net_crit_ratelimited("Bug in ip_route_input_slow(). Please report.\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001797 return -EINVAL;
1798 }
1799
Michael Smith5c04c812011-04-07 04:51:50 +00001800 err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res),
David S. Miller9e56e382012-06-28 18:54:02 -07001801 in_dev->dev, in_dev, &itag);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001802 if (err < 0) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001803 ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001804 saddr);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001805
Linus Torvalds1da177e2005-04-16 15:20:36 -07001806 goto cleanup;
1807 }
1808
Julian Anastasove81da0e2012-10-08 11:41:15 +00001809 do_cache = res->fi && !itag;
1810 if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) &&
David Aherneba618a2019-04-02 14:11:55 -07001811 skb->protocol == htons(ETH_P_IP)) {
David Ahernbdf00462019-04-05 16:30:26 -07001812 __be32 gw;
David Aherneba618a2019-04-02 14:11:55 -07001813
David Ahernbdf00462019-04-05 16:30:26 -07001814 gw = nhc->nhc_gw_family == AF_INET ? nhc->nhc_gw.ipv4 : 0;
David Aherneba618a2019-04-02 14:11:55 -07001815 if (IN_DEV_SHARED_MEDIA(out_dev) ||
1816 inet_addr_onlink(out_dev, saddr, gw))
1817 IPCB(skb)->flags |= IPSKB_DOREDIRECT;
1818 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001819
1820 if (skb->protocol != htons(ETH_P_IP)) {
1821 /* Not IP (i.e. ARP). Do not create a route if it is
1822 * invalid for proxy arp. DNAT routes are always valid.
Jesper Dangaard Brouer65324142010-01-05 05:50:47 +00001823 *
1824 * The proxy arp feature has been extended to allow ARP
1825 * replies back on the same interface, to support
1826 * Private VLAN switch technologies. See arp.c.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001827 */
Jesper Dangaard Brouer65324142010-01-05 05:50:47 +00001828 if (out_dev == in_dev &&
1829 IN_DEV_PROXY_ARP_PVLAN(in_dev) == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001830 err = -EINVAL;
1831 goto cleanup;
1832 }
1833 }
1834
David Aherna5995e72019-04-30 07:45:50 -07001835 fnhe = find_exception(nhc, daddr);
Julian Anastasove81da0e2012-10-08 11:41:15 +00001836 if (do_cache) {
Julian Anastasov94720e32018-05-02 09:41:19 +03001837 if (fnhe)
Timo Teräs2ffae992013-06-27 10:27:05 +03001838 rth = rcu_dereference(fnhe->fnhe_rth_input);
Julian Anastasov94720e32018-05-02 09:41:19 +03001839 else
David Ahern0f457a32019-04-30 07:45:48 -07001840 rth = rcu_dereference(nhc->nhc_rth_input);
Julian Anastasove81da0e2012-10-08 11:41:15 +00001841 if (rt_cache_valid(rth)) {
1842 skb_dst_set_noref(skb, &rth->dst);
1843 goto out;
David S. Millerd2d68ba92012-07-17 12:58:50 -07001844 }
1845 }
David S. Millerf2bb4be2012-07-17 12:20:47 -07001846
David Ahernd08c4f32015-09-02 13:58:34 -07001847 rth = rt_dst_alloc(out_dev->dev, 0, res->type,
Vincent Bernat62679a82020-11-07 20:35:15 +01001848 IN_DEV_ORCONF(in_dev, NOPOLICY),
1849 IN_DEV_ORCONF(out_dev, NOXFRM));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001850 if (!rth) {
1851 err = -ENOBUFS;
1852 goto cleanup;
1853 }
1854
David S. Miller9917e1e82012-07-17 14:44:26 -07001855 rth->rt_is_input = 1;
Duan Jionga6254862014-02-17 15:23:43 +08001856 RT_CACHE_STAT_INC(in_slow_tot);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001857
Changli Gaod8d1f302010-06-10 23:31:35 -07001858 rth->dst.input = ip_forward;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001859
Wei Wanga4c2fd72017-06-17 10:42:42 -07001860 rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag,
1861 do_cache);
David Ahern99428952018-02-13 20:32:04 -08001862 lwtunnel_set_redirect(&rth->dst);
David S. Millerc6cffba2012-07-26 11:14:38 +00001863 skb_dst_set(skb, &rth->dst);
David S. Millerd2d68ba92012-07-17 12:58:50 -07001864out:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001865 err = 0;
1866 cleanup:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001867 return err;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001868}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001869
Peter Nørlund79a13152015-09-30 10:12:22 +02001870#ifdef CONFIG_IP_ROUTE_MULTIPATH
Peter Nørlund79a13152015-09-30 10:12:22 +02001871/* To make ICMP packets follow the right flow, the multipath hash is
Nikolay Aleksandrovbf4e0a32017-03-16 15:28:00 +02001872 * calculated from the inner IP addresses.
Peter Nørlund79a13152015-09-30 10:12:22 +02001873 */
Nikolay Aleksandrovbf4e0a32017-03-16 15:28:00 +02001874static void ip_multipath_l3_keys(const struct sk_buff *skb,
1875 struct flow_keys *hash_keys)
Peter Nørlund79a13152015-09-30 10:12:22 +02001876{
1877 const struct iphdr *outer_iph = ip_hdr(skb);
David Ahern6f74b6c2018-03-02 08:32:13 -08001878 const struct iphdr *key_iph = outer_iph;
Nikolay Aleksandrovbf4e0a32017-03-16 15:28:00 +02001879 const struct iphdr *inner_iph;
Peter Nørlund79a13152015-09-30 10:12:22 +02001880 const struct icmphdr *icmph;
1881 struct iphdr _inner_iph;
Nikolay Aleksandrovbf4e0a32017-03-16 15:28:00 +02001882 struct icmphdr _icmph;
1883
Nikolay Aleksandrovbf4e0a32017-03-16 15:28:00 +02001884 if (likely(outer_iph->protocol != IPPROTO_ICMP))
David Ahern6f74b6c2018-03-02 08:32:13 -08001885 goto out;
Peter Nørlund79a13152015-09-30 10:12:22 +02001886
1887 if (unlikely((outer_iph->frag_off & htons(IP_OFFSET)) != 0))
David Ahern6f74b6c2018-03-02 08:32:13 -08001888 goto out;
Peter Nørlund79a13152015-09-30 10:12:22 +02001889
1890 icmph = skb_header_pointer(skb, outer_iph->ihl * 4, sizeof(_icmph),
1891 &_icmph);
1892 if (!icmph)
David Ahern6f74b6c2018-03-02 08:32:13 -08001893 goto out;
Peter Nørlund79a13152015-09-30 10:12:22 +02001894
Matteo Croce54074f12019-11-02 01:12:04 +01001895 if (!icmp_is_err(icmph->type))
David Ahern6f74b6c2018-03-02 08:32:13 -08001896 goto out;
Peter Nørlund79a13152015-09-30 10:12:22 +02001897
1898 inner_iph = skb_header_pointer(skb,
1899 outer_iph->ihl * 4 + sizeof(_icmph),
1900 sizeof(_inner_iph), &_inner_iph);
1901 if (!inner_iph)
David Ahern6f74b6c2018-03-02 08:32:13 -08001902 goto out;
1903
1904 key_iph = inner_iph;
1905out:
1906 hash_keys->addrs.v4addrs.src = key_iph->saddr;
1907 hash_keys->addrs.v4addrs.dst = key_iph->daddr;
Peter Nørlund79a13152015-09-30 10:12:22 +02001908}
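/* Example: an ICMP "fragmentation needed" error quoting a packet of flow
 * A -> B is hashed on the quoted (inner) A/B addresses rather than on the
 * error packet's own addresses, so the error follows the same ECMP path
 * as the flow it refers to instead of an arbitrary one.
 */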
1909
Ido Schimmel4253b492021-05-17 21:15:19 +03001910static u32 fib_multipath_custom_hash_outer(const struct net *net,
1911 const struct sk_buff *skb,
1912 bool *p_has_inner)
1913{
1914 u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields;
1915 struct flow_keys keys, hash_keys;
1916
1917 if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK))
1918 return 0;
1919
1920 memset(&hash_keys, 0, sizeof(hash_keys));
1921 skb_flow_dissect_flow_keys(skb, &keys, FLOW_DISSECTOR_F_STOP_AT_ENCAP);
1922
1923 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
1924 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP)
1925 hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
1926 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP)
1927 hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst;
1928 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO)
1929 hash_keys.basic.ip_proto = keys.basic.ip_proto;
1930 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT)
1931 hash_keys.ports.src = keys.ports.src;
1932 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT)
1933 hash_keys.ports.dst = keys.ports.dst;
1934
1935 *p_has_inner = !!(keys.control.flags & FLOW_DIS_ENCAPSULATION);
1936 return flow_hash_from_keys(&hash_keys);
1937}
1938
1939static u32 fib_multipath_custom_hash_inner(const struct net *net,
1940 const struct sk_buff *skb,
1941 bool has_inner)
1942{
1943 u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields;
1944 struct flow_keys keys, hash_keys;
1945
1946 /* We assume the packet carries an encapsulation, but if none was
1947 * encountered during dissection of the outer flow, then there is no
1948 * point in calling the flow dissector again.
1949 */
1950 if (!has_inner)
1951 return 0;
1952
1953 if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_MASK))
1954 return 0;
1955
1956 memset(&hash_keys, 0, sizeof(hash_keys));
1957 skb_flow_dissect_flow_keys(skb, &keys, 0);
1958
1959 if (!(keys.control.flags & FLOW_DIS_ENCAPSULATION))
1960 return 0;
1961
1962 if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
1963 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
1964 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP)
1965 hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
1966 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP)
1967 hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst;
1968 } else if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
1969 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1970 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP)
1971 hash_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src;
1972 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP)
1973 hash_keys.addrs.v6addrs.dst = keys.addrs.v6addrs.dst;
1974 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_FLOWLABEL)
1975 hash_keys.tags.flow_label = keys.tags.flow_label;
1976 }
1977
1978 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO)
1979 hash_keys.basic.ip_proto = keys.basic.ip_proto;
1980 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_PORT)
1981 hash_keys.ports.src = keys.ports.src;
1982 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT)
1983 hash_keys.ports.dst = keys.ports.dst;
1984
1985 return flow_hash_from_keys(&hash_keys);
1986}
1987
1988static u32 fib_multipath_custom_hash_skb(const struct net *net,
1989 const struct sk_buff *skb)
1990{
1991 u32 mhash, mhash_inner;
1992 bool has_inner = true;
1993
1994 mhash = fib_multipath_custom_hash_outer(net, skb, &has_inner);
1995 mhash_inner = fib_multipath_custom_hash_inner(net, skb, has_inner);
1996
1997 return jhash_2words(mhash, mhash_inner, 0);
1998}
1999
2000static u32 fib_multipath_custom_hash_fl4(const struct net *net,
2001 const struct flowi4 *fl4)
2002{
2003 u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields;
2004 struct flow_keys hash_keys;
2005
2006 if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK))
2007 return 0;
2008
2009 memset(&hash_keys, 0, sizeof(hash_keys));
2010 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
2011 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP)
2012 hash_keys.addrs.v4addrs.src = fl4->saddr;
2013 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP)
2014 hash_keys.addrs.v4addrs.dst = fl4->daddr;
2015 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO)
2016 hash_keys.basic.ip_proto = fl4->flowi4_proto;
2017 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT)
2018 hash_keys.ports.src = fl4->fl4_sport;
2019 if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT)
2020 hash_keys.ports.dst = fl4->fl4_dport;
2021
2022 return flow_hash_from_keys(&hash_keys);
2023}
2024
Nikolay Aleksandrovbf4e0a32017-03-16 15:28:00 +02002025/* If skb is set it will be used, and fl4 can be NULL. */
David Ahern7efc0b62018-03-02 08:32:12 -08002026int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
Roopa Prabhue37b1e92018-02-28 22:42:41 -05002027 const struct sk_buff *skb, struct flow_keys *flkeys)
Nikolay Aleksandrovbf4e0a32017-03-16 15:28:00 +02002028{
Ido Schimmel2a8e4992019-03-01 13:38:43 +00002029 u32 multipath_hash = fl4 ? fl4->flowi4_multipath_hash : 0;
Nikolay Aleksandrovbf4e0a32017-03-16 15:28:00 +02002030 struct flow_keys hash_keys;
Ido Schimmel2e68ea92021-05-17 21:15:17 +03002031 u32 mhash = 0;
Nikolay Aleksandrovbf4e0a32017-03-16 15:28:00 +02002032
2033 switch (net->ipv4.sysctl_fib_multipath_hash_policy) {
2034 case 0:
2035 memset(&hash_keys, 0, sizeof(hash_keys));
2036 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
2037 if (skb) {
2038 ip_multipath_l3_keys(skb, &hash_keys);
2039 } else {
2040 hash_keys.addrs.v4addrs.src = fl4->saddr;
2041 hash_keys.addrs.v4addrs.dst = fl4->daddr;
2042 }
Ido Schimmel2e68ea92021-05-17 21:15:17 +03002043 mhash = flow_hash_from_keys(&hash_keys);
Nikolay Aleksandrovbf4e0a32017-03-16 15:28:00 +02002044 break;
2045 case 1:
2046 /* skb is currently provided only when forwarding */
2047 if (skb) {
2048 unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
2049 struct flow_keys keys;
2050
2051 /* short-circuit if we already have L4 hash present */
2052 if (skb->l4_hash)
2053 return skb_get_hash_raw(skb) >> 1;
David Ahernec7127a2018-03-02 08:32:14 -08002054
Nikolay Aleksandrovbf4e0a32017-03-16 15:28:00 +02002055 memset(&hash_keys, 0, sizeof(hash_keys));
David Ahern1fe4b112018-02-21 11:00:54 -08002056
David Ahernec7127a2018-03-02 08:32:14 -08002057 if (!flkeys) {
Roopa Prabhue37b1e92018-02-28 22:42:41 -05002058 skb_flow_dissect_flow_keys(skb, &keys, flag);
David Ahernec7127a2018-03-02 08:32:14 -08002059 flkeys = &keys;
Roopa Prabhue37b1e92018-02-28 22:42:41 -05002060 }
David Ahernec7127a2018-03-02 08:32:14 -08002061
2062 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
2063 hash_keys.addrs.v4addrs.src = flkeys->addrs.v4addrs.src;
2064 hash_keys.addrs.v4addrs.dst = flkeys->addrs.v4addrs.dst;
2065 hash_keys.ports.src = flkeys->ports.src;
2066 hash_keys.ports.dst = flkeys->ports.dst;
2067 hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
Nikolay Aleksandrovbf4e0a32017-03-16 15:28:00 +02002068 } else {
2069 memset(&hash_keys, 0, sizeof(hash_keys));
2070 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
2071 hash_keys.addrs.v4addrs.src = fl4->saddr;
2072 hash_keys.addrs.v4addrs.dst = fl4->daddr;
2073 hash_keys.ports.src = fl4->fl4_sport;
2074 hash_keys.ports.dst = fl4->fl4_dport;
2075 hash_keys.basic.ip_proto = fl4->flowi4_proto;
2076 }
Ido Schimmel2e68ea92021-05-17 21:15:17 +03002077 mhash = flow_hash_from_keys(&hash_keys);
Nikolay Aleksandrovbf4e0a32017-03-16 15:28:00 +02002078 break;
Stephen Suryaputra363887a2019-06-13 14:38:58 -04002079 case 2:
2080 memset(&hash_keys, 0, sizeof(hash_keys));
Stephen Suryaputra363887a2019-06-13 14:38:58 -04002081 /* skb is currently provided only when forwarding */
2082 if (skb) {
2083 struct flow_keys keys;
2084
2085 skb_flow_dissect_flow_keys(skb, &keys, 0);
Stephen Suryaputra828b2b42019-07-06 10:55:17 -04002086 /* Inner can be v4 or v6 */
2087 if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
2088 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
2089 hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
2090 hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst;
2091 } else if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
2092 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2093 hash_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src;
2094 hash_keys.addrs.v6addrs.dst = keys.addrs.v6addrs.dst;
2095 hash_keys.tags.flow_label = keys.tags.flow_label;
2096 hash_keys.basic.ip_proto = keys.basic.ip_proto;
2097 } else {
2098 /* Same as case 0 */
2099 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
2100 ip_multipath_l3_keys(skb, &hash_keys);
2101 }
Stephen Suryaputra363887a2019-06-13 14:38:58 -04002102 } else {
2103 /* Same as case 0 */
Stephen Suryaputra828b2b42019-07-06 10:55:17 -04002104 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
Stephen Suryaputra363887a2019-06-13 14:38:58 -04002105 hash_keys.addrs.v4addrs.src = fl4->saddr;
2106 hash_keys.addrs.v4addrs.dst = fl4->daddr;
2107 }
Ido Schimmel2e68ea92021-05-17 21:15:17 +03002108 mhash = flow_hash_from_keys(&hash_keys);
Stephen Suryaputra363887a2019-06-13 14:38:58 -04002109 break;
Ido Schimmel4253b492021-05-17 21:15:19 +03002110 case 3:
2111 if (skb)
2112 mhash = fib_multipath_custom_hash_skb(net, skb);
2113 else
2114 mhash = fib_multipath_custom_hash_fl4(net, fl4);
2115 break;
Nikolay Aleksandrovbf4e0a32017-03-16 15:28:00 +02002116 }
Nikolay Aleksandrovbf4e0a32017-03-16 15:28:00 +02002117
wenxu24ba1442019-02-24 11:36:20 +08002118 if (multipath_hash)
2119 mhash = jhash_2words(mhash, multipath_hash, 0);
2120
Nikolay Aleksandrovbf4e0a32017-03-16 15:28:00 +02002121 return mhash >> 1;
2122}
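/* Usage note (summary, not from this file): the switch above is driven by
 * the net.ipv4.fib_multipath_hash_policy sysctl: 0 hashes on L3 addresses,
 * 1 on the L4 five-tuple, 2 on inner L3 for encapsulated traffic, and 3 on
 * a custom field set selected via net.ipv4.fib_multipath_hash_fields.
 */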
Peter Nørlund79a13152015-09-30 10:12:22 +02002123#endif /* CONFIG_IP_ROUTE_MULTIPATH */
2124
Stephen Hemminger5969f712008-04-10 01:52:09 -07002125static int ip_mkroute_input(struct sk_buff *skb,
2126 struct fib_result *res,
Stephen Hemminger5969f712008-04-10 01:52:09 -07002127 struct in_device *in_dev,
Roopa Prabhue37b1e92018-02-28 22:42:41 -05002128 __be32 daddr, __be32 saddr, u32 tos,
2129 struct flow_keys *hkeys)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002130{
Linus Torvalds1da177e2005-04-16 15:20:36 -07002131#ifdef CONFIG_IP_ROUTE_MULTIPATH
David Ahern5481d732019-06-03 20:19:49 -07002132 if (res->fi && fib_info_num_path(res->fi) > 1) {
David Ahern7efc0b62018-03-02 08:32:12 -08002133 int h = fib_multipath_hash(res->fi->fib_net, NULL, skb, hkeys);
Peter Nørlund0e884c72015-09-30 10:12:21 +02002134
Peter Nørlund0e884c72015-09-30 10:12:21 +02002135 fib_select_multipath(res, h);
2136 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002137#endif
2138
2139 /* create a routing cache entry */
David S. Millerc6cffba2012-07-26 11:14:38 +00002140 return __mkroute_input(skb, res, in_dev, daddr, saddr, tos);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002141}
2142
Paolo Abeni02b24942019-11-20 13:47:37 +01002143/* Implements the same saddr-related checks as ip_route_input_slow(),
2144 * assuming daddr is valid and the destination is not a local broadcast one.
2145 * Uses the provided hint instead of performing a route lookup.
2146 */
2147int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2148 u8 tos, struct net_device *dev,
2149 const struct sk_buff *hint)
2150{
2151 struct in_device *in_dev = __in_dev_get_rcu(dev);
Miaohe Lin2ce578c2020-06-27 15:47:51 +08002152 struct rtable *rt = skb_rtable(hint);
Paolo Abeni02b24942019-11-20 13:47:37 +01002153 struct net *net = dev_net(dev);
2154 int err = -EINVAL;
2155 u32 tag = 0;
2156
2157 if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
2158 goto martian_source;
2159
2160 if (ipv4_is_zeronet(saddr))
2161 goto martian_source;
2162
2163 if (ipv4_is_loopback(saddr) && !IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
2164 goto martian_source;
2165
2166 if (rt->rt_type != RTN_LOCAL)
2167 goto skip_validate_source;
2168
2169 tos &= IPTOS_RT_MASK;
2170 err = fib_validate_source(skb, saddr, daddr, tos, 0, dev, in_dev, &tag);
2171 if (err < 0)
2172 goto martian_source;
2173
2174skip_validate_source:
2175 skb_dst_copy(skb, hint);
2176 return 0;
2177
2178martian_source:
2179 ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
2180 return err;
2181}
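/* Illustrative sketch, not part of this file: the listified receive path
 * keeps the previous packet as a hint and short-circuits the lookup when
 * consecutive packets share a destination, roughly:
 *
 *	if (hint && ip_hdr(skb)->daddr == ip_hdr(hint)->daddr)
 *		err = ip_route_use_hint(skb, daddr, saddr, tos, dev, hint);
 *	else
 *		err = ip_route_input_noref(skb, daddr, saddr, tos, dev);
 */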
2182
David Ahernb87b04f2021-06-12 18:24:59 -06002183/* get device for dst_alloc with local routes */
2184static struct net_device *ip_rt_get_dev(struct net *net,
2185 const struct fib_result *res)
2186{
2187 struct fib_nh_common *nhc = res->fi ? res->nhc : NULL;
2188 struct net_device *dev = NULL;
2189
2190 if (nhc)
2191 dev = l3mdev_master_dev_rcu(nhc->nhc_dev);
2192
2193 return dev ? : net->loopback_dev;
2194}
2195
Linus Torvalds1da177e2005-04-16 15:20:36 -07002196/*
2197 * NOTE. We drop all packets that have local source
2198 * addresses, because every properly looped-back packet
2199 * must have the correct destination already attached by the output routine.
Paolo Abeni02b24942019-11-20 13:47:37 +01002200 * Changes in the enforced policies must be applied also to
2201 * ip_route_use_hint().
Linus Torvalds1da177e2005-04-16 15:20:36 -07002202 *
2203 * Such an approach solves two big problems:
2204 * 1. Non-simplex devices are handled properly.
2205 * 2. IP spoofing attempts are filtered with a 100% guarantee.
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00002206 * called with rcu_read_lock()
Linus Torvalds1da177e2005-04-16 15:20:36 -07002207 */
2208
Al Viro9e12bb22006-09-26 21:25:20 -07002209static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
David Ahern5510cdf2017-05-25 10:42:34 -07002210 u8 tos, struct net_device *dev,
2211 struct fib_result *res)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002212{
Eric Dumazet96d36222010-06-02 19:21:31 +00002213 struct in_device *in_dev = __in_dev_get_rcu(dev);
Roopa Prabhue37b1e92018-02-28 22:42:41 -05002214 struct flow_keys *flkeys = NULL, _flkeys;
2215 struct net *net = dev_net(dev);
Thomas Graf1b7179d2015-07-21 10:43:59 +02002216 struct ip_tunnel_info *tun_info;
Roopa Prabhue37b1e92018-02-28 22:42:41 -05002217 int err = -EINVAL;
Eric Dumazet95c96172012-04-15 05:58:06 +00002218 unsigned int flags = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002219 u32 itag = 0;
Eric Dumazet95c96172012-04-15 05:58:06 +00002220 struct rtable *rth;
Roopa Prabhue37b1e92018-02-28 22:42:41 -05002221 struct flowi4 fl4;
Xin Long0a904782019-06-02 19:10:24 +08002222 bool do_cache = true;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002223
2224 /* IP on this device is disabled. */
2225
2226 if (!in_dev)
2227 goto out;
2228
2229 /* Check for the weirdest martians, which cannot be detected
Shubhankar Kuranagatti6ad08602021-03-12 13:00:05 +05302230 * by fib_lookup.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002231 */
2232
Jiri Benc61adedf2015-08-20 13:56:25 +02002233 tun_info = skb_tunnel_info(skb);
Jiri Benc46fa0622015-08-28 20:48:19 +02002234 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
Thomas Graf1b7179d2015-07-21 10:43:59 +02002235 fl4.flowi4_tun_key.tun_id = tun_info->key.tun_id;
2236 else
2237 fl4.flowi4_tun_key.tun_id = 0;
Thomas Graff38a9eb2015-07-21 10:43:56 +02002238 skb_dst_drop(skb);
2239
Thomas Grafd0daebc32012-06-12 00:44:01 +00002240 if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002241 goto martian_source;
2242
David Ahern5510cdf2017-05-25 10:42:34 -07002243 res->fi = NULL;
2244 res->table = NULL;
Andy Walls27a954b2010-10-17 15:11:22 +00002245 if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002246 goto brd_input;
2247
2248 /* Accept zero addresses only to limited broadcast;
2249 * I do not even know whether to fix it or not. Waiting for complaints :-)
2250 */
Joe Perchesf97c1e02007-12-16 13:45:43 -08002251 if (ipv4_is_zeronet(saddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002252 goto martian_source;
2253
Thomas Grafd0daebc32012-06-12 00:44:01 +00002254 if (ipv4_is_zeronet(daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002255 goto martian_destination;
2256
Eric Dumazet9eb43e72012-08-03 21:27:25 +00002257 /* The following code tries to avoid calling IN_DEV_NET_ROUTE_LOCALNET()
2258 * twice, calling it at most once when daddr and/or saddr are loopback addresses
2259 */
2260 if (ipv4_is_loopback(daddr)) {
2261 if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
Thomas Grafd0daebc32012-06-12 00:44:01 +00002262 goto martian_destination;
Eric Dumazet9eb43e72012-08-03 21:27:25 +00002263 } else if (ipv4_is_loopback(saddr)) {
2264 if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
Thomas Grafd0daebc32012-06-12 00:44:01 +00002265 goto martian_source;
2266 }
2267
Linus Torvalds1da177e2005-04-16 15:20:36 -07002268 /*
2269 * Now we are ready to route packet.
2270 */
David S. Miller68a5e3d2011-03-11 20:07:33 -05002271 fl4.flowi4_oif = 0;
David Aherne0d56fd2016-09-10 12:09:57 -07002272 fl4.flowi4_iif = dev->ifindex;
David S. Miller68a5e3d2011-03-11 20:07:33 -05002273 fl4.flowi4_mark = skb->mark;
2274 fl4.flowi4_tos = tos;
2275 fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
David Ahernb84f7872015-09-29 19:07:07 -07002276 fl4.flowi4_flags = 0;
David S. Miller68a5e3d2011-03-11 20:07:33 -05002277 fl4.daddr = daddr;
2278 fl4.saddr = saddr;
Julian Anastasov8bcfd092017-02-26 15:50:52 +02002279 fl4.flowi4_uid = sock_net_uid(net, NULL);
David Ahern1869e222020-09-13 12:43:39 -06002280 fl4.flowi4_multipath_hash = 0;
Roopa Prabhue37b1e92018-02-28 22:42:41 -05002281
David Ahern5a847a62018-05-16 13:36:40 -07002282 if (fib4_rules_early_flow_dissect(net, skb, &fl4, &_flkeys)) {
Roopa Prabhue37b1e92018-02-28 22:42:41 -05002283 flkeys = &_flkeys;
David Ahern5a847a62018-05-16 13:36:40 -07002284 } else {
2285 fl4.flowi4_proto = 0;
2286 fl4.fl4_sport = 0;
2287 fl4.fl4_dport = 0;
2288 }
Roopa Prabhue37b1e92018-02-28 22:42:41 -05002289
David Ahern5510cdf2017-05-25 10:42:34 -07002290 err = fib_lookup(net, &fl4, res, 0);
Duan Jiongcd0f0b92014-02-14 18:26:22 +08002291 if (err != 0) {
2292 if (!IN_DEV_FORWARD(in_dev))
2293 err = -EHOSTUNREACH;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002294 goto no_route;
Duan Jiongcd0f0b92014-02-14 18:26:22 +08002295 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002296
Xin Long5cbf7772018-07-27 16:37:28 +08002297 if (res->type == RTN_BROADCAST) {
2298 if (IN_DEV_BFORWARD(in_dev))
2299 goto make_route;
Xin Long0a904782019-06-02 19:10:24 +08002300 /* not do cache if bc_forwarding is enabled */
2301 if (IPV4_DEVCONF_ALL(net, BC_FORWARDING))
2302 do_cache = false;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002303 goto brd_input;
Xin Long5cbf7772018-07-27 16:37:28 +08002304 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002305
David Ahern5510cdf2017-05-25 10:42:34 -07002306 if (res->type == RTN_LOCAL) {
Michael Smith5c04c812011-04-07 04:51:50 +00002307 err = fib_validate_source(skb, saddr, daddr, tos,
Cong Wang0d5edc62014-04-15 16:25:35 -07002308 0, dev, in_dev, &itag);
Eric Dumazetb5f7e752010-06-02 12:05:27 +00002309 if (err < 0)
David Ahern0d753962015-09-28 11:10:44 -07002310 goto martian_source;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002311 goto local_input;
2312 }
2313
Duan Jiongcd0f0b92014-02-14 18:26:22 +08002314 if (!IN_DEV_FORWARD(in_dev)) {
2315 err = -EHOSTUNREACH;
David S. Miller251da412012-06-26 16:27:09 -07002316 goto no_route;
Duan Jiongcd0f0b92014-02-14 18:26:22 +08002317 }
David Ahern5510cdf2017-05-25 10:42:34 -07002318 if (res->type != RTN_UNICAST)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002319 goto martian_destination;
2320
Xin Long5cbf7772018-07-27 16:37:28 +08002321make_route:
Roopa Prabhue37b1e92018-02-28 22:42:41 -05002322 err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos, flkeys);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002323out: return err;
2324
2325brd_input:
2326 if (skb->protocol != htons(ETH_P_IP))
2327 goto e_inval;
2328
David S. Miller41347dc2012-06-28 04:05:27 -07002329 if (!ipv4_is_zeronet(saddr)) {
David S. Miller9e56e382012-06-28 18:54:02 -07002330 err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
2331 in_dev, &itag);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002332 if (err < 0)
David Ahern0d753962015-09-28 11:10:44 -07002333 goto martian_source;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002334 }
2335 flags |= RTCF_BROADCAST;
David Ahern5510cdf2017-05-25 10:42:34 -07002336 res->type = RTN_BROADCAST;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002337 RT_CACHE_STAT_INC(in_brd);
2338
2339local_input:
Xin Long0a904782019-06-02 19:10:24 +08002340 do_cache &= res->fi && !itag;
2341 if (do_cache) {
2342 struct fib_nh_common *nhc = FIB_RES_NHC(*res);
David Aherneba618a2019-04-02 14:11:55 -07002343
Xin Long0a904782019-06-02 19:10:24 +08002344 rth = rcu_dereference(nhc->nhc_rth_input);
2345 if (rt_cache_valid(rth)) {
2346 skb_dst_set_noref(skb, &rth->dst);
2347 err = 0;
2348 goto out;
David S. Millerd2d68ba92012-07-17 12:58:50 -07002349 }
2350 }
2351
David Ahernb87b04f2021-06-12 18:24:59 -06002352 rth = rt_dst_alloc(ip_rt_get_dev(net, res),
David Ahern5510cdf2017-05-25 10:42:34 -07002353 flags | RTCF_LOCAL, res->type,
Vincent Bernat62679a82020-11-07 20:35:15 +01002354 IN_DEV_ORCONF(in_dev, NOPOLICY), false);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002355 if (!rth)
2356 goto e_nobufs;
2357
Changli Gaod8d1f302010-06-10 23:31:35 -07002358 rth->dst.output= ip_rt_bug;
David S. Millercf911662011-04-28 14:31:47 -07002359#ifdef CONFIG_IP_ROUTE_CLASSID
2360 rth->dst.tclassid = itag;
2361#endif
David S. Miller9917e1e82012-07-17 14:44:26 -07002362 rth->rt_is_input = 1;
Roopa Prabhu571e7222015-07-21 10:43:47 +02002363
Duan Jionga6254862014-02-17 15:23:43 +08002364 RT_CACHE_STAT_INC(in_slow_tot);
David Ahern5510cdf2017-05-25 10:42:34 -07002365 if (res->type == RTN_UNREACHABLE) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002366 rth->dst.input= ip_error;
2367 rth->dst.error= -err;
Shubhankar Kuranagatti6b9c8f42021-03-11 02:43:43 +05302368 rth->rt_flags &= ~RTCF_LOCAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002369 }
Thomas Grafefd85702016-11-30 17:10:09 +01002370
Alexei Starovoitovdcdfdf52013-11-19 19:12:34 -08002371 if (do_cache) {
David Aherneba618a2019-04-02 14:11:55 -07002372 struct fib_nh_common *nhc = FIB_RES_NHC(*res);
Thomas Grafefd85702016-11-30 17:10:09 +01002373
David Aherneba618a2019-04-02 14:11:55 -07002374 rth->dst.lwtstate = lwtstate_get(nhc->nhc_lwtstate);
Thomas Grafefd85702016-11-30 17:10:09 +01002375 if (lwtunnel_input_redirect(rth->dst.lwtstate)) {
2376 WARN_ON(rth->dst.input == lwtunnel_input);
2377 rth->dst.lwtstate->orig_input = rth->dst.input;
2378 rth->dst.input = lwtunnel_input;
2379 }
2380
David Ahern87063a1f2019-04-30 07:45:49 -07002381 if (unlikely(!rt_cache_route(nhc, rth)))
Alexei Starovoitovdcdfdf52013-11-19 19:12:34 -08002382 rt_add_uncached_list(rth);
Alexei Starovoitovdcdfdf52013-11-19 19:12:34 -08002383 }
David S. Miller89aef892012-07-17 11:00:09 -07002384 skb_dst_set(skb, &rth->dst);
David S. Millerb23dd4f2011-03-02 14:31:35 -08002385 err = 0;
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00002386 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002387
2388no_route:
2389 RT_CACHE_STAT_INC(in_no_route);
David Ahern5510cdf2017-05-25 10:42:34 -07002390 res->type = RTN_UNREACHABLE;
2391 res->fi = NULL;
2392 res->table = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002393 goto local_input;
2394
2395 /*
2396 * Do not cache martian addresses: they should be logged (RFC1812)
2397 */
2398martian_destination:
2399 RT_CACHE_STAT_INC(in_martian_dst);
2400#ifdef CONFIG_IP_ROUTE_VERBOSE
Joe Perchese87cc472012-05-13 21:56:26 +00002401 if (IN_DEV_LOG_MARTIANS(in_dev))
2402 net_warn_ratelimited("martian destination %pI4 from %pI4, dev %s\n",
2403 &daddr, &saddr, dev->name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002404#endif
Dietmar Eggemann2c2910a2005-06-28 13:06:23 -07002405
Linus Torvalds1da177e2005-04-16 15:20:36 -07002406e_inval:
2407 err = -EINVAL;
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00002408 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002409
2410e_nobufs:
2411 err = -ENOBUFS;
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00002412 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002413
2414martian_source:
2415 ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00002416 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002417}
2418
David S. Millerc6cffba2012-07-26 11:14:38 +00002419int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2420 u8 tos, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002421{
David Ahern5510cdf2017-05-25 10:42:34 -07002422 struct fib_result res;
2423 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002424
Julian Anastasov6e280992017-02-26 17:14:35 +02002425 tos &= IPTOS_RT_MASK;
Eric Dumazet96d36222010-06-02 19:21:31 +00002426 rcu_read_lock();
David Ahern5510cdf2017-05-25 10:42:34 -07002427 err = ip_route_input_rcu(skb, daddr, saddr, tos, dev, &res);
2428 rcu_read_unlock();
Eric Dumazet96d36222010-06-02 19:21:31 +00002429
David Ahern5510cdf2017-05-25 10:42:34 -07002430 return err;
2431}
2432EXPORT_SYMBOL(ip_route_input_noref);
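
/* Illustrative usage sketch (added commentary, not part of the original
 * file): the receive path resolves an input route roughly as
 *
 *	const struct iphdr *iph = ip_hdr(skb);
 *	int err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
 *				       iph->tos, skb->dev);
 *	if (unlikely(err))
 *		goto drop;
 *
 * On success skb_dst(skb) is valid. The "noref" variant may install an
 * RCU-protected dst, so the result must not be kept past the RCU critical
 * section without taking a reference.
 */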
2433
2434/* called with rcu_read_lock held */
2435int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2436 u8 tos, struct net_device *dev, struct fib_result *res)
2437{
Linus Torvalds1da177e2005-04-16 15:20:36 -07002438 /* Multicast recognition logic is moved from route cache to here.
Shubhankar Kuranagatti6ad08602021-03-12 13:00:05 +05302439 * The problem was that too many Ethernet cards have broken/missing
2440	 * hardware multicast filters :-( As a result, a host on a multicast
2441	 * network acquires a lot of useless route cache entries, e.g. for
2442	 * SDR messages from all over the world. Now we try to get rid of them.
2443	 * Really, provided the software IP multicast filter is organized
2444	 * reasonably (at least, hashed), this does not result in a slowdown
2445	 * compared with route cache reject entries.
2446	 * Note that multicast routers are not affected, because a
2447	 * route cache entry is created eventually.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002448 */
Joe Perchesf97c1e02007-12-16 13:45:43 -08002449 if (ipv4_is_multicast(daddr)) {
Eric Dumazet96d36222010-06-02 19:21:31 +00002450 struct in_device *in_dev = __in_dev_get_rcu(dev);
David Aherne58e4152016-10-31 15:54:00 -07002451 int our = 0;
David Ahern5510cdf2017-05-25 10:42:34 -07002452 int err = -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002453
Paolo Abeni22c74762019-03-06 10:42:53 +01002454 if (!in_dev)
2455 return err;
2456 our = ip_check_mc_rcu(in_dev, daddr, saddr,
2457 ip_hdr(skb)->protocol);
David Aherne58e4152016-10-31 15:54:00 -07002458
2459 /* check l3 master if no match yet */
Paolo Abeni22c74762019-03-06 10:42:53 +01002460 if (!our && netif_is_l3_slave(dev)) {
David Aherne58e4152016-10-31 15:54:00 -07002461 struct in_device *l3_in_dev;
2462
2463 l3_in_dev = __in_dev_get_rcu(skb->dev);
2464 if (l3_in_dev)
2465 our = ip_check_mc_rcu(l3_in_dev, daddr, saddr,
2466 ip_hdr(skb)->protocol);
2467 }
2468
David Aherne58e4152016-10-31 15:54:00 -07002469 if (our
Linus Torvalds1da177e2005-04-16 15:20:36 -07002470#ifdef CONFIG_IP_MROUTE
David Aherne58e4152016-10-31 15:54:00 -07002471 ||
2472 (!ipv4_is_local_multicast(daddr) &&
2473 IN_DEV_MFORWARD(in_dev))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002474#endif
David Aherne58e4152016-10-31 15:54:00 -07002475 ) {
David Ahern5510cdf2017-05-25 10:42:34 -07002476 err = ip_route_input_mc(skb, daddr, saddr,
David Aherne58e4152016-10-31 15:54:00 -07002477 tos, dev, our);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002478 }
David Ahern5510cdf2017-05-25 10:42:34 -07002479 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002480 }
David Ahern5510cdf2017-05-25 10:42:34 -07002481
2482 return ip_route_input_slow(skb, daddr, saddr, tos, dev, res);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002483}
2484
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00002485/* called with rcu_read_lock() */
David S. Miller982721f2011-02-16 21:44:24 -08002486static struct rtable *__mkroute_output(const struct fib_result *res,
David Miller1a00fee2012-07-01 02:02:56 +00002487 const struct flowi4 *fl4, int orig_oif,
Julian Anastasovf61759e2011-12-02 11:39:42 +00002488 struct net_device *dev_out,
David S. Miller5ada5522011-02-17 15:29:00 -08002489 unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002490{
David S. Miller982721f2011-02-16 21:44:24 -08002491 struct fib_info *fi = res->fi;
David S. Millerf2bb4be2012-07-17 12:20:47 -07002492 struct fib_nh_exception *fnhe;
David S. Miller5ada5522011-02-17 15:29:00 -08002493 struct in_device *in_dev;
David S. Miller982721f2011-02-16 21:44:24 -08002494 u16 type = res->type;
David S. Miller5ada5522011-02-17 15:29:00 -08002495 struct rtable *rth;
Julian Anastasovc92b9652012-10-08 11:41:19 +00002496 bool do_cache;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002497
Thomas Grafd0daebc32012-06-12 00:44:01 +00002498 in_dev = __in_dev_get_rcu(dev_out);
2499 if (!in_dev)
David S. Miller5ada5522011-02-17 15:29:00 -08002500 return ERR_PTR(-EINVAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002501
Thomas Grafd0daebc32012-06-12 00:44:01 +00002502 if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
David Ahern5f02ce242016-09-10 12:09:54 -07002503 if (ipv4_is_loopback(fl4->saddr) &&
2504 !(dev_out->flags & IFF_LOOPBACK) &&
2505 !netif_is_l3_master(dev_out))
Thomas Grafd0daebc32012-06-12 00:44:01 +00002506 return ERR_PTR(-EINVAL);
2507
David S. Miller68a5e3d2011-03-11 20:07:33 -05002508 if (ipv4_is_lbcast(fl4->daddr))
David S. Miller982721f2011-02-16 21:44:24 -08002509 type = RTN_BROADCAST;
David S. Miller68a5e3d2011-03-11 20:07:33 -05002510 else if (ipv4_is_multicast(fl4->daddr))
David S. Miller982721f2011-02-16 21:44:24 -08002511 type = RTN_MULTICAST;
David S. Miller68a5e3d2011-03-11 20:07:33 -05002512 else if (ipv4_is_zeronet(fl4->daddr))
David S. Miller5ada5522011-02-17 15:29:00 -08002513 return ERR_PTR(-EINVAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002514
2515 if (dev_out->flags & IFF_LOOPBACK)
2516 flags |= RTCF_LOCAL;
2517
Julian Anastasov63617422012-11-22 23:04:14 +02002518 do_cache = true;
David S. Miller982721f2011-02-16 21:44:24 -08002519 if (type == RTN_BROADCAST) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002520 flags |= RTCF_BROADCAST | RTCF_LOCAL;
David S. Miller982721f2011-02-16 21:44:24 -08002521 fi = NULL;
2522 } else if (type == RTN_MULTICAST) {
Eric Dumazetdd28d1a2010-09-29 11:53:50 +00002523 flags |= RTCF_MULTICAST | RTCF_LOCAL;
David S. Miller813b3b52011-04-28 14:48:42 -07002524 if (!ip_check_mc_rcu(in_dev, fl4->daddr, fl4->saddr,
2525 fl4->flowi4_proto))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002526 flags &= ~RTCF_LOCAL;
Julian Anastasov63617422012-11-22 23:04:14 +02002527 else
2528 do_cache = false;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002529	/* If a multicast route does not exist, use
Eric Dumazetdd28d1a2010-09-29 11:53:50 +00002530	 * the default one, but do not gateway in this case.
2531	 * Yes, it is a hack.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002532 */
David S. Miller982721f2011-02-16 21:44:24 -08002533 if (fi && res->prefixlen < 4)
2534 fi = NULL;
Chris Friesend6d5e992016-04-08 15:21:30 -06002535 } else if ((type == RTN_LOCAL) && (orig_oif != 0) &&
2536 (orig_oif != dev_out->ifindex)) {
2537 /* For local routes that require a particular output interface
2538 * we do not want to cache the result. Caching the result
2539 * causes incorrect behaviour when there are multiple source
2540 * addresses on the interface, the end result being that if the
2541 * intended recipient is waiting on that interface for the
2542	 * packet, it won't be received, because it will be delivered on
2543 * the loopback interface and the IP_PKTINFO ipi_ifindex will
2544 * be set to the loopback interface as well.
2545 */
Julian Anastasov94720e32018-05-02 09:41:19 +03002546 do_cache = false;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002547 }
2548
David S. Millerf2bb4be2012-07-17 12:20:47 -07002549 fnhe = NULL;
Julian Anastasov63617422012-11-22 23:04:14 +02002550 do_cache &= fi != NULL;
Julian Anastasov94720e32018-05-02 09:41:19 +03002551 if (fi) {
David Aherneba618a2019-04-02 14:11:55 -07002552 struct fib_nh_common *nhc = FIB_RES_NHC(*res);
David S. Millerc5038a82012-07-31 15:02:02 -07002553 struct rtable __rcu **prth;
Eric Dumazetd26b3a72012-07-31 05:45:30 +00002554
David Aherna5995e72019-04-30 07:45:50 -07002555 fnhe = find_exception(nhc, fl4->daddr);
Julian Anastasov94720e32018-05-02 09:41:19 +03002556 if (!do_cache)
2557 goto add;
Xin Longdeed49d2016-02-18 21:21:19 +08002558 if (fnhe) {
Timo Teräs2ffae992013-06-27 10:27:05 +03002559 prth = &fnhe->fnhe_rth_output;
Julian Anastasov94720e32018-05-02 09:41:19 +03002560 } else {
2561 if (unlikely(fl4->flowi4_flags &
2562 FLOWI_FLAG_KNOWN_NH &&
David Ahernbdf00462019-04-05 16:30:26 -07002563 !(nhc->nhc_gw_family &&
David Aherneba618a2019-04-02 14:11:55 -07002564 nhc->nhc_scope == RT_SCOPE_LINK))) {
Julian Anastasov94720e32018-05-02 09:41:19 +03002565 do_cache = false;
2566 goto add;
Julian Anastasovc92b9652012-10-08 11:41:19 +00002567 }
David Ahern0f457a32019-04-30 07:45:48 -07002568 prth = raw_cpu_ptr(nhc->nhc_pcpu_rth_output);
Julian Anastasovc92b9652012-10-08 11:41:19 +00002569 }
David S. Millerc5038a82012-07-31 15:02:02 -07002570 rth = rcu_dereference(*prth);
Wei Wang9df16ef2017-06-17 10:42:31 -07002571 if (rt_cache_valid(rth) && dst_hold_safe(&rth->dst))
David S. Millerc5038a82012-07-31 15:02:02 -07002572 return rth;
David S. Millerf2bb4be2012-07-17 12:20:47 -07002573 }
Julian Anastasovc92b9652012-10-08 11:41:19 +00002574
2575add:
David Ahernd08c4f32015-09-02 13:58:34 -07002576 rth = rt_dst_alloc(dev_out, flags, type,
Vincent Bernat62679a82020-11-07 20:35:15 +01002577 IN_DEV_ORCONF(in_dev, NOPOLICY),
2578 IN_DEV_ORCONF(in_dev, NOXFRM));
Dimitris Michailidis8391d072010-10-07 14:48:38 +00002579 if (!rth)
David S. Miller5ada5522011-02-17 15:29:00 -08002580 return ERR_PTR(-ENOBUFS);
Dimitris Michailidis8391d072010-10-07 14:48:38 +00002581
David Ahern9438c872017-08-11 17:02:02 -07002582 rth->rt_iif = orig_oif;
David Ahernb7503e02015-09-02 13:58:35 -07002583
Linus Torvalds1da177e2005-04-16 15:20:36 -07002584 RT_CACHE_STAT_INC(out_slow_tot);
2585
Linus Torvalds1da177e2005-04-16 15:20:36 -07002586 if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002587 if (flags & RTCF_LOCAL &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07002588 !(dev_out->flags & IFF_LOOPBACK)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002589 rth->dst.output = ip_mc_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002590 RT_CACHE_STAT_INC(out_slow_mc);
2591 }
2592#ifdef CONFIG_IP_MROUTE
David S. Miller982721f2011-02-16 21:44:24 -08002593 if (type == RTN_MULTICAST) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002594 if (IN_DEV_MFORWARD(in_dev) &&
David S. Miller813b3b52011-04-28 14:48:42 -07002595 !ipv4_is_local_multicast(fl4->daddr)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002596 rth->dst.input = ip_mr_input;
2597 rth->dst.output = ip_mc_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002598 }
2599 }
2600#endif
2601 }
2602
Wei Wanga4c2fd72017-06-17 10:42:42 -07002603 rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0, do_cache);
David Ahern99428952018-02-13 20:32:04 -08002604 lwtunnel_set_redirect(&rth->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002605
David S. Miller5ada5522011-02-17 15:29:00 -08002606 return rth;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002607}
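
/* Summary (added commentary): __mkroute_output() caches the new rtable in
 * the nexthop (or a matching exception) unless the result must stay
 * per-packet: broadcast (fi cleared), multicast that is delivered locally,
 * and RTN_LOCAL lookups bound to a particular oif all force
 * do_cache = false above.
 */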
2608
Linus Torvalds1da177e2005-04-16 15:20:36 -07002609/*
2610 * Major route resolver routine.
2611 */
2612
David Ahern3abd1ade2017-05-25 10:42:33 -07002613struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
2614 const struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002615{
Julian Anastasovf61759e2011-12-02 11:39:42 +00002616 __u8 tos = RT_FL_TOS(fl4);
Eric Dumazetd0ea2b12018-04-07 13:42:42 -07002617 struct fib_result res = {
2618 .type = RTN_UNSPEC,
2619 .fi = NULL,
2620 .table = NULL,
2621 .tclassid = 0,
2622 };
David S. Miller5ada5522011-02-17 15:29:00 -08002623 struct rtable *rth;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002624
Pavel Emelyanov1fb94892012-08-08 21:53:36 +00002625 fl4->flowi4_iif = LOOPBACK_IFINDEX;
David S. Miller813b3b52011-04-28 14:48:42 -07002626 fl4->flowi4_tos = tos & IPTOS_RT_MASK;
2627 fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
2628 RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
David S. Miller44713b62011-03-04 21:24:47 -08002629
David S. Miller010c2702011-02-17 15:37:09 -08002630 rcu_read_lock();
David Ahern3abd1ade2017-05-25 10:42:33 -07002631 rth = ip_route_output_key_hash_rcu(net, fl4, &res, skb);
2632 rcu_read_unlock();
2633
2634 return rth;
2635}
2636EXPORT_SYMBOL_GPL(ip_route_output_key_hash);
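
/* Illustrative sketch (added commentary; variable names are assumptions):
 * resolving an output route directly through this resolver might look like
 *
 *	struct flowi4 fl4 = {
 *		.daddr = daddr,
 *		.saddr = 0,		(0 lets the lookup pick a source)
 *		.flowi4_oif = 0,
 *	};
 *	struct rtable *rt = ip_route_output_key_hash(net, &fl4, NULL);
 *
 *	if (IS_ERR(rt))
 *		return PTR_ERR(rt);
 *	...
 *	ip_rt_put(rt);
 *
 * fl4 is in/out: the chosen saddr, tos and scope are written back to it.
 */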
2637
2638struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4,
2639 struct fib_result *res,
2640 const struct sk_buff *skb)
2641{
2642 struct net_device *dev_out = NULL;
2643 int orig_oif = fl4->flowi4_oif;
2644 unsigned int flags = 0;
2645 struct rtable *rth;
Stefano Brivio595e0652019-10-16 20:52:09 +02002646 int err;
David Ahern3abd1ade2017-05-25 10:42:33 -07002647
David S. Miller813b3b52011-04-28 14:48:42 -07002648 if (fl4->saddr) {
David S. Miller813b3b52011-04-28 14:48:42 -07002649 if (ipv4_is_multicast(fl4->saddr) ||
2650 ipv4_is_lbcast(fl4->saddr) ||
Stefano Brivio595e0652019-10-16 20:52:09 +02002651 ipv4_is_zeronet(fl4->saddr)) {
2652 rth = ERR_PTR(-EINVAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002653 goto out;
Stefano Brivio595e0652019-10-16 20:52:09 +02002654 }
2655
2656 rth = ERR_PTR(-ENETUNREACH);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002657
Linus Torvalds1da177e2005-04-16 15:20:36 -07002658	/* I removed the check for oif == dev_out->oif here.
Shubhankar Kuranagatti6ad08602021-03-12 13:00:05 +05302659	 * It was wrong for two reasons:
2660	 * 1. ip_dev_find(net, saddr) can return the wrong iface if saddr
2661	 * is assigned to multiple interfaces.
2662	 * 2. Moreover, we are allowed to send packets with the saddr
2663	 * of another iface. --ANK
Linus Torvalds1da177e2005-04-16 15:20:36 -07002664 */
2665
David S. Miller813b3b52011-04-28 14:48:42 -07002666 if (fl4->flowi4_oif == 0 &&
2667 (ipv4_is_multicast(fl4->daddr) ||
2668 ipv4_is_lbcast(fl4->daddr))) {
Julian Anastasova210d012008-10-01 07:28:28 -07002669 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
David S. Miller813b3b52011-04-28 14:48:42 -07002670 dev_out = __ip_dev_find(net, fl4->saddr, false);
Ian Morris51456b22015-04-03 09:17:26 +01002671 if (!dev_out)
Julian Anastasova210d012008-10-01 07:28:28 -07002672 goto out;
2673
Linus Torvalds1da177e2005-04-16 15:20:36 -07002674	/* Special hack: the user can direct multicasts
Shubhankar Kuranagatti6ad08602021-03-12 13:00:05 +05302675	 * and limited broadcasts via the necessary interface
2676	 * without fiddling with IP_MULTICAST_IF or IP_PKTINFO.
2677	 * This hack is not just for fun, it allows
2678	 * vic, vat and friends to work.
2679	 * They bind a socket to loopback, set the ttl to zero
2680	 * and expect that it will work.
2681	 * From the viewpoint of the routing cache they are broken,
2682	 * because we are not allowed to build a multicast path
2683	 * with a loopback source addr (look, the routing cache
2684	 * cannot know that the ttl is zero, so the packet
2685	 * will not leave this host and the route is valid).
2686	 * Luckily, this hack is a good workaround.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002687 */
2688
David S. Miller813b3b52011-04-28 14:48:42 -07002689 fl4->flowi4_oif = dev_out->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002690 goto make_route;
2691 }
Julian Anastasova210d012008-10-01 07:28:28 -07002692
David S. Miller813b3b52011-04-28 14:48:42 -07002693 if (!(fl4->flowi4_flags & FLOWI_FLAG_ANYSRC)) {
Julian Anastasova210d012008-10-01 07:28:28 -07002694 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
David S. Miller813b3b52011-04-28 14:48:42 -07002695 if (!__ip_dev_find(net, fl4->saddr, false))
Julian Anastasova210d012008-10-01 07:28:28 -07002696 goto out;
Julian Anastasova210d012008-10-01 07:28:28 -07002697 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002698 }
2699
2700
David S. Miller813b3b52011-04-28 14:48:42 -07002701 if (fl4->flowi4_oif) {
2702 dev_out = dev_get_by_index_rcu(net, fl4->flowi4_oif);
David S. Millerb23dd4f2011-03-02 14:31:35 -08002703 rth = ERR_PTR(-ENODEV);
Ian Morris51456b22015-04-03 09:17:26 +01002704 if (!dev_out)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002705 goto out;
Herbert Xue5ed6392005-10-03 14:35:55 -07002706
2707 /* RACE: Check return value of inet_select_addr instead. */
Eric Dumazetfc75fc82010-12-22 04:39:39 +00002708 if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) {
David S. Millerb23dd4f2011-03-02 14:31:35 -08002709 rth = ERR_PTR(-ENETUNREACH);
Eric Dumazetfc75fc82010-12-22 04:39:39 +00002710 goto out;
2711 }
David S. Miller813b3b52011-04-28 14:48:42 -07002712 if (ipv4_is_local_multicast(fl4->daddr) ||
Andrew Lunn6a211652015-05-01 16:39:54 +02002713 ipv4_is_lbcast(fl4->daddr) ||
2714 fl4->flowi4_proto == IPPROTO_IGMP) {
David S. Miller813b3b52011-04-28 14:48:42 -07002715 if (!fl4->saddr)
2716 fl4->saddr = inet_select_addr(dev_out, 0,
2717 RT_SCOPE_LINK);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002718 goto make_route;
2719 }
Jiri Benc0a7e2262013-10-04 17:04:48 +02002720 if (!fl4->saddr) {
David S. Miller813b3b52011-04-28 14:48:42 -07002721 if (ipv4_is_multicast(fl4->daddr))
2722 fl4->saddr = inet_select_addr(dev_out, 0,
2723 fl4->flowi4_scope);
2724 else if (!fl4->daddr)
2725 fl4->saddr = inet_select_addr(dev_out, 0,
2726 RT_SCOPE_HOST);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002727 }
2728 }
2729
David S. Miller813b3b52011-04-28 14:48:42 -07002730 if (!fl4->daddr) {
2731 fl4->daddr = fl4->saddr;
2732 if (!fl4->daddr)
2733 fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK);
Denis V. Lunevb40afd02008-01-22 22:06:19 -08002734 dev_out = net->loopback_dev;
Pavel Emelyanov1fb94892012-08-08 21:53:36 +00002735 fl4->flowi4_oif = LOOPBACK_IFINDEX;
David Ahern3abd1ade2017-05-25 10:42:33 -07002736 res->type = RTN_LOCAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002737 flags |= RTCF_LOCAL;
2738 goto make_route;
2739 }
2740
David Ahern3abd1ade2017-05-25 10:42:33 -07002741 err = fib_lookup(net, fl4, res, 0);
Nikola Forró0315e382015-09-17 16:01:32 +02002742 if (err) {
David Ahern3abd1ade2017-05-25 10:42:33 -07002743 res->fi = NULL;
2744 res->table = NULL;
David Ahern6104e112016-10-12 13:20:11 -07002745 if (fl4->flowi4_oif &&
David Aherne58e4152016-10-31 15:54:00 -07002746 (ipv4_is_multicast(fl4->daddr) ||
2747 !netif_index_is_l3_master(net, fl4->flowi4_oif))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002748	/* Apparently, the routing tables are wrong. Assume
Shubhankar Kuranagatti6ad08602021-03-12 13:00:05 +05302749	 * that the destination is on-link.
2750	 *
2751	 * WHY? DW.
2752	 * Because we are allowed to send to an iface
2753	 * even if it has NO routes and NO assigned
2754	 * addresses. When oif is specified, the routing
2755	 * tables are looked up with only one purpose:
2756	 * to catch whether the destination is gatewayed rather than
2757	 * direct. Moreover, if MSG_DONTROUTE is set,
2758	 * we send the packet, ignoring both the routing tables
2759	 * and the ifaddr state. --ANK
2760	 *
2761	 *
2762	 * We could do this even if oif is unknown
2763	 * (as IPv6 likely does), but we do not.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002764 */
2765
David S. Miller813b3b52011-04-28 14:48:42 -07002766 if (fl4->saddr == 0)
2767 fl4->saddr = inet_select_addr(dev_out, 0,
2768 RT_SCOPE_LINK);
David Ahern3abd1ade2017-05-25 10:42:33 -07002769 res->type = RTN_UNICAST;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002770 goto make_route;
2771 }
Nikola Forró0315e382015-09-17 16:01:32 +02002772 rth = ERR_PTR(err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002773 goto out;
2774 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002775
David Ahern3abd1ade2017-05-25 10:42:33 -07002776 if (res->type == RTN_LOCAL) {
David S. Miller813b3b52011-04-28 14:48:42 -07002777 if (!fl4->saddr) {
David Ahern3abd1ade2017-05-25 10:42:33 -07002778 if (res->fi->fib_prefsrc)
2779 fl4->saddr = res->fi->fib_prefsrc;
Joel Sing9fc3bbb2011-01-03 20:24:20 +00002780 else
David S. Miller813b3b52011-04-28 14:48:42 -07002781 fl4->saddr = fl4->daddr;
Joel Sing9fc3bbb2011-01-03 20:24:20 +00002782 }
David Ahern5f02ce242016-09-10 12:09:54 -07002783
2784 /* L3 master device is the loopback for that domain */
David Ahern3abd1ade2017-05-25 10:42:33 -07002785 dev_out = l3mdev_master_dev_rcu(FIB_RES_DEV(*res)) ? :
Robert Shearmanb7c84872017-04-21 21:34:59 +01002786 net->loopback_dev;
David Ahern839da4d2017-08-10 13:49:10 -07002787
2788 /* make sure orig_oif points to fib result device even
2789 * though packet rx/tx happens over loopback or l3mdev
2790 */
2791 orig_oif = FIB_RES_OIF(*res);
2792
David S. Miller813b3b52011-04-28 14:48:42 -07002793 fl4->flowi4_oif = dev_out->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002794 flags |= RTCF_LOCAL;
2795 goto make_route;
2796 }
2797
David Ahern3abd1ade2017-05-25 10:42:33 -07002798 fib_select_path(net, res, fl4, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002799
David Ahern3abd1ade2017-05-25 10:42:33 -07002800 dev_out = FIB_RES_DEV(*res);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002801
2802make_route:
David Ahern3abd1ade2017-05-25 10:42:33 -07002803 rth = __mkroute_output(res, fl4, orig_oif, dev_out, flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002804
David S. Miller010c2702011-02-17 15:37:09 -08002805out:
David S. Millerb23dd4f2011-03-02 14:31:35 -08002806 return rth;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002807}
Arnaldo Carvalho de Melod8c97a92005-08-09 20:12:12 -07002808
David S. Miller14e50e52007-05-24 18:17:54 -07002809static struct dst_ops ipv4_dst_blackhole_ops = {
Daniel Borkmannc4c877b2021-03-10 01:38:09 +01002810 .family = AF_INET,
2811 .default_advmss = ipv4_default_advmss,
2812 .neigh_lookup = ipv4_neigh_lookup,
2813 .check = dst_blackhole_check,
2814 .cow_metrics = dst_blackhole_cow_metrics,
2815 .update_pmtu = dst_blackhole_update_pmtu,
2816 .redirect = dst_blackhole_redirect,
2817 .mtu = dst_blackhole_mtu,
David S. Miller14e50e52007-05-24 18:17:54 -07002818};
2819
David S. Miller2774c132011-03-01 14:59:04 -08002820struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig)
David S. Miller14e50e52007-05-24 18:17:54 -07002821{
David S. Miller2774c132011-03-01 14:59:04 -08002822 struct rtable *ort = (struct rtable *) dst_orig;
David S. Millerf5b0a872012-07-19 12:31:33 -07002823 struct rtable *rt;
David S. Miller14e50e52007-05-24 18:17:54 -07002824
Steffen Klassert6c0e7282017-10-09 08:43:55 +02002825 rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_DEAD, 0);
David S. Miller14e50e52007-05-24 18:17:54 -07002826 if (rt) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002827 struct dst_entry *new = &rt->dst;
David S. Miller14e50e52007-05-24 18:17:54 -07002828
David S. Miller14e50e52007-05-24 18:17:54 -07002829 new->__use = 1;
Herbert Xu352e5122007-11-13 21:34:06 -08002830 new->input = dst_discard;
Eric W. Biedermanede20592015-10-07 16:48:47 -05002831 new->output = dst_discard_out;
David S. Miller14e50e52007-05-24 18:17:54 -07002832
Wei Wang1dbe32522017-06-17 10:42:26 -07002833 new->dev = net->loopback_dev;
David S. Miller14e50e52007-05-24 18:17:54 -07002834 if (new->dev)
2835 dev_hold(new->dev);
2836
David S. Miller9917e1e82012-07-17 14:44:26 -07002837 rt->rt_is_input = ort->rt_is_input;
David S. Miller5e2b61f2011-03-04 21:47:09 -08002838 rt->rt_iif = ort->rt_iif;
David S. Miller59436342012-07-10 06:58:42 -07002839 rt->rt_pmtu = ort->rt_pmtu;
Sabrina Dubrocad52e5a72018-03-14 10:21:14 +01002840 rt->rt_mtu_locked = ort->rt_mtu_locked;
David S. Miller14e50e52007-05-24 18:17:54 -07002841
fan.duca4c3fc2013-07-30 08:33:53 +08002842 rt->rt_genid = rt_genid_ipv4(net);
David S. Miller14e50e52007-05-24 18:17:54 -07002843 rt->rt_flags = ort->rt_flags;
2844 rt->rt_type = ort->rt_type;
David Ahern77d5bc72019-09-17 10:39:49 -07002845 rt->rt_uses_gateway = ort->rt_uses_gateway;
David Ahern1550c172019-04-05 16:30:27 -07002846 rt->rt_gw_family = ort->rt_gw_family;
2847 if (rt->rt_gw_family == AF_INET)
2848 rt->rt_gw4 = ort->rt_gw4;
David Ahern0f5f7d72019-04-05 16:30:29 -07002849 else if (rt->rt_gw_family == AF_INET6)
2850 rt->rt_gw6 = ort->rt_gw6;
David S. Miller14e50e52007-05-24 18:17:54 -07002851
David S. Millercaacf052012-07-31 15:06:50 -07002852 INIT_LIST_HEAD(&rt->rt_uncached);
David S. Miller14e50e52007-05-24 18:17:54 -07002853 }
2854
David S. Miller2774c132011-03-01 14:59:04 -08002855 dst_release(dst_orig);
2856
2857 return rt ? &rt->dst : ERR_PTR(-ENOMEM);
David S. Miller14e50e52007-05-24 18:17:54 -07002858}
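
/* Note (added commentary; the caller is an assumption): the blackhole copy
 * keeps addressing, metrics and gateway information but discards all I/O
 * via dst_discard/dst_discard_out. xfrm uses such a dst while IPsec state
 * resolution is still pending, so packets are silently dropped rather than
 * sent unprotected.
 */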
2859
David S. Miller9d6ec932011-03-12 01:12:47 -05002860struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
Eric Dumazet6f9c9612015-09-25 07:39:10 -07002861 const struct sock *sk)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002862{
David S. Miller9d6ec932011-03-12 01:12:47 -05002863 struct rtable *rt = __ip_route_output_key(net, flp4);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002864
David S. Millerb23dd4f2011-03-02 14:31:35 -08002865 if (IS_ERR(rt))
2866 return rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002867
David Ahern874fb9e2020-10-09 11:01:01 -07002868 if (flp4->flowi4_proto) {
2869 flp4->flowi4_oif = rt->dst.dev->ifindex;
Steffen Klassertf92ee612014-09-16 10:08:40 +02002870 rt = (struct rtable *)xfrm_lookup_route(net, &rt->dst,
2871 flowi4_to_flowi(flp4),
2872 sk, 0);
David Ahern874fb9e2020-10-09 11:01:01 -07002873 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002874
David S. Millerb23dd4f2011-03-02 14:31:35 -08002875 return rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002876}
Arnaldo Carvalho de Melod8c97a92005-08-09 20:12:12 -07002877EXPORT_SYMBOL_GPL(ip_route_output_flow);
2878
Martin Varghese571912c2020-02-24 10:57:50 +05302879struct rtable *ip_route_output_tunnel(struct sk_buff *skb,
2880 struct net_device *dev,
2881 struct net *net, __be32 *saddr,
2882 const struct ip_tunnel_info *info,
2883 u8 protocol, bool use_cache)
2884{
2885#ifdef CONFIG_DST_CACHE
2886 struct dst_cache *dst_cache;
2887#endif
2888 struct rtable *rt = NULL;
2889 struct flowi4 fl4;
2890 __u8 tos;
2891
2892#ifdef CONFIG_DST_CACHE
2893 dst_cache = (struct dst_cache *)&info->dst_cache;
2894 if (use_cache) {
2895 rt = dst_cache_get_ip4(dst_cache, saddr);
2896 if (rt)
2897 return rt;
2898 }
2899#endif
2900 memset(&fl4, 0, sizeof(fl4));
2901 fl4.flowi4_mark = skb->mark;
2902 fl4.flowi4_proto = protocol;
2903 fl4.daddr = info->key.u.ipv4.dst;
2904 fl4.saddr = info->key.u.ipv4.src;
2905 tos = info->key.tos;
2906 fl4.flowi4_tos = RT_TOS(tos);
2907
2908 rt = ip_route_output_key(net, &fl4);
2909 if (IS_ERR(rt)) {
2910 netdev_dbg(dev, "no route to %pI4\n", &fl4.daddr);
2911 return ERR_PTR(-ENETUNREACH);
2912 }
2913 if (rt->dst.dev == dev) { /* is this necessary? */
2914 netdev_dbg(dev, "circular route to %pI4\n", &fl4.daddr);
2915 ip_rt_put(rt);
2916 return ERR_PTR(-ELOOP);
2917 }
2918#ifdef CONFIG_DST_CACHE
2919 if (use_cache)
2920 dst_cache_set_ip4(dst_cache, &rt->dst, fl4.saddr);
2921#endif
2922 *saddr = fl4.saddr;
2923 return rt;
2924}
2925EXPORT_SYMBOL_GPL(ip_route_output_tunnel);
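
/* Illustrative sketch (added commentary; variable names are assumptions):
 * a collect-metadata tunnel transmit path would use this as
 *
 *	__be32 saddr;
 *	struct rtable *rt;
 *
 *	rt = ip_route_output_tunnel(skb, dev, net, &saddr,
 *				    info, IPPROTO_UDP, use_cache);
 *	if (IS_ERR(rt))
 *		goto tx_error;
 *
 * *saddr returns the chosen local address for building the outer IP
 * header, and the -ELOOP check above guards against routing the tunnel
 * back out of its own device.
 */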
2926
David Ahern3765d352017-05-25 10:42:36 -07002927/* called with rcu_read_lock held */
Roopa Prabhu404eb772018-05-22 14:03:27 -07002928static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
2929 struct rtable *rt, u32 table_id, struct flowi4 *fl4,
John Fastabende93fb3e2019-08-23 17:11:38 -07002930 struct sk_buff *skb, u32 portid, u32 seq,
2931 unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002932{
Linus Torvalds1da177e2005-04-16 15:20:36 -07002933 struct rtmsg *r;
Thomas Grafbe403ea2006-08-17 18:15:17 -07002934 struct nlmsghdr *nlh;
Steffen Klassert2bc8ca42011-10-11 01:12:02 +00002935 unsigned long expires = 0;
David S. Millerf1850712012-07-10 07:26:01 -07002936 u32 error;
Julian Anastasov521f5492012-07-20 12:02:08 +03002937 u32 metrics[RTAX_MAX];
Thomas Grafbe403ea2006-08-17 18:15:17 -07002938
John Fastabende93fb3e2019-08-23 17:11:38 -07002939 nlh = nlmsg_put(skb, portid, seq, RTM_NEWROUTE, sizeof(*r), flags);
Ian Morris51456b22015-04-03 09:17:26 +01002940 if (!nlh)
Patrick McHardy26932562007-01-31 23:16:40 -08002941 return -EMSGSIZE;
Thomas Grafbe403ea2006-08-17 18:15:17 -07002942
2943 r = nlmsg_data(nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002944 r->rtm_family = AF_INET;
2945 r->rtm_dst_len = 32;
2946 r->rtm_src_len = 0;
Stefano Briviod9489742019-06-21 17:45:22 +02002947 r->rtm_tos = fl4 ? fl4->flowi4_tos : 0;
David Ahern8a430ed2017-01-11 15:42:17 -08002948 r->rtm_table = table_id < 256 ? table_id : RT_TABLE_COMPAT;
David Ahernc36ba662015-09-02 13:58:36 -07002949 if (nla_put_u32(skb, RTA_TABLE, table_id))
David S. Millerf3756b72012-04-01 20:39:02 -04002950 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002951 r->rtm_type = rt->rt_type;
2952 r->rtm_scope = RT_SCOPE_UNIVERSE;
2953 r->rtm_protocol = RTPROT_UNSPEC;
2954 r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
2955 if (rt->rt_flags & RTCF_NOTIFY)
2956 r->rtm_flags |= RTM_F_NOTIFY;
Hannes Frederic Sowadf4d9252015-01-23 12:01:26 +01002957 if (IPCB(skb)->flags & IPSKB_DOREDIRECT)
2958 r->rtm_flags |= RTCF_DOREDIRECT;
Thomas Grafbe403ea2006-08-17 18:15:17 -07002959
Jiri Benc930345e2015-03-29 16:59:25 +02002960 if (nla_put_in_addr(skb, RTA_DST, dst))
David S. Millerf3756b72012-04-01 20:39:02 -04002961 goto nla_put_failure;
David Miller1a00fee2012-07-01 02:02:56 +00002962 if (src) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002963 r->rtm_src_len = 32;
Jiri Benc930345e2015-03-29 16:59:25 +02002964 if (nla_put_in_addr(skb, RTA_SRC, src))
David S. Millerf3756b72012-04-01 20:39:02 -04002965 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002966 }
David S. Millerf3756b72012-04-01 20:39:02 -04002967 if (rt->dst.dev &&
2968 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2969 goto nla_put_failure;
Oliver Hermsae8cb932020-11-13 09:55:17 +01002970 if (rt->dst.lwtstate &&
2971 lwtunnel_fill_encap(skb, rt->dst.lwtstate, RTA_ENCAP, RTA_ENCAP_TYPE) < 0)
2972 goto nla_put_failure;
Patrick McHardyc7066f72011-01-14 13:36:42 +01002973#ifdef CONFIG_IP_ROUTE_CLASSID
David S. Millerf3756b72012-04-01 20:39:02 -04002974 if (rt->dst.tclassid &&
2975 nla_put_u32(skb, RTA_FLOW, rt->dst.tclassid))
2976 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002977#endif
Stefano Briviod9489742019-06-21 17:45:22 +02002978 if (fl4 && !rt_is_input_route(rt) &&
David Millerd6c0a4f2012-07-01 02:02:59 +00002979 fl4->saddr != src) {
Jiri Benc930345e2015-03-29 16:59:25 +02002980 if (nla_put_in_addr(skb, RTA_PREFSRC, fl4->saddr))
David S. Millerf3756b72012-04-01 20:39:02 -04002981 goto nla_put_failure;
2982 }
David Ahern77d5bc72019-09-17 10:39:49 -07002983 if (rt->rt_uses_gateway) {
2984 if (rt->rt_gw_family == AF_INET &&
2985 nla_put_in_addr(skb, RTA_GATEWAY, rt->rt_gw4)) {
David Ahern0f5f7d72019-04-05 16:30:29 -07002986 goto nla_put_failure;
David Ahern77d5bc72019-09-17 10:39:49 -07002987 } else if (rt->rt_gw_family == AF_INET6) {
2988 int alen = sizeof(struct in6_addr);
2989 struct nlattr *nla;
2990 struct rtvia *via;
David Ahern0f5f7d72019-04-05 16:30:29 -07002991
David Ahern77d5bc72019-09-17 10:39:49 -07002992 nla = nla_reserve(skb, RTA_VIA, alen + 2);
2993 if (!nla)
2994 goto nla_put_failure;
2995
2996 via = nla_data(nla);
2997 via->rtvia_family = AF_INET6;
2998 memcpy(via->rtvia_addr, &rt->rt_gw6, alen);
2999 }
David Ahern0f5f7d72019-04-05 16:30:29 -07003000 }
Thomas Grafbe403ea2006-08-17 18:15:17 -07003001
Steffen Klassertee9a8f72012-10-08 00:56:54 +00003002 expires = rt->dst.expires;
3003 if (expires) {
3004 unsigned long now = jiffies;
3005
3006 if (time_before(now, expires))
3007 expires -= now;
3008 else
3009 expires = 0;
3010 }
3011
Julian Anastasov521f5492012-07-20 12:02:08 +03003012 memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
Steffen Klassertee9a8f72012-10-08 00:56:54 +00003013 if (rt->rt_pmtu && expires)
Julian Anastasov521f5492012-07-20 12:02:08 +03003014 metrics[RTAX_MTU - 1] = rt->rt_pmtu;
Sabrina Dubrocad52e5a72018-03-14 10:21:14 +01003015 if (rt->rt_mtu_locked && expires)
3016 metrics[RTAX_LOCK - 1] |= BIT(RTAX_MTU);
Julian Anastasov521f5492012-07-20 12:02:08 +03003017 if (rtnetlink_put_metrics(skb, metrics) < 0)
Thomas Grafbe403ea2006-08-17 18:15:17 -07003018 goto nla_put_failure;
3019
Stefano Briviod9489742019-06-21 17:45:22 +02003020 if (fl4) {
3021 if (fl4->flowi4_mark &&
3022 nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark))
3023 goto nla_put_failure;
Eric Dumazet963bfee2010-07-20 22:03:14 +00003024
Stefano Briviod9489742019-06-21 17:45:22 +02003025 if (!uid_eq(fl4->flowi4_uid, INVALID_UID) &&
3026 nla_put_u32(skb, RTA_UID,
3027 from_kuid_munged(current_user_ns(),
3028 fl4->flowi4_uid)))
3029 goto nla_put_failure;
3030
3031 if (rt_is_input_route(rt)) {
3032#ifdef CONFIG_IP_MROUTE
3033 if (ipv4_is_multicast(dst) &&
3034 !ipv4_is_local_multicast(dst) &&
3035 IPV4_DEVCONF_ALL(net, MC_FORWARDING)) {
3036 int err = ipmr_get_route(net, skb,
3037 fl4->saddr, fl4->daddr,
3038 r, portid);
3039
3040 if (err <= 0) {
3041 if (err == 0)
3042 return 0;
3043 goto nla_put_failure;
3044 }
3045 } else
3046#endif
3047 if (nla_put_u32(skb, RTA_IIF, fl4->flowi4_iif))
3048 goto nla_put_failure;
3049 }
3050 }
Lorenzo Colitti622ec2c2016-11-04 02:23:42 +09003051
Changli Gaod8d1f302010-06-10 23:31:35 -07003052 error = rt->dst.error;
Thomas Grafbe403ea2006-08-17 18:15:17 -07003053
David S. Millerf1850712012-07-10 07:26:01 -07003054 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, error) < 0)
Thomas Grafe3703b32006-11-27 09:27:07 -08003055 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003056
Johannes Berg053c0952015-01-16 22:09:00 +01003057 nlmsg_end(skb, nlh);
3058 return 0;
Thomas Grafbe403ea2006-08-17 18:15:17 -07003059
3060nla_put_failure:
Patrick McHardy26932562007-01-31 23:16:40 -08003061 nlmsg_cancel(skb, nlh);
3062 return -EMSGSIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003063}
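
/* Illustrative note (added commentary): the RTM_NEWROUTE message built
 * above is what "ip route get" decodes; a cached PMTU exception shows up
 * roughly as
 *
 *	203.0.113.1 via 198.51.100.1 dev eth0 src 198.51.100.10
 *	    cache expires 599sec mtu 1400
 *
 * with the mtu taken from the RTAX_MTU metric and the expiry from the
 * RTA_CACHEINFO data filled in via rtnl_put_cacheinfo().
 */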
3064
Stefano Brivioee289062019-06-21 17:45:23 +02003065static int fnhe_dump_bucket(struct net *net, struct sk_buff *skb,
3066 struct netlink_callback *cb, u32 table_id,
3067 struct fnhe_hash_bucket *bucket, int genid,
John Fastabende93fb3e2019-08-23 17:11:38 -07003068 int *fa_index, int fa_start, unsigned int flags)
Stefano Brivioee289062019-06-21 17:45:23 +02003069{
3070 int i;
3071
3072 for (i = 0; i < FNHE_HASH_SIZE; i++) {
3073 struct fib_nh_exception *fnhe;
3074
3075 for (fnhe = rcu_dereference(bucket[i].chain); fnhe;
3076 fnhe = rcu_dereference(fnhe->fnhe_next)) {
3077 struct rtable *rt;
3078 int err;
3079
3080 if (*fa_index < fa_start)
3081 goto next;
3082
3083 if (fnhe->fnhe_genid != genid)
3084 goto next;
3085
3086 if (fnhe->fnhe_expires &&
3087 time_after(jiffies, fnhe->fnhe_expires))
3088 goto next;
3089
3090 rt = rcu_dereference(fnhe->fnhe_rth_input);
3091 if (!rt)
3092 rt = rcu_dereference(fnhe->fnhe_rth_output);
3093 if (!rt)
3094 goto next;
3095
3096 err = rt_fill_info(net, fnhe->fnhe_daddr, 0, rt,
3097 table_id, NULL, skb,
3098 NETLINK_CB(cb->skb).portid,
John Fastabende93fb3e2019-08-23 17:11:38 -07003099 cb->nlh->nlmsg_seq, flags);
Stefano Brivioee289062019-06-21 17:45:23 +02003100 if (err)
3101 return err;
3102next:
3103 (*fa_index)++;
3104 }
3105 }
3106
3107 return 0;
3108}
3109
3110int fib_dump_info_fnhe(struct sk_buff *skb, struct netlink_callback *cb,
3111 u32 table_id, struct fib_info *fi,
John Fastabende93fb3e2019-08-23 17:11:38 -07003112 int *fa_index, int fa_start, unsigned int flags)
Stefano Brivioee289062019-06-21 17:45:23 +02003113{
3114 struct net *net = sock_net(cb->skb->sk);
3115 int nhsel, genid = fnhe_genid(net);
3116
3117 for (nhsel = 0; nhsel < fib_info_num_path(fi); nhsel++) {
3118 struct fib_nh_common *nhc = fib_info_nhc(fi, nhsel);
3119 struct fnhe_hash_bucket *bucket;
3120 int err;
3121
3122 if (nhc->nhc_flags & RTNH_F_DEAD)
3123 continue;
3124
Eric Dumazet93ed54b2019-06-26 03:04:50 -07003125 rcu_read_lock();
Stefano Brivioee289062019-06-21 17:45:23 +02003126 bucket = rcu_dereference(nhc->nhc_exceptions);
Eric Dumazet93ed54b2019-06-26 03:04:50 -07003127 err = 0;
3128 if (bucket)
3129 err = fnhe_dump_bucket(net, skb, cb, table_id, bucket,
John Fastabende93fb3e2019-08-23 17:11:38 -07003130 genid, fa_index, fa_start,
3131 flags);
Eric Dumazet93ed54b2019-06-26 03:04:50 -07003132 rcu_read_unlock();
Stefano Brivioee289062019-06-21 17:45:23 +02003133 if (err)
3134 return err;
3135 }
3136
3137 return 0;
3138}
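
/* Note (added commentary): fib_dump_info_fnhe() lets a FIB dump report the
 * per-nexthop exception entries (learned PMTU, redirects) as RTM_F_CLONED
 * routes; fa_index/fa_start implement resumption when a netlink dump is
 * interrupted mid-bucket.
 */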
3139
Roopa Prabhu404eb772018-05-22 14:03:27 -07003140static struct sk_buff *inet_rtm_getroute_build_skb(__be32 src, __be32 dst,
3141 u8 ip_proto, __be16 sport,
3142 __be16 dport)
3143{
3144 struct sk_buff *skb;
3145 struct iphdr *iph;
3146
3147 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
3148 if (!skb)
3149 return NULL;
3150
3151	/* Reserve room for dummy headers; this skb can pass
3152	 * through a good chunk of the routing engine.
3153 */
3154 skb_reset_mac_header(skb);
3155 skb_reset_network_header(skb);
3156 skb->protocol = htons(ETH_P_IP);
3157 iph = skb_put(skb, sizeof(struct iphdr));
3158 iph->protocol = ip_proto;
3159 iph->saddr = src;
3160 iph->daddr = dst;
3161 iph->version = 0x4;
3162 iph->frag_off = 0;
3163 iph->ihl = 0x5;
3164 skb_set_transport_header(skb, skb->len);
3165
3166 switch (iph->protocol) {
3167 case IPPROTO_UDP: {
3168 struct udphdr *udph;
3169
3170 udph = skb_put_zero(skb, sizeof(struct udphdr));
3171 udph->source = sport;
3172 udph->dest = dport;
3173 udph->len = sizeof(struct udphdr);
3174 udph->check = 0;
3175 break;
3176 }
3177 case IPPROTO_TCP: {
3178 struct tcphdr *tcph;
3179
3180 tcph = skb_put_zero(skb, sizeof(struct tcphdr));
3181 tcph->source = sport;
3182 tcph->dest = dport;
3183 tcph->doff = sizeof(struct tcphdr) / 4;
3184 tcph->rst = 1;
3185 tcph->check = ~tcp_v4_check(sizeof(struct tcphdr),
3186 src, dst, 0);
3187 break;
3188 }
3189 case IPPROTO_ICMP: {
3190 struct icmphdr *icmph;
3191
3192 icmph = skb_put_zero(skb, sizeof(struct icmphdr));
3193 icmph->type = ICMP_ECHO;
3194 icmph->code = 0;
3195 }
3196 }
3197
3198 return skb;
3199}
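
/* Note (added commentary): the skb built above is a throwaway packet used
 * only to drive the route lookup and flow dissection for RTM_GETROUTE; it
 * is never transmitted, which is why e.g. the UDP checksum is left zero.
 */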
3200
Jakub Kicinskia00302b62019-01-18 10:46:19 -08003201static int inet_rtm_valid_getroute_req(struct sk_buff *skb,
3202 const struct nlmsghdr *nlh,
3203 struct nlattr **tb,
3204 struct netlink_ext_ack *extack)
3205{
3206 struct rtmsg *rtm;
3207 int i, err;
3208
3209 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
3210 NL_SET_ERR_MSG(extack,
3211 "ipv4: Invalid header for route get request");
3212 return -EINVAL;
3213 }
3214
3215 if (!netlink_strict_get_check(skb))
Johannes Berg8cb08172019-04-26 14:07:28 +02003216 return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
3217 rtm_ipv4_policy, extack);
Jakub Kicinskia00302b62019-01-18 10:46:19 -08003218
3219 rtm = nlmsg_data(nlh);
3220 if ((rtm->rtm_src_len && rtm->rtm_src_len != 32) ||
3221 (rtm->rtm_dst_len && rtm->rtm_dst_len != 32) ||
3222 rtm->rtm_table || rtm->rtm_protocol ||
3223 rtm->rtm_scope || rtm->rtm_type) {
3224 NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for route get request");
3225 return -EINVAL;
3226 }
3227
3228 if (rtm->rtm_flags & ~(RTM_F_NOTIFY |
3229 RTM_F_LOOKUP_TABLE |
3230 RTM_F_FIB_MATCH)) {
3231 NL_SET_ERR_MSG(extack, "ipv4: Unsupported rtm_flags for route get request");
3232 return -EINVAL;
3233 }
3234
Johannes Berg8cb08172019-04-26 14:07:28 +02003235 err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
3236 rtm_ipv4_policy, extack);
Jakub Kicinskia00302b62019-01-18 10:46:19 -08003237 if (err)
3238 return err;
3239
3240 if ((tb[RTA_SRC] && !rtm->rtm_src_len) ||
3241 (tb[RTA_DST] && !rtm->rtm_dst_len)) {
3242 NL_SET_ERR_MSG(extack, "ipv4: rtm_src_len and rtm_dst_len must be 32 for IPv4");
3243 return -EINVAL;
3244 }
3245
3246 for (i = 0; i <= RTA_MAX; i++) {
3247 if (!tb[i])
3248 continue;
3249
3250 switch (i) {
3251 case RTA_IIF:
3252 case RTA_OIF:
3253 case RTA_SRC:
3254 case RTA_DST:
3255 case RTA_IP_PROTO:
3256 case RTA_SPORT:
3257 case RTA_DPORT:
3258 case RTA_MARK:
3259 case RTA_UID:
3260 break;
3261 default:
3262 NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in route get request");
3263 return -EINVAL;
3264 }
3265 }
3266
3267 return 0;
3268}
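
/* Illustrative note (added commentary): userspace reaches the strict
 * validator above with an RTM_GETROUTE request, e.g.
 *
 *	$ ip route get 203.0.113.5 mark 7
 *
 * which encodes RTA_DST plus RTA_MARK; on strict-checking sockets any
 * attribute outside the switch above is rejected with -EINVAL.
 */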
3269
David Ahernc21ef3e2017-04-16 09:48:24 -07003270static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
3271 struct netlink_ext_ack *extack)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003272{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09003273 struct net *net = sock_net(in_skb->sk);
Thomas Grafd889ce32006-08-17 18:15:44 -07003274 struct nlattr *tb[RTA_MAX+1];
Roopa Prabhu404eb772018-05-22 14:03:27 -07003275 u32 table_id = RT_TABLE_MAIN;
3276 __be16 sport = 0, dport = 0;
David Ahern3765d352017-05-25 10:42:36 -07003277 struct fib_result res = {};
Roopa Prabhu404eb772018-05-22 14:03:27 -07003278 u8 ip_proto = IPPROTO_UDP;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003279 struct rtable *rt = NULL;
Roopa Prabhu404eb772018-05-22 14:03:27 -07003280 struct sk_buff *skb;
3281 struct rtmsg *rtm;
Maciej Żenczykowskie8e3fbe2018-09-29 23:44:47 -07003282 struct flowi4 fl4 = {};
Al Viro9e12bb22006-09-26 21:25:20 -07003283 __be32 dst = 0;
3284 __be32 src = 0;
Roopa Prabhu404eb772018-05-22 14:03:27 -07003285 kuid_t uid;
Al Viro9e12bb22006-09-26 21:25:20 -07003286 u32 iif;
Thomas Grafd889ce32006-08-17 18:15:44 -07003287 int err;
Eric Dumazet963bfee2010-07-20 22:03:14 +00003288 int mark;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003289
Jakub Kicinskia00302b62019-01-18 10:46:19 -08003290 err = inet_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
Thomas Grafd889ce32006-08-17 18:15:44 -07003291 if (err < 0)
Roopa Prabhu404eb772018-05-22 14:03:27 -07003292 return err;
Thomas Grafd889ce32006-08-17 18:15:44 -07003293
3294 rtm = nlmsg_data(nlh);
Jiri Benc67b61f62015-03-29 16:59:26 +02003295 src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0;
3296 dst = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0;
Thomas Grafd889ce32006-08-17 18:15:44 -07003297 iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
Eric Dumazet963bfee2010-07-20 22:03:14 +00003298 mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0;
Lorenzo Colitti622ec2c2016-11-04 02:23:42 +09003299 if (tb[RTA_UID])
3300 uid = make_kuid(current_user_ns(), nla_get_u32(tb[RTA_UID]));
3301 else
3302 uid = (iif ? INVALID_UID : current_uid());
Linus Torvalds1da177e2005-04-16 15:20:36 -07003303
Roopa Prabhu404eb772018-05-22 14:03:27 -07003304 if (tb[RTA_IP_PROTO]) {
3305 err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO],
Hangbin Liu5e1a99e2019-02-27 16:15:29 +08003306 &ip_proto, AF_INET, extack);
Roopa Prabhu404eb772018-05-22 14:03:27 -07003307 if (err)
3308 return err;
3309 }
Florian Laryschbbadb9a2017-04-07 14:42:20 +02003310
Roopa Prabhu404eb772018-05-22 14:03:27 -07003311 if (tb[RTA_SPORT])
3312 sport = nla_get_be16(tb[RTA_SPORT]);
3313
3314 if (tb[RTA_DPORT])
3315 dport = nla_get_be16(tb[RTA_DPORT]);
3316
3317 skb = inet_rtm_getroute_build_skb(src, dst, ip_proto, sport, dport);
3318 if (!skb)
3319 return -ENOBUFS;
Florian Laryschbbadb9a2017-04-07 14:42:20 +02003320
David Millerd6c0a4f2012-07-01 02:02:59 +00003321 fl4.daddr = dst;
3322 fl4.saddr = src;
Guillaume Nault1ebf1792020-11-26 19:09:22 +01003323 fl4.flowi4_tos = rtm->rtm_tos & IPTOS_RT_MASK;
David Millerd6c0a4f2012-07-01 02:02:59 +00003324 fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0;
3325 fl4.flowi4_mark = mark;
Lorenzo Colitti622ec2c2016-11-04 02:23:42 +09003326 fl4.flowi4_uid = uid;
Roopa Prabhu404eb772018-05-22 14:03:27 -07003327 if (sport)
3328 fl4.fl4_sport = sport;
3329 if (dport)
3330 fl4.fl4_dport = dport;
3331 fl4.flowi4_proto = ip_proto;
David Millerd6c0a4f2012-07-01 02:02:59 +00003332
David Ahern3765d352017-05-25 10:42:36 -07003333 rcu_read_lock();
3334
Linus Torvalds1da177e2005-04-16 15:20:36 -07003335 if (iif) {
Thomas Grafd889ce32006-08-17 18:15:44 -07003336 struct net_device *dev;
3337
David Ahern3765d352017-05-25 10:42:36 -07003338 dev = dev_get_by_index_rcu(net, iif);
Ian Morris51456b22015-04-03 09:17:26 +01003339 if (!dev) {
Thomas Grafd889ce32006-08-17 18:15:44 -07003340 err = -ENODEV;
Roopa Prabhu404eb772018-05-22 14:03:27 -07003341 goto errout_rcu;
Thomas Grafd889ce32006-08-17 18:15:44 -07003342 }
3343
Roopa Prabhu404eb772018-05-22 14:03:27 -07003344 fl4.flowi4_iif = iif; /* for rt_fill_info */
Linus Torvalds1da177e2005-04-16 15:20:36 -07003345 skb->dev = dev;
Eric Dumazet963bfee2010-07-20 22:03:14 +00003346 skb->mark = mark;
Guillaume Nault1ebf1792020-11-26 19:09:22 +01003347 err = ip_route_input_rcu(skb, dst, src,
3348 rtm->rtm_tos & IPTOS_RT_MASK, dev,
3349 &res);
Thomas Grafd889ce32006-08-17 18:15:44 -07003350
Eric Dumazet511c3f92009-06-02 05:14:27 +00003351 rt = skb_rtable(skb);
Changli Gaod8d1f302010-06-10 23:31:35 -07003352 if (err == 0 && rt->dst.error)
3353 err = -rt->dst.error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003354 } else {
Lorenzo Colitti6503a302018-01-11 18:36:26 +09003355 fl4.flowi4_iif = LOOPBACK_IFINDEX;
Ido Schimmel21f94772018-12-20 17:03:27 +00003356 skb->dev = net->loopback_dev;
David Ahern3765d352017-05-25 10:42:36 -07003357 rt = ip_route_output_key_hash_rcu(net, &fl4, &res, skb);
David S. Millerb23dd4f2011-03-02 14:31:35 -08003358 err = 0;
3359 if (IS_ERR(rt))
3360 err = PTR_ERR(rt);
Florian Westphal2c87d632017-08-14 00:52:58 +02003361 else
3362 skb_dst_set(skb, &rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003363 }
Thomas Grafd889ce32006-08-17 18:15:44 -07003364
Linus Torvalds1da177e2005-04-16 15:20:36 -07003365 if (err)
Roopa Prabhu404eb772018-05-22 14:03:27 -07003366 goto errout_rcu;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003367
Linus Torvalds1da177e2005-04-16 15:20:36 -07003368 if (rtm->rtm_flags & RTM_F_NOTIFY)
3369 rt->rt_flags |= RTCF_NOTIFY;
3370
David Ahernc36ba662015-09-02 13:58:36 -07003371 if (rtm->rtm_flags & RTM_F_LOOKUP_TABLE)
David Ahern68e813a2018-02-14 14:24:28 -08003372 table_id = res.table ? res.table->tb_id : 0;
David Ahernc36ba662015-09-02 13:58:36 -07003373
Roopa Prabhu404eb772018-05-22 14:03:27 -07003374 /* reset skb for netlink reply msg */
3375 skb_trim(skb, 0);
3376 skb_reset_network_header(skb);
3377 skb_reset_transport_header(skb);
3378 skb_reset_mac_header(skb);
3379
Roopa Prabhubc3aae22017-08-16 12:38:52 -07003380 if (rtm->rtm_flags & RTM_F_FIB_MATCH) {
Ido Schimmel1e301fd2020-01-14 13:23:10 +02003381 struct fib_rt_info fri;
3382
Roopa Prabhubc3aae22017-08-16 12:38:52 -07003383 if (!res.fi) {
3384 err = fib_props[res.type].error;
3385 if (!err)
3386 err = -EHOSTUNREACH;
Roopa Prabhu404eb772018-05-22 14:03:27 -07003387 goto errout_rcu;
Roopa Prabhubc3aae22017-08-16 12:38:52 -07003388 }
Ido Schimmel1e301fd2020-01-14 13:23:10 +02003389 fri.fi = res.fi;
3390 fri.tb_id = table_id;
3391 fri.dst = res.prefix;
3392 fri.dst_len = res.prefixlen;
3393 fri.tos = fl4.flowi4_tos;
3394 fri.type = rt->rt_type;
Ido Schimmel90b93f12020-01-14 13:23:11 +02003395 fri.offload = 0;
3396 fri.trap = 0;
Amit Cohen36c51002021-02-07 10:22:50 +02003397 fri.offload_failed = 0;
Ido Schimmel90b93f12020-01-14 13:23:11 +02003398 if (res.fa_head) {
3399 struct fib_alias *fa;
3400
3401 hlist_for_each_entry_rcu(fa, res.fa_head, fa_list) {
3402 u8 slen = 32 - fri.dst_len;
3403
3404 if (fa->fa_slen == slen &&
3405 fa->tb_id == fri.tb_id &&
3406 fa->fa_tos == fri.tos &&
3407 fa->fa_info == res.fi &&
3408 fa->fa_type == fri.type) {
3409 fri.offload = fa->offload;
3410 fri.trap = fa->trap;
3411 break;
3412 }
3413 }
3414 }
Roopa Prabhub6179812017-05-25 10:42:39 -07003415 err = fib_dump_info(skb, NETLINK_CB(in_skb).portid,
Ido Schimmel1e301fd2020-01-14 13:23:10 +02003416 nlh->nlmsg_seq, RTM_NEWROUTE, &fri, 0);
Roopa Prabhubc3aae22017-08-16 12:38:52 -07003417 } else {
Roopa Prabhu404eb772018-05-22 14:03:27 -07003418 err = rt_fill_info(net, dst, src, rt, table_id, &fl4, skb,
John Fastabende93fb3e2019-08-23 17:11:38 -07003419 NETLINK_CB(in_skb).portid,
3420 nlh->nlmsg_seq, 0);
Roopa Prabhubc3aae22017-08-16 12:38:52 -07003421 }
David S. Miller7b46a642015-01-18 23:36:08 -05003422 if (err < 0)
Roopa Prabhu404eb772018-05-22 14:03:27 -07003423 goto errout_rcu;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003424
David Ahern3765d352017-05-25 10:42:36 -07003425 rcu_read_unlock();
3426
Eric W. Biederman15e47302012-09-07 20:12:54 +00003427 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003428
Thomas Grafd889ce32006-08-17 18:15:44 -07003429errout_free:
Roopa Prabhu404eb772018-05-22 14:03:27 -07003430 return err;
3431errout_rcu:
David Ahern3765d352017-05-25 10:42:36 -07003432 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07003433 kfree_skb(skb);
Roopa Prabhu404eb772018-05-22 14:03:27 -07003434 goto errout_free;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003435}
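
/* Illustrative note (added commentary): the RTM_F_FIB_MATCH branch above
 * backs
 *
 *	$ ip route get fibmatch 203.0.113.5
 *
 * which returns the matching FIB entry via fib_dump_info(), while the
 * default path replies with the resolved route via rt_fill_info().
 */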
3436
Linus Torvalds1da177e2005-04-16 15:20:36 -07003437void ip_rt_multicast_event(struct in_device *in_dev)
3438{
Nicolas Dichtel4ccfe6d2012-09-07 00:45:29 +00003439 rt_cache_flush(dev_net(in_dev->dev));
Linus Torvalds1da177e2005-04-16 15:20:36 -07003440}
3441
3442#ifdef CONFIG_SYSCTL
Gao feng082c7ca2013-02-19 00:43:12 +00003443static int ip_rt_gc_interval __read_mostly = 60 * HZ;
3444static int ip_rt_gc_min_interval __read_mostly = HZ / 2;
3445static int ip_rt_gc_elasticity __read_mostly = 8;
Arnd Bergmann773daa32018-02-28 14:32:48 +01003446static int ip_min_valid_pmtu __read_mostly = IPV4_MIN_MTU;
Gao feng082c7ca2013-02-19 00:43:12 +00003447
Joe Perchesfe2c6332013-06-11 23:04:25 -07003448static int ipv4_sysctl_rtcache_flush(struct ctl_table *__ctl, int write,
Christoph Hellwig32927392020-04-24 08:43:38 +02003449 void *buffer, size_t *lenp, loff_t *ppos)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003450{
Timo Teräs5aad1de2013-05-27 20:46:33 +00003451 struct net *net = (struct net *)__ctl->extra1;
3452
Linus Torvalds1da177e2005-04-16 15:20:36 -07003453 if (write) {
Timo Teräs5aad1de2013-05-27 20:46:33 +00003454 rt_cache_flush(net);
3455 fnhe_genid_bump(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003456 return 0;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09003457 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07003458
3459 return -EINVAL;
3460}
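
/* Usage note (added commentary): this handler backs the write-only
 * "flush" sysctl registered below, so
 *
 *	# echo 1 > /proc/sys/net/ipv4/route/flush
 *
 * bumps the per-net route and fnhe generation ids; reading the file
 * returns -EINVAL by design.
 */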

static struct ctl_table ipv4_route_table[] = {
	{
		.procname	= "gc_thresh",
		.data		= &ipv4_dst_ops.gc_thresh,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "max_size",
		.data		= &ip_rt_max_size,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		/* Deprecated. Use gc_min_interval_ms */

		.procname	= "gc_min_interval",
		.data		= &ip_rt_gc_min_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "gc_min_interval_ms",
		.data		= &ip_rt_gc_min_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_ms_jiffies,
	},
	{
		.procname	= "gc_timeout",
		.data		= &ip_rt_gc_timeout,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "gc_interval",
		.data		= &ip_rt_gc_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "redirect_load",
		.data		= &ip_rt_redirect_load,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "redirect_number",
		.data		= &ip_rt_redirect_number,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "redirect_silence",
		.data		= &ip_rt_redirect_silence,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "error_cost",
		.data		= &ip_rt_error_cost,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "error_burst",
		.data		= &ip_rt_error_burst,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "gc_elasticity",
		.data		= &ip_rt_gc_elasticity,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "mtu_expires",
		.data		= &ip_rt_mtu_expires,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "min_pmtu",
		.data		= &ip_rt_min_pmtu,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &ip_min_valid_pmtu,
	},
	{
		.procname	= "min_adv_mss",
		.data		= &ip_rt_min_advmss,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{ }
};
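
/* The table above is registered under "net/ipv4/route" (see
 * ip_static_sysctl_init() at the bottom of this file), so these knobs can
 * be tuned at runtime, e.g.:
 *
 *	sysctl -w net.ipv4.route.min_pmtu=552
 *
 * For min_pmtu, proc_dointvec_minmax() rejects values below
 * ip_min_valid_pmtu (IPV4_MIN_MTU) rather than clamping them.
 */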

static const char ipv4_route_flush_procname[] = "flush";

static struct ctl_table ipv4_route_flush_table[] = {
	{
		.procname	= ipv4_route_flush_procname,
		.maxlen		= sizeof(int),
		.mode		= 0200,
		.proc_handler	= ipv4_sysctl_rtcache_flush,
	},
	{ },
};

static __net_init int sysctl_route_net_init(struct net *net)
{
	struct ctl_table *tbl;

	tbl = ipv4_route_flush_table;
	if (!net_eq(net, &init_net)) {
		tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL);
		if (!tbl)
			goto err_dup;

		/* Don't export non-whitelisted sysctls to unprivileged users */
		if (net->user_ns != &init_user_ns) {
			if (tbl[0].procname != ipv4_route_flush_procname)
				tbl[0].procname = NULL;
		}
	}
	tbl[0].extra1 = net;

	net->ipv4.route_hdr = register_net_sysctl(net, "net/ipv4/route", tbl);
	if (!net->ipv4.route_hdr)
		goto err_reg;
	return 0;

err_reg:
	if (tbl != ipv4_route_flush_table)
		kfree(tbl);
err_dup:
	return -ENOMEM;
}
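
/* Design note: every non-initial netns gets its own kmemdup()'d copy of
 * the flush table so that ->extra1 can point back at the owning netns;
 * the initial netns registers the static table directly, which is why
 * sysctl_route_net_exit() below must never free ipv4_route_flush_table
 * itself (hence the BUG_ON there).
 */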

static __net_exit void sysctl_route_net_exit(struct net *net)
{
	struct ctl_table *tbl;

	tbl = net->ipv4.route_hdr->ctl_table_arg;
	unregister_net_sysctl_table(net->ipv4.route_hdr);
	BUG_ON(tbl == ipv4_route_flush_table);
	kfree(tbl);
}

static __net_initdata struct pernet_operations sysctl_route_ops = {
	.init = sysctl_route_net_init,
	.exit = sysctl_route_net_exit,
};
#endif

static __net_init int rt_genid_init(struct net *net)
{
	atomic_set(&net->ipv4.rt_genid, 0);
	atomic_set(&net->fnhe_genid, 0);
	atomic_set(&net->ipv4.dev_addr_genid, get_random_int());
	return 0;
}

static __net_initdata struct pernet_operations rt_genid_ops = {
	.init = rt_genid_init,
};
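
/* Bumping rt_genid (rt_cache_flush() does this) invalidates every cached
 * dst in the namespace lazily: an entry whose recorded genid no longer
 * matches is dropped the next time it is looked up, so a "flush" is O(1)
 * instead of a walk over all cached routes.
 */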

static int __net_init ipv4_inetpeer_init(struct net *net)
{
	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);

	if (!bp)
		return -ENOMEM;
	inet_peer_base_init(bp);
	net->ipv4.peers = bp;
	return 0;
}

static void __net_exit ipv4_inetpeer_exit(struct net *net)
{
	struct inet_peer_base *bp = net->ipv4.peers;

	net->ipv4.peers = NULL;
	inetpeer_invalidate_tree(bp);
	kfree(bp);
}

static __net_initdata struct pernet_operations ipv4_inetpeer_ops = {
	.init	= ipv4_inetpeer_init,
	.exit	= ipv4_inetpeer_exit,
};
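
/* Each netns owns one inet_peer_base: long-lived per-destination state
 * (e.g. ICMP rate-limiting tokens) is kept there, keyed by peer address,
 * and torn down with the namespace via inetpeer_invalidate_tree().
 */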

#ifdef CONFIG_IP_ROUTE_CLASSID
struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
#endif /* CONFIG_IP_ROUTE_CLASSID */
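
/* Per-cpu accounting buckets (one per routing realm, 256 in total) for
 * routes carrying a classid; the counters are exported through
 * /proc/net/rt_acct.
 */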

int __init ip_rt_init(void)
{
	void *idents_hash;
	int cpu;

	/* For modern hosts, this will use 2 MB of memory */
	idents_hash = alloc_large_system_hash("IP idents",
					      sizeof(*ip_idents) + sizeof(*ip_tstamps),
					      0,
					      16, /* one bucket per 64 KB */
					      HASH_ZERO,
					      NULL,
					      &ip_idents_mask,
					      2048,
					      256*1024);

	ip_idents = idents_hash;

	prandom_bytes(ip_idents, (ip_idents_mask + 1) * sizeof(*ip_idents));

	ip_tstamps = idents_hash + (ip_idents_mask + 1) * sizeof(*ip_idents);
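
	/* ip_idents and ip_tstamps share the single allocation above; the
	 * idents are seeded with random bytes so IPv4 ID sequences do not
	 * start at predictable values after boot.
	 */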

	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu);

		INIT_LIST_HEAD(&ul->head);
		spin_lock_init(&ul->lock);
	}
#ifdef CONFIG_IP_ROUTE_CLASSID
	ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct));
	if (!ip_rt_acct)
		panic("IP: failed to allocate ip_rt_acct\n");
#endif

	ipv4_dst_ops.kmem_cachep =
		kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0,
				  SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);

	ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep;

	if (dst_entries_init(&ipv4_dst_ops) < 0)
		panic("IP: failed to allocate ipv4_dst_ops counter\n");

	if (dst_entries_init(&ipv4_dst_blackhole_ops) < 0)
		panic("IP: failed to allocate ipv4_dst_blackhole_ops counter\n");

	ipv4_dst_ops.gc_thresh = ~0;
	ip_rt_max_size = INT_MAX;

	devinet_init();
	ip_fib_init();

	if (ip_rt_proc_init())
		pr_err("Unable to create route proc files\n");
#ifdef CONFIG_XFRM
	xfrm_init();
	xfrm4_init();
#endif
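
	/* inet_rtm_getroute() is registered RTNL_FLAG_DOIT_UNLOCKED: it runs
	 * under RCU only, so RTM_GETROUTE requests do not serialize on the
	 * rtnl mutex.
	 */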
	rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL,
		      RTNL_FLAG_DOIT_UNLOCKED);

#ifdef CONFIG_SYSCTL
	register_pernet_subsys(&sysctl_route_ops);
#endif
	register_pernet_subsys(&rt_genid_ops);
	register_pernet_subsys(&ipv4_inetpeer_ops);
	return 0;
}

#ifdef CONFIG_SYSCTL
/*
 * We really need to sanitize the damn ipv4 init order, then all
 * this nonsense will go away.
 */
void __init ip_static_sysctl_init(void)
{
	register_net_sysctl(&init_net, "net/ipv4/route", ipv4_route_table);
}
#endif