blob: 05a6a8ecb574a053c5c187a820a91968ea68a299 [file] [log] [blame]
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		ROUTE - implementation of the IP router.
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *		Linus Torvalds, <Linus.Torvalds@helsinki.fi>
 *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Fixes:
 *		Alan Cox	:	Verify area fixes.
 *		Alan Cox	:	cli() protects routing changes
 *		Rui Oliveira	:	ICMP routing table updates
 *		(rco@di.uminho.pt)	Routing table insertion and update
 *		Linus Torvalds	:	Rewrote bits to be sensible
 *		Alan Cox	:	Added BSD route gw semantics
 *		Alan Cox	:	Super /proc >4K
 *		Alan Cox	:	MTU in route table
 *		Alan Cox	:	MSS actually. Also added the window
 *					clamper.
 *		Sam Lantinga	:	Fixed route matching in rt_del()
 *		Alan Cox	:	Routing cache support.
 *		Alan Cox	:	Removed compatibility cruft.
 *		Alan Cox	:	RTF_REJECT support.
 *		Alan Cox	:	TCP irtt support.
 *		Jonathan Naylor	:	Added Metric support.
 *	Miquel van Smoorenburg	:	BSD API fixes.
 *	Miquel van Smoorenburg	:	Metrics.
 *		Alan Cox	:	Use __u32 properly
 *		Alan Cox	:	Aligned routing errors more closely with BSD
 *					our system is still very different.
 *		Alan Cox	:	Faster /proc handling
 *	Alexey Kuznetsov	:	Massive rework to support tree based routing,
 *					routing caches and better behaviour.
 *
 *		Olaf Erb	:	irtt wasn't being copied right.
 *		Bjorn Ekwall	:	Kerneld route support.
 *		Alan Cox	:	Multicast fixed (I hope)
 *		Pavel Krauz	:	Limited broadcast fixed
 *		Mike McLagan	:	Routing by source
 *	Alexey Kuznetsov	:	End of old history. Split to fib.c and
 *					route.c and rewritten from scratch.
 *		Andi Kleen	:	Load-limit warning messages.
 *	Vitaly E. Lavrov	:	Transparent proxy revived after year coma.
 *	Vitaly E. Lavrov	:	Race condition in ip_route_input_slow.
 *	Tobias Ringstrom	:	Uninitialized res.type in ip_route_output_slow.
 *	Vladimir V. Ivanov	:	IP rule info (flowid) is really useful.
 *		Marc Boucher	:	routing by fwmark
 *	Robert Olsson		:	Added rt_cache statistics
 *	Arnaldo C. Melo		:	Convert proc stuff to seq_file
 *	Eric Dumazet		:	hashed spinlocks and rt_check_expire() fixes.
 *	Ilia Sotnikov		:	Ignore TOS on PMTUD and Redirect
 *	Ilia Sotnikov		:	Removed TOS from hash calculations
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */
64
Joe Perchesafd465032012-03-12 07:03:32 +000065#define pr_fmt(fmt) "IPv4: " fmt
66
Linus Torvalds1da177e2005-04-16 15:20:36 -070067#include <linux/module.h>
Linus Torvalds7c0f6ba2016-12-24 11:46:01 -080068#include <linux/uaccess.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070069#include <linux/bitops.h>
70#include <linux/types.h>
71#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070072#include <linux/mm.h>
73#include <linux/string.h>
74#include <linux/socket.h>
75#include <linux/sockios.h>
76#include <linux/errno.h>
77#include <linux/in.h>
78#include <linux/inet.h>
79#include <linux/netdevice.h>
80#include <linux/proc_fs.h>
81#include <linux/init.h>
82#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070083#include <linux/inetdevice.h>
84#include <linux/igmp.h>
85#include <linux/pkt_sched.h>
86#include <linux/mroute.h>
87#include <linux/netfilter_ipv4.h>
88#include <linux/random.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070089#include <linux/rcupdate.h>
90#include <linux/times.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090091#include <linux/slab.h>
Eric Dumazet73f156a2014-06-02 05:26:03 -070092#include <linux/jhash.h>
Herbert Xu352e5122007-11-13 21:34:06 -080093#include <net/dst.h>
Thomas Graf1b7179d2015-07-21 10:43:59 +020094#include <net/dst_metadata.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020095#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070096#include <net/protocol.h>
97#include <net/ip.h>
98#include <net/route.h>
99#include <net/inetpeer.h>
100#include <net/sock.h>
101#include <net/ip_fib.h>
David Ahern5481d732019-06-03 20:19:49 -0700102#include <net/nexthop.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700103#include <net/arp.h>
104#include <net/tcp.h>
105#include <net/icmp.h>
106#include <net/xfrm.h>
Roopa Prabhu571e7222015-07-21 10:43:47 +0200107#include <net/lwtunnel.h>
Tom Tucker8d717402006-07-30 20:43:36 -0700108#include <net/netevent.h>
Thomas Graf63f34442007-03-22 11:55:17 -0700109#include <net/rtnetlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700110#ifdef CONFIG_SYSCTL
111#include <linux/sysctl.h>
112#endif
David S. Miller6e5714e2011-08-03 20:50:44 -0700113#include <net/secure_seq.h>
Thomas Graf1b7179d2015-07-21 10:43:59 +0200114#include <net/ip_tunnels.h>
David Ahern385add92015-09-29 20:07:13 -0700115#include <net/l3mdev.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700116
Roopa Prabhub6179812017-05-25 10:42:39 -0700117#include "fib_lookup.h"
118
David S. Miller68a5e3d2011-03-11 20:07:33 -0500119#define RT_FL_TOS(oldflp4) \
Julian Anastasovf61759e2011-12-02 11:39:42 +0000120 ((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700121
Linus Torvalds1da177e2005-04-16 15:20:36 -0700122#define RT_GC_TIMEOUT (300*HZ)
123
Linus Torvalds1da177e2005-04-16 15:20:36 -0700124static int ip_rt_max_size;
Stephen Hemminger817bc4d2008-03-22 17:43:59 -0700125static int ip_rt_redirect_number __read_mostly = 9;
126static int ip_rt_redirect_load __read_mostly = HZ / 50;
127static int ip_rt_redirect_silence __read_mostly = ((HZ / 50) << (9 + 1));
128static int ip_rt_error_cost __read_mostly = HZ;
129static int ip_rt_error_burst __read_mostly = 5 * HZ;
Stephen Hemminger817bc4d2008-03-22 17:43:59 -0700130static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ;
Sabrina Dubrocac7272c22018-02-26 16:13:43 +0100131static u32 ip_rt_min_pmtu __read_mostly = 512 + 20 + 20;
Stephen Hemminger817bc4d2008-03-22 17:43:59 -0700132static int ip_rt_min_advmss __read_mostly = 256;
Eric Dumazet9f28a2f2011-12-21 15:47:16 -0500133
Xin Longdeed49d2016-02-18 21:21:19 +0800134static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT;
Sabrina Dubrocac7272c22018-02-26 16:13:43 +0100135
Linus Torvalds1da177e2005-04-16 15:20:36 -0700136/*
137 * Interface to generic destination cache.
138 */
139
140static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
David S. Miller0dbaee32010-12-13 12:52:14 -0800141static unsigned int ipv4_default_advmss(const struct dst_entry *dst);
Steffen Klassertebb762f2011-11-23 02:12:51 +0000142static unsigned int ipv4_mtu(const struct dst_entry *dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700143static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
144static void ipv4_link_failure(struct sk_buff *skb);
David S. Miller6700c272012-07-17 03:29:28 -0700145static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
146 struct sk_buff *skb, u32 mtu);
147static void ip_do_redirect(struct dst_entry *dst, struct sock *sk,
148 struct sk_buff *skb);
David S. Millercaacf052012-07-31 15:06:50 -0700149static void ipv4_dst_destroy(struct dst_entry *dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700150
/* dst_ops .cow_metrics hook.  IPv4 routes are created with their metrics
 * already attached, so a copy-on-write request should never happen; warn
 * loudly and refuse rather than fabricate a metrics block.
 */
static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
{
	WARN_ON(1);
	return NULL;
}
156
David S. Millerf894cbf2012-07-02 21:52:24 -0700157static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
158 struct sk_buff *skb,
159 const void *daddr);
Julian Anastasov63fca652017-02-06 23:14:15 +0200160static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr);
David S. Millerd3aaeb32011-07-18 00:40:17 -0700161
/* Generic destination-cache operations for IPv4 routes.  Every
 * struct rtable's embedded dst_entry points here; the hooks below are
 * invoked by the protocol-independent dst layer.
 */
static struct dst_ops ipv4_dst_ops = {
	.family			= AF_INET,
	.check			= ipv4_dst_check,
	.default_advmss		= ipv4_default_advmss,
	.mtu			= ipv4_mtu,
	.cow_metrics		= ipv4_cow_metrics,
	.destroy		= ipv4_dst_destroy,
	.negative_advice	= ipv4_negative_advice,
	.link_failure		= ipv4_link_failure,
	.update_pmtu		= ip_rt_update_pmtu,
	.redirect		= ip_do_redirect,
	.local_out		= __ip_local_out,
	.neigh_lookup		= ipv4_neigh_lookup,
	.confirm_neigh		= ipv4_confirm_neigh,
};
177
/* ECN_OR_COST(class) expands to the same TC_PRIO_* value as the plain
 * entry; the macro only documents that the odd table slots correspond to
 * TOS values with the low (ECN/"minimize cost") bit set.
 */
#define ECN_OR_COST(class)	TC_PRIO_##class

/* Map the 4-bit IP TOS field (table indexed by tos >> 1, i.e. 16 slots)
 * to a traffic-control priority band.
 */
const __u8 ip_tos2prio[16] = {
	TC_PRIO_BESTEFFORT,
	ECN_OR_COST(BESTEFFORT),
	TC_PRIO_BESTEFFORT,
	ECN_OR_COST(BESTEFFORT),
	TC_PRIO_BULK,
	ECN_OR_COST(BULK),
	TC_PRIO_BULK,
	ECN_OR_COST(BULK),
	TC_PRIO_INTERACTIVE,
	ECN_OR_COST(INTERACTIVE),
	TC_PRIO_INTERACTIVE,
	ECN_OR_COST(INTERACTIVE),
	TC_PRIO_INTERACTIVE_BULK,
	ECN_OR_COST(INTERACTIVE_BULK),
	TC_PRIO_INTERACTIVE_BULK,
	ECN_OR_COST(INTERACTIVE_BULK)
};
EXPORT_SYMBOL(ip_tos2prio);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700199
Eric Dumazet2f970d82006-01-17 02:54:36 -0800200static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
Christoph Lameter3ed66e92014-04-07 15:39:40 -0700201#define RT_CACHE_STAT_INC(field) raw_cpu_inc(rt_cache_stat.field)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700202
Linus Torvalds1da177e2005-04-16 15:20:36 -0700203#ifdef CONFIG_PROC_FS
Linus Torvalds1da177e2005-04-16 15:20:36 -0700204static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
205{
Eric Dumazet29e75252008-01-31 17:05:09 -0800206 if (*pos)
David S. Miller89aef892012-07-17 11:00:09 -0700207 return NULL;
Eric Dumazet29e75252008-01-31 17:05:09 -0800208 return SEQ_START_TOKEN;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700209}
210
211static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos)
212{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700213 ++*pos;
David S. Miller89aef892012-07-17 11:00:09 -0700214 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700215}
216
/* seq_file .stop: nothing was locked in .start, so nothing to release. */
static void rt_cache_seq_stop(struct seq_file *seq, void *v)
{
}
220
/* seq_file .show: print only the legacy column-header line, preserved
 * for userspace tools that still parse /proc/net/rt_cache.
 */
static int rt_cache_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_printf(seq, "%-127s\n",
			   "Iface\tDestination\tGateway \tFlags\t\tRefCnt\tUse\t"
			   "Metric\tSource\t\tMTU\tWindow\tIRTT\tTOS\tHHRef\t"
			   "HHUptod\tSpecDst");
	return 0;
}
230
/* Iterator callbacks for /proc/net/rt_cache. */
static const struct seq_operations rt_cache_seq_ops = {
	.start  = rt_cache_seq_start,
	.next   = rt_cache_seq_next,
	.stop   = rt_cache_seq_stop,
	.show   = rt_cache_seq_show,
};
237
238static int rt_cache_seq_open(struct inode *inode, struct file *file)
239{
David S. Miller89aef892012-07-17 11:00:09 -0700240 return seq_open(file, &rt_cache_seq_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700241}
242
/* File operations for /proc/net/rt_cache. */
static const struct file_operations rt_cache_seq_fops = {
	.open	 = rt_cache_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release,
};
249
250
251static void *rt_cpu_seq_start(struct seq_file *seq, loff_t *pos)
252{
253 int cpu;
254
255 if (*pos == 0)
256 return SEQ_START_TOKEN;
257
Rusty Russell0f23174a2008-12-29 12:23:42 +0000258 for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700259 if (!cpu_possible(cpu))
260 continue;
261 *pos = cpu+1;
Eric Dumazet2f970d82006-01-17 02:54:36 -0800262 return &per_cpu(rt_cache_stat, cpu);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700263 }
264 return NULL;
265}
266
267static void *rt_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
268{
269 int cpu;
270
Rusty Russell0f23174a2008-12-29 12:23:42 +0000271 for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700272 if (!cpu_possible(cpu))
273 continue;
274 *pos = cpu+1;
Eric Dumazet2f970d82006-01-17 02:54:36 -0800275 return &per_cpu(rt_cache_stat, cpu);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700276 }
277 return NULL;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900278
Linus Torvalds1da177e2005-04-16 15:20:36 -0700279}
280
/* seq_file .stop: no state to tear down. */
static void rt_cpu_seq_stop(struct seq_file *seq, void *v)
{

}
285
/* seq_file .show: one line per CPU of rt_cache statistics.  Several
 * columns are hard-coded to 0 because the corresponding counters were
 * removed along with the routing cache, but the column layout is kept
 * so existing parsers of /proc/net/stat/rt_cache keep working.
 */
static int rt_cpu_seq_show(struct seq_file *seq, void *v)
{
	struct rt_cache_stat *st = v;

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "entries  in_hit in_slow_tot in_slow_mc in_no_route in_brd in_martian_dst in_martian_src  out_hit out_slow_tot out_slow_mc  gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n");
		return 0;
	}

	seq_printf(seq,"%08x  %08x %08x %08x %08x %08x %08x %08x "
		   " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n",
		   dst_entries_get_slow(&ipv4_dst_ops),
		   0, /* st->in_hit */
		   st->in_slow_tot,
		   st->in_slow_mc,
		   st->in_no_route,
		   st->in_brd,
		   st->in_martian_dst,
		   st->in_martian_src,

		   0, /* st->out_hit */
		   st->out_slow_tot,
		   st->out_slow_mc,

		   0, /* st->gc_total */
		   0, /* st->gc_ignored */
		   0, /* st->gc_goal_miss */
		   0, /* st->gc_dst_overflow */
		   0, /* st->in_hlist_search */
		   0  /* st->out_hlist_search */
		);
	return 0;
}
319
/* Iterator callbacks for /proc/net/stat/rt_cache. */
static const struct seq_operations rt_cpu_seq_ops = {
	.start  = rt_cpu_seq_start,
	.next   = rt_cpu_seq_next,
	.stop   = rt_cpu_seq_stop,
	.show   = rt_cpu_seq_show,
};
326
327
/* open() handler for /proc/net/stat/rt_cache. */
static int rt_cpu_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &rt_cpu_seq_ops);
}
332
/* File operations for /proc/net/stat/rt_cache. */
static const struct file_operations rt_cpu_seq_fops = {
	.open	 = rt_cpu_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release,
};
339
#ifdef CONFIG_IP_ROUTE_CLASSID
/* /proc/net/rt_acct: emit the 256 route-classid accounting slots, each
 * summed over all possible CPUs, as one binary ip_rt_acct[256] blob.
 * Returns 0 on success or -ENOMEM if the scratch buffer cannot be
 * allocated.
 */
static int rt_acct_proc_show(struct seq_file *m, void *v)
{
	struct ip_rt_acct *sum;
	unsigned int cpu;

	sum = kcalloc(256, sizeof(*sum), GFP_KERNEL);
	if (!sum)
		return -ENOMEM;

	for_each_possible_cpu(cpu) {
		const struct ip_rt_acct *pc = per_cpu_ptr(ip_rt_acct, cpu);
		unsigned int slot;

		for (slot = 0; slot < 256; slot++) {
			sum[slot].o_bytes   += pc[slot].o_bytes;
			sum[slot].o_packets += pc[slot].o_packets;
			sum[slot].i_bytes   += pc[slot].i_bytes;
			sum[slot].i_packets += pc[slot].i_packets;
		}
	}

	seq_write(m, sum, 256 * sizeof(*sum));
	kfree(sum);
	return 0;
}
#endif
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800365
/* Per-netns creation of the three routing proc entries.  Note both of
 * the first two files are named "rt_cache" but live in different
 * directories (net->proc_net vs net->proc_net_stat).  On any failure,
 * entries created so far are unwound via the goto chain and -ENOMEM is
 * returned.
 */
static int __net_init ip_rt_do_proc_init(struct net *net)
{
	struct proc_dir_entry *pde;

	pde = proc_create("rt_cache", 0444, net->proc_net,
			  &rt_cache_seq_fops);
	if (!pde)
		goto err1;

	pde = proc_create("rt_cache", 0444,
			  net->proc_net_stat, &rt_cpu_seq_fops);
	if (!pde)
		goto err2;

#ifdef CONFIG_IP_ROUTE_CLASSID
	pde = proc_create_single("rt_acct", 0, net->proc_net,
			rt_acct_proc_show);
	if (!pde)
		goto err3;
#endif
	return 0;

#ifdef CONFIG_IP_ROUTE_CLASSID
err3:
	remove_proc_entry("rt_cache", net->proc_net_stat);
#endif
err2:
	remove_proc_entry("rt_cache", net->proc_net);
err1:
	return -ENOMEM;
}
Denis V. Lunev73b38712008-02-28 20:51:18 -0800397
/* Per-netns teardown: remove everything ip_rt_do_proc_init() created. */
static void __net_exit ip_rt_do_proc_exit(struct net *net)
{
	remove_proc_entry("rt_cache", net->proc_net_stat);
	remove_proc_entry("rt_cache", net->proc_net);
#ifdef CONFIG_IP_ROUTE_CLASSID
	remove_proc_entry("rt_acct", net->proc_net);
#endif
}
406
/* Hooks so each network namespace gets its own proc entries. */
static struct pernet_operations ip_rt_proc_ops __net_initdata =  {
	.init = ip_rt_do_proc_init,
	.exit = ip_rt_do_proc_exit,
};
411
/* Boot-time registration of the per-netns proc machinery above. */
static int __init ip_rt_proc_init(void)
{
	return register_pernet_subsys(&ip_rt_proc_ops);
}
416
#else
/* !CONFIG_PROC_FS: no proc entries to create; succeed trivially. */
static inline int ip_rt_proc_init(void)
{
	return 0;
}
#endif /* CONFIG_PROC_FS */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900423
/* A cached route is stale when its generation id no longer matches the
 * namespace-wide IPv4 route generation counter (bumped by rt_cache_flush).
 */
static inline bool rt_is_expired(const struct rtable *rth)
{
	return rth->rt_genid != rt_genid_ipv4(dev_net(rth->dst.dev));
}
428
/* Invalidate every cached IPv4 route in @net by bumping the generation
 * id; stale entries are then detected lazily by rt_is_expired().
 */
void rt_cache_flush(struct net *net)
{
	rt_genid_bump_ipv4(net);
}
433
/* dst_ops .neigh_lookup hook: resolve the neighbour entry for a route.
 * If the route has a gateway, the gateway address (v4 or v6) keys the
 * lookup; otherwise the packet's (or caller-supplied) destination does.
 * Returns a referenced neighbour or NULL; the refcount_inc_not_zero()
 * guard avoids resurrecting an entry that is concurrently being freed.
 */
static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
					   struct sk_buff *skb,
					   const void *daddr)
{
	const struct rtable *rt = container_of(dst, struct rtable, dst);
	struct net_device *dev = dst->dev;
	struct neighbour *n;

	rcu_read_lock_bh();

	if (likely(rt->rt_gw_family == AF_INET)) {
		n = ip_neigh_gw4(dev, rt->rt_gw4);
	} else if (rt->rt_gw_family == AF_INET6) {
		n = ip_neigh_gw6(dev, &rt->rt_gw6);
	} else {
		__be32 pkey;

		/* Prefer the packet's destination; fall back to @daddr. */
		pkey = skb ? ip_hdr(skb)->daddr : *((__be32 *) daddr);
		n = ip_neigh_gw4(dev, pkey);
	}

	if (n && !refcount_inc_not_zero(&n->refcnt))
		n = NULL;

	rcu_read_unlock_bh();

	return n;
}
462
/* dst_ops .confirm_neigh hook: mark the route's neighbour as recently
 * confirmed.  Gatewayed routes confirm the gateway; direct routes
 * confirm @daddr, except multicast/broadcast/local destinations (or a
 * missing @daddr), which have no meaningful unicast neighbour.
 */
static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr)
{
	const struct rtable *rt = container_of(dst, struct rtable, dst);
	struct net_device *dev = dst->dev;
	const __be32 *pkey = daddr;

	if (rt->rt_gw_family == AF_INET) {
		pkey = (const __be32 *)&rt->rt_gw4;
	} else if (rt->rt_gw_family == AF_INET6) {
		return __ipv6_confirm_neigh_stub(dev, &rt->rt_gw6);
	} else if (!daddr ||
		   (rt->rt_flags &
		    (RTCF_MULTICAST | RTCF_BROADCAST | RTCF_LOCAL))) {
		return;
	}
	__ipv4_confirm_neigh(dev, *(__force u32 *)pkey);
}
480
Eric Dumazet04ca6972014-07-26 08:58:10 +0200481#define IP_IDENTS_SZ 2048u
Eric Dumazet04ca6972014-07-26 08:58:10 +0200482
Eric Dumazet355b590c2015-05-01 10:37:49 -0700483static atomic_t *ip_idents __read_mostly;
484static u32 *ip_tstamps __read_mostly;
Eric Dumazet04ca6972014-07-26 08:58:10 +0200485
/* In order to protect privacy, we add a perturbation to identifiers
 * if one generator is seldom used. This makes hard for an attacker
 * to infer how many packets were sent between two points in time.
 */
u32 ip_idents_reserve(u32 hash, int segs)
{
	u32 *p_tstamp = ip_tstamps + hash % IP_IDENTS_SZ;
	atomic_t *p_id = ip_idents + hash % IP_IDENTS_SZ;
	u32 old = READ_ONCE(*p_tstamp);
	u32 now = (u32)jiffies;
	u32 new, delta = 0;

	/* If this bucket was idle for a while, add a random jump of up to
	 * the idle time in jiffies; only one CPU wins the cmpxchg and
	 * applies it.
	 */
	if (old != now && cmpxchg(p_tstamp, old, now) == old)
		delta = prandom_u32_max(now - old);

	/* Do not use atomic_add_return() as it makes UBSAN unhappy */
	do {
		old = (u32)atomic_read(p_id);
		new = old + delta + segs;
	} while (atomic_cmpxchg(p_id, old, new) != old);

	/* Return the first id of the reserved [new - segs, new) range. */
	return new - segs;
}
EXPORT_SYMBOL(ip_idents_reserve);
Eric Dumazet73f156a2014-06-02 05:26:03 -0700510
/* Choose the IP-ID for an outgoing header: reserve @segs consecutive
 * ids from the generator bucket selected by a keyed siphash of
 * (daddr, saddr, protocol), and stamp the first one into @iph.
 */
void __ip_select_ident(struct net *net, struct iphdr *iph, int segs)
{
	u32 hash, id;

	/* Note the following code is not safe, but this is okay. */
	if (unlikely(siphash_key_is_zero(&net->ipv4.ip_id_key)))
		get_random_bytes(&net->ipv4.ip_id_key,
				 sizeof(net->ipv4.ip_id_key));

	hash = siphash_3u32((__force u32)iph->daddr,
			    (__force u32)iph->saddr,
			    iph->protocol,
			    &net->ipv4.ip_id_key);
	id = ip_idents_reserve(hash, segs);
	iph->id = htons(id);
}
EXPORT_SYMBOL(__ip_select_ident);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700528
/* Fill in a flowi4 lookup key.  When a socket is supplied, its bound
 * device, mark, TOS and protocol override the caller-provided values so
 * the key matches what the socket's own output path would build.
 */
static void __build_flow_key(const struct net *net, struct flowi4 *fl4,
			     const struct sock *sk,
			     const struct iphdr *iph,
			     int oif, u8 tos,
			     u8 prot, u32 mark, int flow_flags)
{
	if (sk) {
		const struct inet_sock *inet = inet_sk(sk);

		oif = sk->sk_bound_dev_if;
		mark = sk->sk_mark;
		tos = RT_CONN_FLAGS(sk);
		prot = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol;
	}
	flowi4_init_output(fl4, oif, mark, tos,
			   RT_SCOPE_UNIVERSE, prot,
			   flow_flags,
			   iph->daddr, iph->saddr, 0, 0,
			   sock_net_uid(net, sk));
}
549
Eric Dumazet5abf7f72012-07-17 22:42:13 +0200550static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb,
551 const struct sock *sk)
David S. Miller4895c772012-07-17 04:19:00 -0700552{
Lorenzo Colittid109e612016-11-30 02:56:47 +0900553 const struct net *net = dev_net(skb->dev);
David S. Miller4895c772012-07-17 04:19:00 -0700554 const struct iphdr *iph = ip_hdr(skb);
555 int oif = skb->dev->ifindex;
556 u8 tos = RT_TOS(iph->tos);
557 u8 prot = iph->protocol;
558 u32 mark = skb->mark;
559
Lorenzo Colittid109e612016-11-30 02:56:47 +0900560 __build_flow_key(net, fl4, sk, iph, oif, tos, prot, mark, 0);
David S. Miller4895c772012-07-17 04:19:00 -0700561}
562
/* Build a flowi4 key from a connected socket alone (no packet).  With a
 * strict-source-route IP option, the first-hop address from the option
 * replaces the socket's destination.  inet_opt is read under RCU.
 */
static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
{
	const struct inet_sock *inet = inet_sk(sk);
	const struct ip_options_rcu *inet_opt;
	__be32 daddr = inet->inet_daddr;

	rcu_read_lock();
	inet_opt = rcu_dereference(inet->inet_opt);
	if (inet_opt && inet_opt->opt.srr)
		daddr = inet_opt->opt.faddr;
	flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
			   inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
			   inet_sk_flowi_flags(sk),
			   daddr, inet->inet_saddr, 0, 0, sk->sk_uid);
	rcu_read_unlock();
}
580
/* Build a flowi4 key from whichever context is available: prefer the
 * packet when one exists, otherwise fall back to the socket.
 */
static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk,
				 const struct sk_buff *skb)
{
	if (!skb)
		build_sk_flow_key(fl4, sk);
	else
		build_skb_flow_key(fl4, skb, sk);
}
589
David S. Millerc5038a82012-07-31 15:02:02 -0700590static DEFINE_SPINLOCK(fnhe_lock);
David S. Miller4895c772012-07-17 04:19:00 -0700591
/* Detach and drop both cached routes (input and output direction) hung
 * off a next-hop exception.  Pointers are cleared first so readers stop
 * finding the route, then the dst's device ref and refcount are dropped.
 * NOTE(review): callers appear to hold fnhe_lock while mutating — confirm.
 */
static void fnhe_flush_routes(struct fib_nh_exception *fnhe)
{
	struct rtable *rt;

	rt = rcu_dereference(fnhe->fnhe_rth_input);
	if (rt) {
		RCU_INIT_POINTER(fnhe->fnhe_rth_input, NULL);
		dst_dev_put(&rt->dst);
		dst_release(&rt->dst);
	}
	rt = rcu_dereference(fnhe->fnhe_rth_output);
	if (rt) {
		RCU_INIT_POINTER(fnhe->fnhe_rth_output, NULL);
		dst_dev_put(&rt->dst);
		dst_release(&rt->dst);
	}
}
609
/* Pick the exception with the oldest stamp in a hash chain so it can be
 * recycled, flushing its cached routes first.  Assumes the chain is
 * non-empty — the caller only recycles when the chain exceeds
 * FNHE_RECLAIM_DEPTH, so hash->chain is never NULL here.
 */
static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
{
	struct fib_nh_exception *fnhe, *oldest;

	oldest = rcu_dereference(hash->chain);
	for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe;
	     fnhe = rcu_dereference(fnhe->fnhe_next)) {
		if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp))
			oldest = fnhe;
	}
	fnhe_flush_routes(oldest);
	return oldest;
}
623
/* Hash a destination address into the FNHE_HASH_SHIFT-bit exception
 * bucket index, keyed by a boot-time random seed so bucket placement
 * is not externally predictable.
 */
static inline u32 fnhe_hashfun(__be32 daddr)
{
	static u32 fnhe_hashrnd __read_mostly;
	u32 hval;

	net_get_random_once(&fnhe_hashrnd, sizeof(fnhe_hashrnd));
	hval = jhash_1word((__force u32) daddr, fnhe_hashrnd);
	return hash_32(hval, FNHE_HASH_SHIFT);
}
633
/* Copy the learned state of a next-hop exception (PMTU, MTU lock,
 * expiry, and any redirect gateway) into a cached route.
 */
static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe)
{
	rt->rt_pmtu = fnhe->fnhe_pmtu;
	rt->rt_mtu_locked = fnhe->fnhe_mtu_locked;
	rt->dst.expires = fnhe->fnhe_expires;

	if (fnhe->fnhe_gw) {
		rt->rt_flags |= RTCF_REDIRECTED;
		rt->rt_gw_family = AF_INET;
		rt->rt_gw4 = fnhe->fnhe_gw;
	}
}
646
/* Record a nexthop exception (learned from an ICMP redirect or a
 * "fragmentation needed" message) for destination @daddr on nexthop @nhc,
 * or refresh the existing entry for that destination.
 *
 * @gw:      new gateway (0 if this is a PMTU update, not a redirect)
 * @pmtu:    new path MTU (0 if this is a redirect, not a PMTU update)
 * @lock:    whether the MTU is locked (not to be raised by later probing)
 * @expires: jiffies value at which the exception lapses
 *
 * All hash-chain mutation happens under fnhe_lock (bottom halves disabled);
 * readers walk the chains under RCU, hence the rcu_dereference /
 * rcu_assign_pointer pairing below.
 */
static void update_or_create_fnhe(struct fib_nh_common *nhc, __be32 daddr,
				  __be32 gw, u32 pmtu, bool lock,
				  unsigned long expires)
{
	struct fnhe_hash_bucket *hash;
	struct fib_nh_exception *fnhe;
	struct rtable *rt;
	u32 genid, hval;
	unsigned int i;
	int depth;

	genid = fnhe_genid(dev_net(nhc->nhc_dev));
	hval = fnhe_hashfun(daddr);

	spin_lock_bh(&fnhe_lock);

	/* Lazily allocate the per-nexthop exception table. GFP_ATOMIC:
	 * we hold a BH-disabling spinlock. On allocation failure we just
	 * drop the exception — it is only a cache.
	 */
	hash = rcu_dereference(nhc->nhc_exceptions);
	if (!hash) {
		hash = kcalloc(FNHE_HASH_SIZE, sizeof(*hash), GFP_ATOMIC);
		if (!hash)
			goto out_unlock;
		rcu_assign_pointer(nhc->nhc_exceptions, hash);
	}

	hash += hval;

	/* Look for an existing entry for daddr, counting chain depth. */
	depth = 0;
	for (fnhe = rcu_dereference(hash->chain); fnhe;
	     fnhe = rcu_dereference(fnhe->fnhe_next)) {
		if (fnhe->fnhe_daddr == daddr)
			break;
		depth++;
	}

	if (fnhe) {
		/* Refresh the existing exception in place. */
		if (fnhe->fnhe_genid != genid)
			fnhe->fnhe_genid = genid;
		if (gw)
			fnhe->fnhe_gw = gw;
		if (pmtu) {
			fnhe->fnhe_pmtu = pmtu;
			fnhe->fnhe_mtu_locked = lock;
		}
		/* max(1UL, ...) keeps expires non-zero (0 would read as
		 * "no expiry set").
		 */
		fnhe->fnhe_expires = max(1UL, expires);
		/* Update all cached dsts too */
		rt = rcu_dereference(fnhe->fnhe_rth_input);
		if (rt)
			fill_route_from_fnhe(rt, fnhe);
		rt = rcu_dereference(fnhe->fnhe_rth_output);
		if (rt)
			fill_route_from_fnhe(rt, fnhe);
	} else {
		/* No entry yet: recycle the oldest one if the chain is
		 * already deep, otherwise allocate a fresh entry at the
		 * head of the chain.
		 */
		if (depth > FNHE_RECLAIM_DEPTH)
			fnhe = fnhe_oldest(hash);
		else {
			fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC);
			if (!fnhe)
				goto out_unlock;

			fnhe->fnhe_next = hash->chain;
			rcu_assign_pointer(hash->chain, fnhe);
		}
		fnhe->fnhe_genid = genid;
		fnhe->fnhe_daddr = daddr;
		fnhe->fnhe_gw = gw;
		fnhe->fnhe_pmtu = pmtu;
		fnhe->fnhe_mtu_locked = lock;
		fnhe->fnhe_expires = max(1UL, expires);

		/* Exception created; mark the cached routes for the nexthop
		 * stale, so anyone caching it rechecks if this exception
		 * applies to them.
		 */
		rt = rcu_dereference(nhc->nhc_rth_input);
		if (rt)
			rt->dst.obsolete = DST_OBSOLETE_KILL;

		for_each_possible_cpu(i) {
			struct rtable __rcu **prt;
			prt = per_cpu_ptr(nhc->nhc_pcpu_rth_output, i);
			rt = rcu_dereference(*prt);
			if (rt)
				rt->dst.obsolete = DST_OBSOLETE_KILL;
		}
	}

	fnhe->fnhe_stamp = jiffies;

out_unlock:
	spin_unlock_bh(&fnhe_lock);
}
738
/* Process an ICMP redirect carried in @skb against route @rt.
 *
 * The advised gateway is validated (it must differ from the old one, be a
 * unicast/on-link address, pass the secure-redirect check, ...) and, if
 * acceptable and resolvable via neighbour lookup, recorded as a nexthop
 * exception so later lookups use the new gateway. With @kill_route, the
 * current cached route is also marked DST_OBSOLETE_KILL so it gets
 * re-validated. Invalid redirects are (optionally) logged and dropped.
 *
 * NOTE(review): callers appear to provide RCU protection
 * (__in_dev_get_rcu is used without a local rcu_read_lock) — confirm
 * against the call sites.
 */
static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4,
			     bool kill_route)
{
	__be32 new_gw = icmp_hdr(skb)->un.gateway;
	__be32 old_gw = ip_hdr(skb)->saddr;
	struct net_device *dev = skb->dev;
	struct in_device *in_dev;
	struct fib_result res;
	struct neighbour *n;
	struct net *net;

	/* Only the four defined redirect codes are honoured. */
	switch (icmp_hdr(skb)->code & 7) {
	case ICMP_REDIR_NET:
	case ICMP_REDIR_NETTOS:
	case ICMP_REDIR_HOST:
	case ICMP_REDIR_HOSTTOS:
		break;

	default:
		return;
	}

	/* The redirect must come from the gateway we are currently using. */
	if (rt->rt_gw_family != AF_INET || rt->rt_gw4 != old_gw)
		return;

	in_dev = __in_dev_get_rcu(dev);
	if (!in_dev)
		return;

	net = dev_net(dev);
	if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) ||
	    ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw) ||
	    ipv4_is_zeronet(new_gw))
		goto reject_redirect;

	if (!IN_DEV_SHARED_MEDIA(in_dev)) {
		/* Non-shared media: the new gateway must be on-link, and
		 * with secure redirects it must already be a known default
		 * gateway.
		 */
		if (!inet_addr_onlink(in_dev, new_gw, old_gw))
			goto reject_redirect;
		if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev))
			goto reject_redirect;
	} else {
		if (inet_addr_type(net, new_gw) != RTN_UNICAST)
			goto reject_redirect;
	}

	n = __ipv4_neigh_lookup(rt->dst.dev, new_gw);
	if (!n)
		n = neigh_create(&arp_tbl, &new_gw, rt->dst.dev);
	if (!IS_ERR(n)) {
		if (!(n->nud_state & NUD_VALID)) {
			/* Kick off resolution; the exception will be
			 * installed when a later redirect finds the
			 * neighbour valid.
			 */
			neigh_event_send(n, NULL);
		} else {
			if (fib_lookup(net, fl4, &res, 0) == 0) {
				struct fib_nh_common *nhc = FIB_RES_NHC(res);

				update_or_create_fnhe(nhc, fl4->daddr, new_gw,
						0, false,
						jiffies + ip_rt_gc_timeout);
			}
			if (kill_route)
				rt->dst.obsolete = DST_OBSOLETE_KILL;
			call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n);
		}
		neigh_release(n);
	}
	return;

reject_redirect:
#ifdef CONFIG_IP_ROUTE_VERBOSE
	if (IN_DEV_LOG_MARTIANS(in_dev)) {
		const struct iphdr *iph = (const struct iphdr *) skb->data;
		__be32 daddr = iph->daddr;
		__be32 saddr = iph->saddr;

		net_info_ratelimited("Redirect from %pI4 on %s about %pI4 ignored\n"
				     "  Advised path = %pI4 -> %pI4\n",
				     &old_gw, dev->name, &new_gw,
				     &saddr, &daddr);
	}
#endif
	;
}
821
David S. Miller4895c772012-07-17 04:19:00 -0700822static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
823{
824 struct rtable *rt;
825 struct flowi4 fl4;
Michal Kubecekf96ef982013-05-28 08:26:49 +0200826 const struct iphdr *iph = (const struct iphdr *) skb->data;
Lorenzo Colitti7d995692016-12-23 00:33:57 +0900827 struct net *net = dev_net(skb->dev);
Michal Kubecekf96ef982013-05-28 08:26:49 +0200828 int oif = skb->dev->ifindex;
829 u8 tos = RT_TOS(iph->tos);
830 u8 prot = iph->protocol;
831 u32 mark = skb->mark;
David S. Miller4895c772012-07-17 04:19:00 -0700832
833 rt = (struct rtable *) dst;
834
Lorenzo Colitti7d995692016-12-23 00:33:57 +0900835 __build_flow_key(net, &fl4, sk, iph, oif, tos, prot, mark, 0);
David S. Millerceb33202012-07-17 11:31:28 -0700836 __ip_do_redirect(rt, skb, &fl4, true);
David S. Miller4895c772012-07-17 04:19:00 -0700837}
838
Linus Torvalds1da177e2005-04-16 15:20:36 -0700839static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
840{
Eric Dumazetee6b9672008-03-05 18:30:47 -0800841 struct rtable *rt = (struct rtable *)dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700842 struct dst_entry *ret = dst;
843
844 if (rt) {
Timo Teräsd11a4dc2010-03-18 23:20:20 +0000845 if (dst->obsolete > 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700846 ip_rt_put(rt);
847 ret = NULL;
David S. Miller59436342012-07-10 06:58:42 -0700848 } else if ((rt->rt_flags & RTCF_REDIRECTED) ||
849 rt->dst.expires) {
David S. Miller89aef892012-07-17 11:00:09 -0700850 ip_rt_put(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700851 ret = NULL;
852 }
853 }
854 return ret;
855}
856
857/*
858 * Algorithm:
859 * 1. The first ip_rt_redirect_number redirects are sent
860 * with exponential backoff, then we stop sending them at all,
861 * assuming that the host ignores our redirects.
862 * 2. If we did not see packets requiring redirects
863 * during ip_rt_redirect_silence, we assume that the host
864 * forgot redirected route and start to send redirects again.
865 *
866 * This algorithm is much cheaper and more intelligent than dumb load limiting
867 * in icmp.c.
868 *
869 * NOTE. Do not forget to inhibit load limiting for redirects (redundant)
870 * and "frag. need" (breaks PMTU discovery) in icmp.c.
871 */
872
873void ip_rt_send_redirect(struct sk_buff *skb)
874{
Eric Dumazet511c3f92009-06-02 05:14:27 +0000875 struct rtable *rt = skb_rtable(skb);
Eric Dumazet30038fc2009-08-28 23:52:01 -0700876 struct in_device *in_dev;
David S. Miller92d86822011-02-04 15:55:25 -0800877 struct inet_peer *peer;
David S. Miller1d861aa2012-07-10 03:58:16 -0700878 struct net *net;
Eric Dumazet30038fc2009-08-28 23:52:01 -0700879 int log_martians;
David Ahern192132b2015-08-27 16:07:03 -0700880 int vif;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700881
Eric Dumazet30038fc2009-08-28 23:52:01 -0700882 rcu_read_lock();
Changli Gaod8d1f302010-06-10 23:31:35 -0700883 in_dev = __in_dev_get_rcu(rt->dst.dev);
Eric Dumazet30038fc2009-08-28 23:52:01 -0700884 if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) {
885 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700886 return;
Eric Dumazet30038fc2009-08-28 23:52:01 -0700887 }
888 log_martians = IN_DEV_LOG_MARTIANS(in_dev);
David Ahern385add92015-09-29 20:07:13 -0700889 vif = l3mdev_master_ifindex_rcu(rt->dst.dev);
Eric Dumazet30038fc2009-08-28 23:52:01 -0700890 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700891
David S. Miller1d861aa2012-07-10 03:58:16 -0700892 net = dev_net(rt->dst.dev);
David Ahern192132b2015-08-27 16:07:03 -0700893 peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, vif, 1);
David S. Miller92d86822011-02-04 15:55:25 -0800894 if (!peer) {
Julian Anastasove81da0e2012-10-08 11:41:15 +0000895 icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST,
896 rt_nexthop(rt, ip_hdr(skb)->daddr));
David S. Miller92d86822011-02-04 15:55:25 -0800897 return;
898 }
899
Linus Torvalds1da177e2005-04-16 15:20:36 -0700900 /* No redirected packets during ip_rt_redirect_silence;
901 * reset the algorithm.
902 */
Lorenzo Bianconic09551c62019-02-06 19:18:04 +0100903 if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence)) {
David S. Miller92d86822011-02-04 15:55:25 -0800904 peer->rate_tokens = 0;
Lorenzo Bianconic09551c62019-02-06 19:18:04 +0100905 peer->n_redirects = 0;
906 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700907
908 /* Too many ignored redirects; do not send anything
Changli Gaod8d1f302010-06-10 23:31:35 -0700909 * set dst.rate_last to the last seen redirected packet.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700910 */
Lorenzo Bianconic09551c62019-02-06 19:18:04 +0100911 if (peer->n_redirects >= ip_rt_redirect_number) {
David S. Miller92d86822011-02-04 15:55:25 -0800912 peer->rate_last = jiffies;
David S. Miller1d861aa2012-07-10 03:58:16 -0700913 goto out_put_peer;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700914 }
915
916 /* Check for load limit; set rate_last to the latest sent
917 * redirect.
918 */
David S. Miller92d86822011-02-04 15:55:25 -0800919 if (peer->rate_tokens == 0 ||
Li Yewang14fb8a72006-12-18 00:26:35 -0800920 time_after(jiffies,
David S. Miller92d86822011-02-04 15:55:25 -0800921 (peer->rate_last +
922 (ip_rt_redirect_load << peer->rate_tokens)))) {
Julian Anastasove81da0e2012-10-08 11:41:15 +0000923 __be32 gw = rt_nexthop(rt, ip_hdr(skb)->daddr);
924
925 icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw);
David S. Miller92d86822011-02-04 15:55:25 -0800926 peer->rate_last = jiffies;
927 ++peer->rate_tokens;
Lorenzo Bianconic09551c62019-02-06 19:18:04 +0100928 ++peer->n_redirects;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700929#ifdef CONFIG_IP_ROUTE_VERBOSE
Eric Dumazet30038fc2009-08-28 23:52:01 -0700930 if (log_martians &&
Joe Perchese87cc472012-05-13 21:56:26 +0000931 peer->rate_tokens == ip_rt_redirect_number)
932 net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n",
David S. Miller92101b32012-07-23 16:29:00 -0700933 &ip_hdr(skb)->saddr, inet_iif(skb),
Julian Anastasove81da0e2012-10-08 11:41:15 +0000934 &ip_hdr(skb)->daddr, &gw);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700935#endif
936 }
David S. Miller1d861aa2012-07-10 03:58:16 -0700937out_put_peer:
938 inet_putpeer(peer);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700939}
940
/* Input-path handler for packets that matched an error route: account the
 * failure in SNMP counters and, subject to a per-peer token bucket, emit
 * the matching ICMP destination-unreachable message. Always consumes
 * (frees) @skb and returns 0.
 */
static int ip_error(struct sk_buff *skb)
{
	struct rtable *rt = skb_rtable(skb);
	struct net_device *dev = skb->dev;
	struct in_device *in_dev;
	struct inet_peer *peer;
	unsigned long now;
	struct net *net;
	bool send;
	int code;

	/* On an L3 master (VRF) device, account against the ingress slave. */
	if (netif_is_l3_master(skb->dev)) {
		dev = __dev_get_by_index(dev_net(skb->dev), IPCB(skb)->iif);
		if (!dev)
			goto out;
	}

	in_dev = __in_dev_get_rcu(dev);

	/* IP on this device is disabled. */
	if (!in_dev)
		goto out;

	net = dev_net(rt->dst.dev);
	if (!IN_DEV_FORWARD(in_dev)) {
		/* Not forwarding: count the error but never send ICMP. */
		switch (rt->dst.error) {
		case EHOSTUNREACH:
			__IP_INC_STATS(net, IPSTATS_MIB_INADDRERRORS);
			break;

		case ENETUNREACH:
			__IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES);
			break;
		}
		goto out;
	}

	/* Map the route error onto an ICMP unreachable code. */
	switch (rt->dst.error) {
	case EINVAL:
	default:
		goto out;
	case EHOSTUNREACH:
		code = ICMP_HOST_UNREACH;
		break;
	case ENETUNREACH:
		code = ICMP_NET_UNREACH;
		__IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES);
		break;
	case EACCES:
		code = ICMP_PKT_FILTERED;
		break;
	}

	peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr,
			       l3mdev_master_ifindex(skb->dev), 1);

	/* Token bucket per source peer: refill by elapsed jiffies up to
	 * ip_rt_error_burst, spend ip_rt_error_cost per ICMP sent. With no
	 * peer entry we send unconditionally.
	 */
	send = true;
	if (peer) {
		now = jiffies;
		peer->rate_tokens += now - peer->rate_last;
		if (peer->rate_tokens > ip_rt_error_burst)
			peer->rate_tokens = ip_rt_error_burst;
		peer->rate_last = now;
		if (peer->rate_tokens >= ip_rt_error_cost)
			peer->rate_tokens -= ip_rt_error_cost;
		else
			send = false;
		inet_putpeer(peer);
	}
	if (send)
		icmp_send(skb, ICMP_DEST_UNREACH, code, 0);

out:	kfree_skb(skb);
	return 0;
}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001016
/* Apply a newly learned path MTU @mtu to route @rt (flow @fl4) by
 * recording it as a nexthop exception.
 *
 * No-ops when the route's MTU is administratively locked, or when the new
 * value would raise the current effective MTU. Values below
 * ip_rt_min_pmtu are clamped up to the minimum and marked locked, so a
 * bogus tiny ICMP-advertised MTU cannot take effect. A redundant update
 * (same PMTU, not locked, existing entry not yet near expiry) is skipped
 * to avoid churning the exception table.
 */
static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
{
	struct dst_entry *dst = &rt->dst;
	u32 old_mtu = ipv4_mtu(dst);
	struct fib_result res;
	bool lock = false;

	if (ip_mtu_locked(dst))
		return;

	if (old_mtu < mtu)
		return;

	if (mtu < ip_rt_min_pmtu) {
		lock = true;
		mtu = min(old_mtu, ip_rt_min_pmtu);
	}

	if (rt->rt_pmtu == mtu && !lock &&
	    time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2))
		return;

	rcu_read_lock();
	if (fib_lookup(dev_net(dst->dev), fl4, &res, 0) == 0) {
		struct fib_nh_common *nhc = FIB_RES_NHC(res);

		update_or_create_fnhe(nhc, fl4->daddr, 0, mtu, lock,
				      jiffies + ip_rt_mtu_expires);
	}
	rcu_read_unlock();
}
1048
David S. Miller4895c772012-07-17 04:19:00 -07001049static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1050 struct sk_buff *skb, u32 mtu)
1051{
1052 struct rtable *rt = (struct rtable *) dst;
1053 struct flowi4 fl4;
1054
1055 ip_rt_build_flow_key(&fl4, sk, skb);
Steffen Klassertd851c122012-10-07 22:47:25 +00001056 __ip_rt_update_pmtu(rt, &fl4, mtu);
David S. Miller4895c772012-07-17 04:19:00 -07001057}
1058
David S. Miller36393392012-06-14 22:21:46 -07001059void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
Maciej Żenczykowskid888f392018-09-25 20:56:26 -07001060 int oif, u8 protocol)
David S. Miller36393392012-06-14 22:21:46 -07001061{
David S. Miller4895c772012-07-17 04:19:00 -07001062 const struct iphdr *iph = (const struct iphdr *) skb->data;
David S. Miller36393392012-06-14 22:21:46 -07001063 struct flowi4 fl4;
1064 struct rtable *rt;
Maciej Żenczykowskid888f392018-09-25 20:56:26 -07001065 u32 mark = IP4_REPLY_MARK(net, skb->mark);
Lorenzo Colitti1b3c61d2014-05-13 10:17:34 -07001066
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09001067 __build_flow_key(net, &fl4, NULL, iph, oif,
Maciej Żenczykowskid888f392018-09-25 20:56:26 -07001068 RT_TOS(iph->tos), protocol, mark, 0);
David S. Miller36393392012-06-14 22:21:46 -07001069 rt = __ip_route_output_key(net, &fl4);
1070 if (!IS_ERR(rt)) {
David S. Miller4895c772012-07-17 04:19:00 -07001071 __ip_rt_update_pmtu(rt, &fl4, mtu);
David S. Miller36393392012-06-14 22:21:46 -07001072 ip_rt_put(rt);
1073 }
1074}
1075EXPORT_SYMBOL_GPL(ipv4_update_pmtu);
1076
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001077static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
David S. Miller36393392012-06-14 22:21:46 -07001078{
David S. Miller4895c772012-07-17 04:19:00 -07001079 const struct iphdr *iph = (const struct iphdr *) skb->data;
1080 struct flowi4 fl4;
1081 struct rtable *rt;
David S. Miller36393392012-06-14 22:21:46 -07001082
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09001083 __build_flow_key(sock_net(sk), &fl4, sk, iph, 0, 0, 0, 0, 0);
Lorenzo Colitti1b3c61d2014-05-13 10:17:34 -07001084
1085 if (!fl4.flowi4_mark)
1086 fl4.flowi4_mark = IP4_REPLY_MARK(sock_net(sk), skb->mark);
1087
David S. Miller4895c772012-07-17 04:19:00 -07001088 rt = __ip_route_output_key(sock_net(sk), &fl4);
1089 if (!IS_ERR(rt)) {
1090 __ip_rt_update_pmtu(rt, &fl4, mtu);
1091 ip_rt_put(rt);
1092 }
David S. Miller36393392012-06-14 22:21:46 -07001093}
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001094
/* Update the path MTU for @sk's cached route after an ICMP
 * "fragmentation needed" for @skb.
 *
 * Runs under bh_lock_sock(): if the socket is owned by the user (or has
 * no cached dst) we cannot touch its dst directly, so fall back to the
 * lookup-based __ipv4_sk_update_pmtu(). Otherwise validate the cached
 * dst, re-route if stale, apply the MTU to the path dst (through any xfrm
 * bundle via xfrm_dst_path), and re-install a fresh route on the socket
 * if one was created. @odst's reference is dropped on exit
 * (dst_release(NULL) is a no-op).
 */
void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
{
	const struct iphdr *iph = (const struct iphdr *) skb->data;
	struct flowi4 fl4;
	struct rtable *rt;
	struct dst_entry *odst = NULL;
	bool new = false;		/* true once rt holds a reference we own */
	struct net *net = sock_net(sk);

	bh_lock_sock(sk);

	if (!ip_sk_accept_pmtu(sk))
		goto out;

	odst = sk_dst_get(sk);

	if (sock_owned_by_user(sk) || !odst) {
		__ipv4_sk_update_pmtu(skb, sk, mtu);
		goto out;
	}

	__build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0);

	rt = (struct rtable *)odst;
	/* Cached dst no longer valid: get a fresh route. */
	if (odst->obsolete && !odst->ops->check(odst, 0)) {
		rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
		if (IS_ERR(rt))
			goto out;

		new = true;
	}

	__ip_rt_update_pmtu((struct rtable *) xfrm_dst_path(&rt->dst), &fl4, mtu);

	/* The update may have killed the route (DST_OBSOLETE_KILL);
	 * re-route if so.
	 */
	if (!dst_check(&rt->dst, 0)) {
		if (new)
			dst_release(&rt->dst);

		rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
		if (IS_ERR(rt))
			goto out;

		new = true;
	}

	if (new)
		sk_dst_set(sk, &rt->dst);	/* consumes our reference */

out:
	bh_unlock_sock(sk);
	dst_release(odst);
}
David S. Miller36393392012-06-14 22:21:46 -07001147EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu);
David S. Millerf39925d2011-02-09 22:00:16 -08001148
David S. Millerb42597e2012-07-11 21:25:45 -07001149void ipv4_redirect(struct sk_buff *skb, struct net *net,
Maciej Żenczykowski1042caa2018-09-25 20:56:27 -07001150 int oif, u8 protocol)
David S. Millerb42597e2012-07-11 21:25:45 -07001151{
David S. Miller4895c772012-07-17 04:19:00 -07001152 const struct iphdr *iph = (const struct iphdr *) skb->data;
David S. Millerb42597e2012-07-11 21:25:45 -07001153 struct flowi4 fl4;
1154 struct rtable *rt;
1155
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09001156 __build_flow_key(net, &fl4, NULL, iph, oif,
Maciej Żenczykowski1042caa2018-09-25 20:56:27 -07001157 RT_TOS(iph->tos), protocol, 0, 0);
David S. Millerb42597e2012-07-11 21:25:45 -07001158 rt = __ip_route_output_key(net, &fl4);
1159 if (!IS_ERR(rt)) {
David S. Millerceb33202012-07-17 11:31:28 -07001160 __ip_do_redirect(rt, skb, &fl4, false);
David S. Millerb42597e2012-07-11 21:25:45 -07001161 ip_rt_put(rt);
1162 }
1163}
1164EXPORT_SYMBOL_GPL(ipv4_redirect);
1165
1166void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk)
1167{
David S. Miller4895c772012-07-17 04:19:00 -07001168 const struct iphdr *iph = (const struct iphdr *) skb->data;
1169 struct flowi4 fl4;
1170 struct rtable *rt;
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09001171 struct net *net = sock_net(sk);
David S. Millerb42597e2012-07-11 21:25:45 -07001172
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09001173 __build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0);
1174 rt = __ip_route_output_key(net, &fl4);
David S. Miller4895c772012-07-17 04:19:00 -07001175 if (!IS_ERR(rt)) {
David S. Millerceb33202012-07-17 11:31:28 -07001176 __ip_do_redirect(rt, skb, &fl4, false);
David S. Miller4895c772012-07-17 04:19:00 -07001177 ip_rt_put(rt);
1178 }
David S. Millerb42597e2012-07-11 21:25:45 -07001179}
1180EXPORT_SYMBOL_GPL(ipv4_sk_redirect);
1181
David S. Millerefbc368d2011-12-01 13:38:59 -05001182static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
1183{
1184 struct rtable *rt = (struct rtable *) dst;
1185
David S. Millerceb33202012-07-17 11:31:28 -07001186 /* All IPV4 dsts are created with ->obsolete set to the value
1187 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1188 * into this function always.
1189 *
Timo Teräs387aa652013-05-27 20:46:31 +00001190 * When a PMTU/redirect information update invalidates a route,
1191 * this is indicated by setting obsolete to DST_OBSOLETE_KILL or
Julian Wiedmann02afc7a2019-03-20 20:02:56 +01001192 * DST_OBSOLETE_DEAD.
David S. Millerceb33202012-07-17 11:31:28 -07001193 */
Timo Teräs387aa652013-05-27 20:46:31 +00001194 if (dst->obsolete != DST_OBSOLETE_FORCE_CHK || rt_is_expired(rt))
David S. Millerefbc368d2011-12-01 13:38:59 -05001195 return NULL;
Timo Teräsd11a4dc2010-03-18 23:20:20 +00001196 return dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001197}
1198
/* Send an ICMP host-unreachable for @skb after a link failure.
 *
 * The packet's control block may no longer describe its IP options, so the
 * options are recompiled from the header itself before handing them to
 * __icmp_send(); a packet whose header cannot be validated (or whose
 * options fail to compile) is silently skipped.
 */
static void ipv4_send_dest_unreach(struct sk_buff *skb)
{
	struct ip_options opt;
	int res;

	/* Recompile ip options since IPCB may not be valid anymore.
	 * Also check we have a reasonable ipv4 header.
	 */
	if (!pskb_network_may_pull(skb, sizeof(struct iphdr)) ||
	    ip_hdr(skb)->version != 4 || ip_hdr(skb)->ihl < 5)
		return;

	memset(&opt, 0, sizeof(opt));
	if (ip_hdr(skb)->ihl > 5) {
		/* Header carries options: make sure they are linear,
		 * then recompile them.
		 */
		if (!pskb_network_may_pull(skb, ip_hdr(skb)->ihl * 4))
			return;
		opt.optlen = ip_hdr(skb)->ihl * 4 - sizeof(struct iphdr);

		rcu_read_lock();
		res = __ip_options_compile(dev_net(skb->dev), &opt, skb, NULL);
		rcu_read_unlock();

		if (res)
			return;
	}
	__icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0, &opt);
}
1226
/* dst_ops ->link_failure handler: notify the sender via ICMP
 * host-unreachable and expire the packet's cached route immediately.
 */
static void ipv4_link_failure(struct sk_buff *skb)
{
	struct rtable *rt;

	ipv4_send_dest_unreach(skb);

	rt = skb_rtable(skb);
	if (rt)
		dst_set_expires(&rt->dst, 0);
}
1237
Eric W. Biedermanede20592015-10-07 16:48:47 -05001238static int ip_rt_bug(struct net *net, struct sock *sk, struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001239{
Joe Perches91df42b2012-05-15 14:11:54 +00001240 pr_debug("%s: %pI4 -> %pI4, %s\n",
1241 __func__, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr,
1242 skb->dev ? skb->dev->name : "?");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001243 kfree_skb(skb);
Dave Jonesc378a9c2011-05-21 07:16:42 +00001244 WARN_ON(1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001245 return 0;
1246}
1247
/*
   We do not cache the source address of the outgoing interface,
   because it is used only by IP RR, TS and SRR options,
   so it is out of the fast path.

   BTW remember: "addr" is allowed to be unaligned
   in IP options!
 */
1256
/* Write into @addr (4 bytes, possibly unaligned — hence the memcpy) the
 * source address this host would use on route @rt, for filling in IP
 * RR/TS/SRR options.
 *
 * For an output route the packet's own source address is used; for an
 * input route the FIB is consulted for the preferred source, falling back
 * to an address selection on the output device.
 */
void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt)
{
	__be32 src;

	if (rt_is_output_route(rt))
		src = ip_hdr(skb)->saddr;
	else {
		struct fib_result res;
		struct iphdr *iph = ip_hdr(skb);
		struct flowi4 fl4 = {
			.daddr = iph->daddr,
			.saddr = iph->saddr,
			.flowi4_tos = RT_TOS(iph->tos),
			.flowi4_oif = rt->dst.dev->ifindex,
			.flowi4_iif = skb->dev->ifindex,
			.flowi4_mark = skb->mark,
		};

		rcu_read_lock();
		if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res, 0) == 0)
			src = fib_result_prefsrc(dev_net(rt->dst.dev), &res);
		else
			/* No matching route: pick any universe-scope
			 * address on the device facing the nexthop.
			 */
			src = inet_select_addr(rt->dst.dev,
					       rt_nexthop(rt, iph->daddr),
					       RT_SCOPE_UNIVERSE);
		rcu_read_unlock();
	}
	memcpy(addr, &src, 4);
}
1286
#ifdef CONFIG_IP_ROUTE_CLASSID
/* Merge routing-classid @tag into the dst's tclassid: each 16-bit half is
 * taken from @tag only if it is still unset on the dst.
 */
static void set_class_tag(struct rtable *rt, u32 tag)
{
	u32 tclassid = rt->dst.tclassid;

	if (!(tclassid & 0xFFFF))
		tclassid |= tag & 0xFFFF;
	if (!(tclassid & 0xFFFF0000))
		tclassid |= tag & 0xFFFF0000;

	rt->dst.tclassid = tclassid;
}
#endif
1296
David S. Miller0dbaee32010-12-13 12:52:14 -08001297static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
1298{
Gao Feng7ed14d92017-04-12 12:34:03 +08001299 unsigned int header_size = sizeof(struct tcphdr) + sizeof(struct iphdr);
Eric Dumazet164a5e72017-10-18 17:02:03 -07001300 unsigned int advmss = max_t(unsigned int, ipv4_mtu(dst) - header_size,
Gao Feng7ed14d92017-04-12 12:34:03 +08001301 ip_rt_min_advmss);
David S. Miller0dbaee32010-12-13 12:52:14 -08001302
Gao Feng7ed14d92017-04-12 12:34:03 +08001303 return min(advmss, IPV4_MAX_PMTU - header_size);
David S. Miller0dbaee32010-12-13 12:52:14 -08001304}
1305
/* dst_ops ->mtu handler: report the effective MTU of a route.
 *
 * Preference order: a still-valid learned PMTU, then the RTAX_MTU metric,
 * then the device MTU (clamped to 576 for locked routes via a gateway,
 * and to IP_MAX_MTU overall), minus any lwtunnel encapsulation headroom.
 */
static unsigned int ipv4_mtu(const struct dst_entry *dst)
{
	const struct rtable *rt = (const struct rtable *) dst;
	unsigned int mtu = rt->rt_pmtu;

	/* Expired (or absent) learned PMTU: fall back to the metric. */
	if (!mtu || time_after_eq(jiffies, rt->dst.expires))
		mtu = dst_metric_raw(dst, RTAX_MTU);

	if (mtu)
		return mtu;

	/* READ_ONCE: the device MTU can change concurrently. */
	mtu = READ_ONCE(dst->dev->mtu);

	if (unlikely(ip_mtu_locked(dst))) {
		/* Locked route through a gateway: legacy 576-byte cap. */
		if (rt->rt_gw_family && mtu > 576)
			mtu = 576;
	}

	mtu = min_t(unsigned int, mtu, IP_MAX_MTU);

	return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
}
1328
/* Remove the nexthop exception for @daddr (if any) from @nhc's
 * exception hash and free it.  Runs under fnhe_lock; readers walk the
 * chain under RCU, so the entry is unlinked with rcu_assign_pointer()
 * and freed via kfree_rcu().
 */
static void ip_del_fnhe(struct fib_nh_common *nhc, __be32 daddr)
{
	struct fnhe_hash_bucket *hash;
	struct fib_nh_exception *fnhe, __rcu **fnhe_p;
	u32 hval = fnhe_hashfun(daddr);

	spin_lock_bh(&fnhe_lock);

	hash = rcu_dereference_protected(nhc->nhc_exceptions,
					 lockdep_is_held(&fnhe_lock));
	hash += hval;

	/* walk the bucket chain keeping a pointer to the link to patch */
	fnhe_p = &hash->chain;
	fnhe = rcu_dereference_protected(*fnhe_p, lockdep_is_held(&fnhe_lock));
	while (fnhe) {
		if (fnhe->fnhe_daddr == daddr) {
			rcu_assign_pointer(*fnhe_p, rcu_dereference_protected(
				fnhe->fnhe_next, lockdep_is_held(&fnhe_lock)));
			/* set fnhe_daddr to 0 to ensure it won't bind with
			 * new dsts in rt_bind_exception().
			 */
			fnhe->fnhe_daddr = 0;
			fnhe_flush_routes(fnhe);
			kfree_rcu(fnhe, rcu);
			break;
		}
		fnhe_p = &fnhe->fnhe_next;
		fnhe = rcu_dereference_protected(fnhe->fnhe_next,
						 lockdep_is_held(&fnhe_lock));
	}

	spin_unlock_bh(&fnhe_lock);
}
1362
/* Look up the nexthop exception for @daddr on @nhc.  Called under RCU.
 * Returns the matching entry, or NULL if none exists.  An entry whose
 * fnhe_expires deadline has passed is deleted on the spot (ip_del_fnhe
 * takes fnhe_lock) and the lookup reports a miss.
 */
static struct fib_nh_exception *find_exception(struct fib_nh_common *nhc,
					       __be32 daddr)
{
	struct fnhe_hash_bucket *hash = rcu_dereference(nhc->nhc_exceptions);
	struct fib_nh_exception *fnhe;
	u32 hval;

	if (!hash)
		return NULL;

	hval = fnhe_hashfun(daddr);

	for (fnhe = rcu_dereference(hash[hval].chain); fnhe;
	     fnhe = rcu_dereference(fnhe->fnhe_next)) {
		if (fnhe->fnhe_daddr == daddr) {
			if (fnhe->fnhe_expires &&
			    time_after(jiffies, fnhe->fnhe_expires)) {
				/* stale entry: drop it and miss */
				ip_del_fnhe(nhc, daddr);
				break;
			}
			return fnhe;
		}
	}
	return NULL;
}
David S. Miller4895c772012-07-17 04:19:00 -07001388
David Ahern50d889b2018-05-21 09:08:13 -07001389/* MTU selection:
1390 * 1. mtu on route is locked - use it
1391 * 2. mtu from nexthop exception
1392 * 3. mtu from egress device
1393 */
1394
1395u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr)
1396{
David Aherneba618a2019-04-02 14:11:55 -07001397 struct fib_nh_common *nhc = res->nhc;
1398 struct net_device *dev = nhc->nhc_dev;
David Ahern50d889b2018-05-21 09:08:13 -07001399 struct fib_info *fi = res->fi;
David Ahern50d889b2018-05-21 09:08:13 -07001400 u32 mtu = 0;
1401
1402 if (dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu ||
1403 fi->fib_metrics->metrics[RTAX_LOCK - 1] & (1 << RTAX_MTU))
1404 mtu = fi->fib_mtu;
1405
1406 if (likely(!mtu)) {
1407 struct fib_nh_exception *fnhe;
1408
David Aherna5995e72019-04-30 07:45:50 -07001409 fnhe = find_exception(nhc, daddr);
David Ahern50d889b2018-05-21 09:08:13 -07001410 if (fnhe && !time_after_eq(jiffies, fnhe->fnhe_expires))
1411 mtu = fnhe->fnhe_pmtu;
1412 }
1413
1414 if (likely(!mtu))
1415 mtu = min(READ_ONCE(dev->mtu), IP_MAX_MTU);
1416
David Aherneba618a2019-04-02 14:11:55 -07001417 return mtu - lwtunnel_headroom(nhc->nhc_lwtstate, mtu);
David Ahern50d889b2018-05-21 09:08:13 -07001418}
1419
/* Bind route @rt to nexthop exception @fnhe under fnhe_lock.
 *
 * If the netns fnhe generation has moved on, the exception's learned
 * gateway/PMTU state and cached dsts are wiped first.  The exception's
 * state is applied to @rt via fill_route_from_fnhe(), and with
 * @do_cache the route is stored in the exception's input or output
 * slot (holding a dst reference; the previous occupant is released).
 *
 * Returns true only when @rt was actually cached on the exception, so
 * the caller knows whether to put it on the uncached list instead.
 */
static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
			      __be32 daddr, const bool do_cache)
{
	bool ret = false;

	spin_lock_bh(&fnhe_lock);

	/* recheck under the lock: ip_del_fnhe() zeroes fnhe_daddr */
	if (daddr == fnhe->fnhe_daddr) {
		struct rtable __rcu **porig;
		struct rtable *orig;
		int genid = fnhe_genid(dev_net(rt->dst.dev));

		if (rt_is_input_route(rt))
			porig = &fnhe->fnhe_rth_input;
		else
			porig = &fnhe->fnhe_rth_output;
		orig = rcu_dereference(*porig);

		if (fnhe->fnhe_genid != genid) {
			/* stale generation: reset learned state */
			fnhe->fnhe_genid = genid;
			fnhe->fnhe_gw = 0;
			fnhe->fnhe_pmtu = 0;
			fnhe->fnhe_expires = 0;
			fnhe->fnhe_mtu_locked = false;
			fnhe_flush_routes(fnhe);
			orig = NULL;
		}
		fill_route_from_fnhe(rt, fnhe);
		if (!rt->rt_gw4) {
			rt->rt_gw4 = daddr;
			rt->rt_gw_family = AF_INET;
		}

		if (do_cache) {
			/* take a reference for the cache slot */
			dst_hold(&rt->dst);
			rcu_assign_pointer(*porig, rt);
			if (orig) {
				dst_dev_put(&orig->dst);
				dst_release(&orig->dst);
			}
			ret = true;
		}

		fnhe->fnhe_stamp = jiffies;
	}
	spin_unlock_bh(&fnhe_lock);

	return ret;
}
1469
/* Try to cache @rt on the nexthop: in nhc_rth_input for input routes,
 * otherwise in this CPU's nhc_pcpu_rth_output slot.  cmpxchg() settles
 * races with concurrent writers: on success the previous occupant's
 * references are dropped, on failure our extra hold is released again.
 * Returns true if @rt is now cached.
 */
static bool rt_cache_route(struct fib_nh_common *nhc, struct rtable *rt)
{
	struct rtable *orig, *prev, **p;
	bool ret = true;

	if (rt_is_input_route(rt)) {
		p = (struct rtable **)&nhc->nhc_rth_input;
	} else {
		p = (struct rtable **)raw_cpu_ptr(nhc->nhc_pcpu_rth_output);
	}
	orig = *p;

	/* hold dst before doing cmpxchg() to avoid race condition
	 * on this dst
	 */
	dst_hold(&rt->dst);
	prev = cmpxchg(p, orig, rt);
	if (prev == orig) {
		if (orig) {
			/* drop the old entry's device and dst refs */
			dst_dev_put(&orig->dst);
			dst_release(&orig->dst);
		}
	} else {
		/* lost the race: undo the hold taken above */
		dst_release(&rt->dst);
		ret = false;
	}

	return ret;
}
1499
/* Per-CPU list of routes that were not stored in a nexthop or
 * exception cache; rt_flush_dev() walks these lists to retarget
 * routes when their device goes away.
 */
struct uncached_list {
	spinlock_t lock;	/* protects head */
	struct list_head head;
};

static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt_uncached_list);
David S. Millercaacf052012-07-31 15:06:50 -07001506
Xin Long510c3212018-02-14 19:06:02 +08001507void rt_add_uncached_list(struct rtable *rt)
David S. Millercaacf052012-07-31 15:06:50 -07001508{
Eric Dumazet5055c372015-01-14 15:17:06 -08001509 struct uncached_list *ul = raw_cpu_ptr(&rt_uncached_list);
1510
1511 rt->rt_uncached_list = ul;
1512
1513 spin_lock_bh(&ul->lock);
1514 list_add_tail(&rt->rt_uncached, &ul->head);
1515 spin_unlock_bh(&ul->lock);
David S. Millercaacf052012-07-31 15:06:50 -07001516}
1517
Xin Long510c3212018-02-14 19:06:02 +08001518void rt_del_uncached_list(struct rtable *rt)
David S. Millercaacf052012-07-31 15:06:50 -07001519{
Eric Dumazet78df76a2012-08-24 05:40:47 +00001520 if (!list_empty(&rt->rt_uncached)) {
Eric Dumazet5055c372015-01-14 15:17:06 -08001521 struct uncached_list *ul = rt->rt_uncached_list;
1522
1523 spin_lock_bh(&ul->lock);
David S. Millercaacf052012-07-31 15:06:50 -07001524 list_del(&rt->rt_uncached);
Eric Dumazet5055c372015-01-14 15:17:06 -08001525 spin_unlock_bh(&ul->lock);
David S. Millercaacf052012-07-31 15:06:50 -07001526 }
1527}
1528
Xin Long510c3212018-02-14 19:06:02 +08001529static void ipv4_dst_destroy(struct dst_entry *dst)
1530{
Xin Long510c3212018-02-14 19:06:02 +08001531 struct rtable *rt = (struct rtable *)dst;
1532
David Ahern1620a332018-10-04 20:07:54 -07001533 ip_dst_metrics_put(dst);
Xin Long510c3212018-02-14 19:06:02 +08001534 rt_del_uncached_list(rt);
1535}
1536
/* Called when @dev is going away: retarget every uncached route still
 * pointing at @dev to the netns loopback device, moving the device
 * reference along.  Must scan every CPU's uncached list.
 */
void rt_flush_dev(struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct rtable *rt;
	int cpu;

	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu);

		spin_lock_bh(&ul->lock);
		list_for_each_entry(rt, &ul->head, rt_uncached) {
			if (rt->dst.dev != dev)
				continue;
			rt->dst.dev = net->loopback_dev;
			/* hold the replacement before releasing @dev */
			dev_hold(rt->dst.dev);
			dev_put(dev);
		}
		spin_unlock_bh(&ul->lock);
	}
}
1557
Eric Dumazet4331deb2012-07-25 05:11:23 +00001558static bool rt_cache_valid(const struct rtable *rt)
David S. Millerd2d68ba92012-07-17 12:58:50 -07001559{
Eric Dumazet4331deb2012-07-25 05:11:23 +00001560 return rt &&
1561 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1562 !rt_is_expired(rt);
David S. Millerd2d68ba92012-07-17 12:58:50 -07001563}
1564
/* Finish building @rt from FIB lookup result @res.
 *
 * Copies the nexthop gateway (for link-scope nexthops), metrics,
 * lwtunnel state and (with CONFIG_IP_ROUTE_CLASSID) the tclassid into
 * the route, then tries to cache it: on @fnhe when a nexthop exception
 * applies, else in the nexthop cache when @do_cache is set.  Routes
 * that end up uncached (including the !@fi case) go on the per-CPU
 * uncached list so device teardown can still reach them.
 */
static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
			   const struct fib_result *res,
			   struct fib_nh_exception *fnhe,
			   struct fib_info *fi, u16 type, u32 itag,
			   const bool do_cache)
{
	bool cached = false;

	if (fi) {
		struct fib_nh_common *nhc = FIB_RES_NHC(*res);

		/* use the gateway only for on-link (scope LINK) nexthops */
		if (nhc->nhc_gw_family && nhc->nhc_scope == RT_SCOPE_LINK) {
			rt->rt_gw_family = nhc->nhc_gw_family;
			/* only INET and INET6 are supported */
			if (likely(nhc->nhc_gw_family == AF_INET))
				rt->rt_gw4 = nhc->nhc_gw.ipv4;
			else
				rt->rt_gw6 = nhc->nhc_gw.ipv6;
		}

		ip_dst_init_metrics(&rt->dst, fi->fib_metrics);

#ifdef CONFIG_IP_ROUTE_CLASSID
		{
			struct fib_nh *nh;

			nh = container_of(nhc, struct fib_nh, nh_common);
			rt->dst.tclassid = nh->nh_tclassid;
		}
#endif
		rt->dst.lwtstate = lwtstate_get(nhc->nhc_lwtstate);
		if (unlikely(fnhe))
			cached = rt_bind_exception(rt, fnhe, daddr, do_cache);
		else if (do_cache)
			cached = rt_cache_route(nhc, rt);
		if (unlikely(!cached)) {
			/* Routes we intend to cache in nexthop exception or
			 * FIB nexthop have the DST_NOCACHE bit clear.
			 * However, if we are unsuccessful at storing this
			 * route into the cache we really need to set it.
			 */
			if (!rt->rt_gw4) {
				rt->rt_gw_family = AF_INET;
				rt->rt_gw4 = daddr;
			}
			rt_add_uncached_list(rt);
		}
	} else
		rt_add_uncached_list(rt);

#ifdef CONFIG_IP_ROUTE_CLASSID
#ifdef CONFIG_IP_MULTIPLE_TABLES
	set_class_tag(rt, res->tclassid);
#endif
	set_class_tag(rt, itag);
#endif
}
1622
David Ahern9ab179d2016-04-07 11:10:06 -07001623struct rtable *rt_dst_alloc(struct net_device *dev,
1624 unsigned int flags, u16 type,
1625 bool nopolicy, bool noxfrm, bool will_cache)
David S. Miller0c4dcd52011-02-17 15:42:37 -08001626{
David Ahernd08c4f32015-09-02 13:58:34 -07001627 struct rtable *rt;
1628
1629 rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
Wei Wanga4c2fd72017-06-17 10:42:42 -07001630 (will_cache ? 0 : DST_HOST) |
David Ahernd08c4f32015-09-02 13:58:34 -07001631 (nopolicy ? DST_NOPOLICY : 0) |
Wei Wangb2a9c0e2017-06-17 10:42:41 -07001632 (noxfrm ? DST_NOXFRM : 0));
David Ahernd08c4f32015-09-02 13:58:34 -07001633
1634 if (rt) {
1635 rt->rt_genid = rt_genid_ipv4(dev_net(dev));
1636 rt->rt_flags = flags;
1637 rt->rt_type = type;
1638 rt->rt_is_input = 0;
1639 rt->rt_iif = 0;
1640 rt->rt_pmtu = 0;
Sabrina Dubrocad52e5a72018-03-14 10:21:14 +01001641 rt->rt_mtu_locked = 0;
David Ahern1550c172019-04-05 16:30:27 -07001642 rt->rt_gw_family = 0;
1643 rt->rt_gw4 = 0;
David Ahernd08c4f32015-09-02 13:58:34 -07001644 INIT_LIST_HEAD(&rt->rt_uncached);
1645
1646 rt->dst.output = ip_output;
1647 if (flags & RTCF_LOCAL)
1648 rt->dst.input = ip_local_deliver;
1649 }
1650
1651 return rt;
David S. Miller0c4dcd52011-02-17 15:42:37 -08001652}
David Ahern9ab179d2016-04-07 11:10:06 -07001653EXPORT_SYMBOL(rt_dst_alloc);
David S. Miller0c4dcd52011-02-17 15:42:37 -08001654
/* called in rcu_read_lock() section */
/* Validate the source address of a multicast packet arriving on @dev.
 * Returns 0 if acceptable, a negative errno otherwise.  For non-zero
 * sources, fib_validate_source() performs the reverse-path check and
 * writes its tag through @itag.
 */
int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
			  u8 tos, struct net_device *dev,
			  struct in_device *in_dev, u32 *itag)
{
	int err;

	/* Primary sanity checks. */
	if (!in_dev)
		return -EINVAL;

	/* source must be unicast and the frame must be IP */
	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
	    skb->protocol != htons(ETH_P_IP))
		return -EINVAL;

	/* loopback source allowed only when route_localnet is enabled */
	if (ipv4_is_loopback(saddr) && !IN_DEV_ROUTE_LOCALNET(in_dev))
		return -EINVAL;

	if (ipv4_is_zeronet(saddr)) {
		/* 0.0.0.0 source is valid only for local-multicast
		 * destinations or IGMP packets.
		 */
		if (!ipv4_is_local_multicast(daddr) &&
		    ip_hdr(skb)->protocol != IPPROTO_IGMP)
			return -EINVAL;
	} else {
		err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
					  in_dev, itag);
		if (err < 0)
			return err;
	}
	return 0;
}
1685
/* called in rcu_read_lock() section */
/* Build and attach an input route for a multicast packet on @dev.
 * The source is validated first; @our adds RTCF_LOCAL for groups this
 * host has joined.  dst.output is set to ip_rt_bug since this route is
 * input-only; with CONFIG_IP_MROUTE, non-local groups are handed to
 * ip_mr_input when multicast forwarding is enabled on the device.
 */
static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
			     u8 tos, struct net_device *dev, int our)
{
	struct in_device *in_dev = __in_dev_get_rcu(dev);
	unsigned int flags = RTCF_MULTICAST;
	struct rtable *rth;
	u32 itag = 0;
	int err;

	err = ip_mc_validate_source(skb, daddr, saddr, tos, dev, in_dev, &itag);
	if (err)
		return err;

	if (our)
		flags |= RTCF_LOCAL;

	rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false);
	if (!rth)
		return -ENOBUFS;

#ifdef CONFIG_IP_ROUTE_CLASSID
	rth->dst.tclassid = itag;
#endif
	rth->dst.output = ip_rt_bug;
	rth->rt_is_input= 1;

#ifdef CONFIG_IP_MROUTE
	if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev))
		rth->dst.input = ip_mr_input;
#endif
	RT_CACHE_STAT_INC(in_slow_mc);

	skb_dst_set(skb, &rth->dst);
	return 0;
}
1723
1724
1725static void ip_handle_martian_source(struct net_device *dev,
1726 struct in_device *in_dev,
1727 struct sk_buff *skb,
Al Viro9e12bb22006-09-26 21:25:20 -07001728 __be32 daddr,
1729 __be32 saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001730{
1731 RT_CACHE_STAT_INC(in_martian_src);
1732#ifdef CONFIG_IP_ROUTE_VERBOSE
1733 if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) {
1734 /*
1735 * RFC1812 recommendation, if source is martian,
1736 * the only hint is MAC header.
1737 */
Joe Perches058bd4d2012-03-11 18:36:11 +00001738 pr_warn("martian source %pI4 from %pI4, on dev %s\n",
Harvey Harrison673d57e2008-10-31 00:53:57 -07001739 &daddr, &saddr, dev->name);
Arnaldo Carvalho de Melo98e399f2007-03-19 15:33:04 -07001740 if (dev->hard_header_len && skb_mac_header_was_set(skb)) {
Joe Perches058bd4d2012-03-11 18:36:11 +00001741 print_hex_dump(KERN_WARNING, "ll header: ",
1742 DUMP_PREFIX_OFFSET, 16, 1,
1743 skb_mac_header(skb),
David S. Millerb2c85102018-11-20 10:15:36 -08001744 dev->hard_header_len, false);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001745 }
1746 }
1747#endif
1748}
1749
/* called in rcu_read_lock() section */
/* Create (or reuse from the nexthop/exception cache) the forwarding
 * route for an input packet that matched FIB result @res, and attach
 * it to @skb.  Validates the source, decides whether an ICMP redirect
 * should be sent (IPSKB_DOREDIRECT), and rejects non-IP (e.g. ARP)
 * frames that are invalid for proxy-arp.  Returns 0 on success or a
 * negative errno.
 */
static int __mkroute_input(struct sk_buff *skb,
			   const struct fib_result *res,
			   struct in_device *in_dev,
			   __be32 daddr, __be32 saddr, u32 tos)
{
	struct fib_nh_common *nhc = FIB_RES_NHC(*res);
	struct net_device *dev = nhc->nhc_dev;
	struct fib_nh_exception *fnhe;
	struct rtable *rth;
	int err;
	struct in_device *out_dev;
	bool do_cache;
	u32 itag = 0;

	/* get a working reference to the output device */
	out_dev = __in_dev_get_rcu(dev);
	if (!out_dev) {
		net_crit_ratelimited("Bug in ip_route_input_slow(). Please report.\n");
		return -EINVAL;
	}

	err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res),
				  in_dev->dev, in_dev, &itag);
	if (err < 0) {
		ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
					 saddr);

		goto cleanup;
	}

	/* only cache when the fib_info exists and no classid tag is set */
	do_cache = res->fi && !itag;
	if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) &&
	    skb->protocol == htons(ETH_P_IP)) {
		__be32 gw;

		gw = nhc->nhc_gw_family == AF_INET ? nhc->nhc_gw.ipv4 : 0;
		if (IN_DEV_SHARED_MEDIA(out_dev) ||
		    inet_addr_onlink(out_dev, saddr, gw))
			IPCB(skb)->flags |= IPSKB_DOREDIRECT;
	}

	if (skb->protocol != htons(ETH_P_IP)) {
		/* Not IP (i.e. ARP). Do not create route, if it is
		 * invalid for proxy arp. DNAT routes are always valid.
		 *
		 * Proxy arp feature have been extended to allow, ARP
		 * replies back to the same interface, to support
		 * Private VLAN switch technologies. See arp.c.
		 */
		if (out_dev == in_dev &&
		    IN_DEV_PROXY_ARP_PVLAN(in_dev) == 0) {
			err = -EINVAL;
			goto cleanup;
		}
	}

	/* fast path: reuse a still-valid cached route */
	fnhe = find_exception(nhc, daddr);
	if (do_cache) {
		if (fnhe)
			rth = rcu_dereference(fnhe->fnhe_rth_input);
		else
			rth = rcu_dereference(nhc->nhc_rth_input);
		if (rt_cache_valid(rth)) {
			skb_dst_set_noref(skb, &rth->dst);
			goto out;
		}
	}

	rth = rt_dst_alloc(out_dev->dev, 0, res->type,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY),
			   IN_DEV_CONF_GET(out_dev, NOXFRM), do_cache);
	if (!rth) {
		err = -ENOBUFS;
		goto cleanup;
	}

	rth->rt_is_input = 1;
	RT_CACHE_STAT_INC(in_slow_tot);

	rth->dst.input = ip_forward;

	rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag,
		       do_cache);
	lwtunnel_set_redirect(&rth->dst);
	skb_dst_set(skb, &rth->dst);
out:
	err = 0;
 cleanup:
	return err;
}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001841
#ifdef CONFIG_IP_ROUTE_MULTIPATH
/* To make ICMP packets follow the right flow, the multipath hash is
 * calculated from the inner IP addresses.
 */
/* Fill hash_keys with the src/dst addresses to hash on.  Normally the
 * outer header's; for ICMP error packets that quote the offending
 * datagram, the quoted (inner) header's addresses are used instead so
 * the error follows the original flow's path.
 */
static void ip_multipath_l3_keys(const struct sk_buff *skb,
				 struct flow_keys *hash_keys)
{
	const struct iphdr *outer_iph = ip_hdr(skb);
	const struct iphdr *key_iph = outer_iph;
	const struct iphdr *inner_iph;
	const struct icmphdr *icmph;
	struct iphdr _inner_iph;
	struct icmphdr _icmph;

	if (likely(outer_iph->protocol != IPPROTO_ICMP))
		goto out;

	/* non-first fragments don't carry the ICMP header */
	if (unlikely((outer_iph->frag_off & htons(IP_OFFSET)) != 0))
		goto out;

	icmph = skb_header_pointer(skb, outer_iph->ihl * 4, sizeof(_icmph),
				   &_icmph);
	if (!icmph)
		goto out;

	/* only these error types quote the inner header */
	if (icmph->type != ICMP_DEST_UNREACH &&
	    icmph->type != ICMP_REDIRECT &&
	    icmph->type != ICMP_TIME_EXCEEDED &&
	    icmph->type != ICMP_PARAMETERPROB)
		goto out;

	inner_iph = skb_header_pointer(skb,
				       outer_iph->ihl * 4 + sizeof(_icmph),
				       sizeof(_inner_iph), &_inner_iph);
	if (!inner_iph)
		goto out;

	key_iph = inner_iph;
out:
	hash_keys->addrs.v4addrs.src = key_iph->saddr;
	hash_keys->addrs.v4addrs.dst = key_iph->daddr;
}
1884
/* if skb is set it will be used and fl4 can be NULL */
/* Compute the multipath hash used to pick a nexthop, according to the
 * netns fib_multipath_hash_policy sysctl: 0 = L3 (addresses only),
 * 1 = L4 (addresses, ports, protocol).  Returns a 31-bit hash (the
 * top bit of the 32-bit flow hash is shifted out).
 */
int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
		       const struct sk_buff *skb, struct flow_keys *flkeys)
{
	u32 multipath_hash = fl4 ? fl4->flowi4_multipath_hash : 0;
	struct flow_keys hash_keys;
	u32 mhash;

	switch (net->ipv4.sysctl_fib_multipath_hash_policy) {
	case 0:
		/* L3: hash on addresses only */
		memset(&hash_keys, 0, sizeof(hash_keys));
		hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
		if (skb) {
			ip_multipath_l3_keys(skb, &hash_keys);
		} else {
			hash_keys.addrs.v4addrs.src = fl4->saddr;
			hash_keys.addrs.v4addrs.dst = fl4->daddr;
		}
		break;
	case 1:
		/* L4: hash on addresses, ports and protocol */
		/* skb is currently provided only when forwarding */
		if (skb) {
			unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
			struct flow_keys keys;

			/* short-circuit if we already have L4 hash present */
			if (skb->l4_hash)
				return skb_get_hash_raw(skb) >> 1;

			memset(&hash_keys, 0, sizeof(hash_keys));

			/* dissect only if the caller didn't already */
			if (!flkeys) {
				skb_flow_dissect_flow_keys(skb, &keys, flag);
				flkeys = &keys;
			}

			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
			hash_keys.addrs.v4addrs.src = flkeys->addrs.v4addrs.src;
			hash_keys.addrs.v4addrs.dst = flkeys->addrs.v4addrs.dst;
			hash_keys.ports.src = flkeys->ports.src;
			hash_keys.ports.dst = flkeys->ports.dst;
			hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
		} else {
			memset(&hash_keys, 0, sizeof(hash_keys));
			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
			hash_keys.addrs.v4addrs.src = fl4->saddr;
			hash_keys.addrs.v4addrs.dst = fl4->daddr;
			hash_keys.ports.src = fl4->fl4_sport;
			hash_keys.ports.dst = fl4->fl4_dport;
			hash_keys.basic.ip_proto = fl4->flowi4_proto;
		}
		break;
	}
	mhash = flow_hash_from_keys(&hash_keys);

	/* fold in a hash already carried by the flow, if any
	 * (NOTE(review): presumably set by encap/tunnel code - confirm)
	 */
	if (multipath_hash)
		mhash = jhash_2words(mhash, multipath_hash, 0);

	return mhash >> 1;
}
#endif /* CONFIG_IP_ROUTE_MULTIPATH */
1946
/* Select a nexthop (via the multipath hash when the route has more
 * than one path) and create the input route cache entry for @skb.
 */
static int ip_mkroute_input(struct sk_buff *skb,
			    struct fib_result *res,
			    struct in_device *in_dev,
			    __be32 daddr, __be32 saddr, u32 tos,
			    struct flow_keys *hkeys)
{
#ifdef CONFIG_IP_ROUTE_MULTIPATH
	if (res->fi && fib_info_num_path(res->fi) > 1) {
		int h = fib_multipath_hash(res->fi->fib_net, NULL, skb, hkeys);

		fib_select_multipath(res, h);
	}
#endif

	/* create a routing cache entry */
	return __mkroute_input(skb, res, in_dev, daddr, saddr, tos);
}
1964
/*
 *	NOTE. We drop all packets that have a local source
 *	address, because every properly looped-back packet
 *	must already have the correct destination attached by the
 *	output routine.
 *
 *	Such an approach solves two big problems:
 *	1. Non-simplex devices are handled properly.
 *	2. IP spoofing attempts are filtered with a 100% guarantee.
 *
 *	called with rcu_read_lock()
 */
1975
/* Slow-path resolution of an input route.
 *
 * Filters martian source/destination addresses, builds a flowi4 key
 * from the packet, performs the FIB lookup, and then either forwards
 * (via ip_mkroute_input()) or constructs a local/broadcast dst here,
 * optionally caching it on the next hop.
 *
 * @res:  filled with the FIB lookup result for the caller.
 * Returns 0 on success or a negative errno.
 * Called under rcu_read_lock() (see the note above this function).
 */
static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
			       u8 tos, struct net_device *dev,
			       struct fib_result *res)
{
	struct in_device *in_dev = __in_dev_get_rcu(dev);
	struct flow_keys *flkeys = NULL, _flkeys;
	struct net    *net = dev_net(dev);
	struct ip_tunnel_info *tun_info;
	int		err = -EINVAL;
	unsigned int	flags = 0;
	u32		itag = 0;
	struct rtable	*rth;
	struct flowi4	fl4;
	bool do_cache;

	/* IP on this device is disabled. */

	if (!in_dev)
		goto out;

	/* Check for the most weird martians, which can be not detected
	   by fib_lookup.
	 */

	/* Key the lookup on the tunnel id for RX-side tunnel metadata. */
	tun_info = skb_tunnel_info(skb);
	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
		fl4.flowi4_tun_key.tun_id = tun_info->key.tun_id;
	else
		fl4.flowi4_tun_key.tun_id = 0;
	skb_dst_drop(skb);

	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
		goto martian_source;

	res->fi = NULL;
	res->table = NULL;
	if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0))
		goto brd_input;

	/* Accept zero addresses only to limited broadcast;
	 * I even do not know to fix it or not. Waiting for complains :-)
	 */
	if (ipv4_is_zeronet(saddr))
		goto martian_source;

	if (ipv4_is_zeronet(daddr))
		goto martian_destination;

	/* Following code try to avoid calling IN_DEV_NET_ROUTE_LOCALNET(),
	 * and call it once if daddr or/and saddr are loopback addresses
	 */
	if (ipv4_is_loopback(daddr)) {
		if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
			goto martian_destination;
	} else if (ipv4_is_loopback(saddr)) {
		if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
			goto martian_source;
	}

	/*
	 *	Now we are ready to route packet.
	 */
	fl4.flowi4_oif = 0;
	fl4.flowi4_iif = dev->ifindex;
	fl4.flowi4_mark = skb->mark;
	fl4.flowi4_tos = tos;
	fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
	fl4.flowi4_flags = 0;
	fl4.daddr = daddr;
	fl4.saddr = saddr;
	fl4.flowi4_uid = sock_net_uid(net, NULL);

	/* If FIB rules need L4 keys, dissect once here and remember the
	 * keys so the multipath hash below can reuse them; otherwise the
	 * L4 fields of the flow stay zeroed.
	 */
	if (fib4_rules_early_flow_dissect(net, skb, &fl4, &_flkeys)) {
		flkeys = &_flkeys;
	} else {
		fl4.flowi4_proto = 0;
		fl4.fl4_sport = 0;
		fl4.fl4_dport = 0;
	}

	err = fib_lookup(net, &fl4, res, 0);
	if (err != 0) {
		if (!IN_DEV_FORWARD(in_dev))
			err = -EHOSTUNREACH;
		goto no_route;
	}

	if (res->type == RTN_BROADCAST) {
		/* Broadcast forwarding enabled: treat like a forward. */
		if (IN_DEV_BFORWARD(in_dev))
			goto make_route;
		goto brd_input;
	}

	if (res->type == RTN_LOCAL) {
		err = fib_validate_source(skb, saddr, daddr, tos,
					  0, dev, in_dev, &itag);
		if (err < 0)
			goto martian_source;
		goto local_input;
	}

	if (!IN_DEV_FORWARD(in_dev)) {
		err = -EHOSTUNREACH;
		goto no_route;
	}
	if (res->type != RTN_UNICAST)
		goto martian_destination;

make_route:
	err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos, flkeys);
out:	return err;

brd_input:
	if (skb->protocol != htons(ETH_P_IP))
		goto e_inval;

	if (!ipv4_is_zeronet(saddr)) {
		err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
					  in_dev, &itag);
		if (err < 0)
			goto martian_source;
	}
	flags |= RTCF_BROADCAST;
	res->type = RTN_BROADCAST;
	RT_CACHE_STAT_INC(in_brd);

local_input:
	/* Try the route cached on the next hop first; only cache a new
	 * dst when there is no source classification tag.
	 */
	do_cache = false;
	if (res->fi) {
		if (!itag) {
			struct fib_nh_common *nhc = FIB_RES_NHC(*res);

			rth = rcu_dereference(nhc->nhc_rth_input);
			if (rt_cache_valid(rth)) {
				skb_dst_set_noref(skb, &rth->dst);
				err = 0;
				goto out;
			}
			do_cache = true;
		}
	}

	rth = rt_dst_alloc(l3mdev_master_dev_rcu(dev) ? : net->loopback_dev,
			   flags | RTCF_LOCAL, res->type,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache);
	if (!rth)
		goto e_nobufs;

	/* Locally delivered packets must never be transmitted again. */
	rth->dst.output= ip_rt_bug;
#ifdef CONFIG_IP_ROUTE_CLASSID
	rth->dst.tclassid = itag;
#endif
	rth->rt_is_input = 1;

	RT_CACHE_STAT_INC(in_slow_tot);
	if (res->type == RTN_UNREACHABLE) {
		rth->dst.input= ip_error;
		rth->dst.error= -err;
		rth->rt_flags 	&= ~RTCF_LOCAL;
	}

	if (do_cache) {
		struct fib_nh_common *nhc = FIB_RES_NHC(*res);

		rth->dst.lwtstate = lwtstate_get(nhc->nhc_lwtstate);
		if (lwtunnel_input_redirect(rth->dst.lwtstate)) {
			WARN_ON(rth->dst.input == lwtunnel_input);
			/* Interpose the lwtunnel input hook, remembering
			 * the original input handler for it to chain to.
			 */
			rth->dst.lwtstate->orig_input = rth->dst.input;
			rth->dst.input = lwtunnel_input;
		}

		/* If caching fails, keep the dst on the uncached list so
		 * it can still be found and released later.
		 */
		if (unlikely(!rt_cache_route(nhc, rth)))
			rt_add_uncached_list(rth);
	}
	skb_dst_set(skb, &rth->dst);
	err = 0;
	goto out;

no_route:
	RT_CACHE_STAT_INC(in_no_route);
	res->type = RTN_UNREACHABLE;
	res->fi = NULL;
	res->table = NULL;
	goto local_input;

	/*
	 *	Do not cache martian addresses: they should be logged (RFC1812)
	 */
martian_destination:
	RT_CACHE_STAT_INC(in_martian_dst);
#ifdef CONFIG_IP_ROUTE_VERBOSE
	if (IN_DEV_LOG_MARTIANS(in_dev))
		net_warn_ratelimited("martian destination %pI4 from %pI4, dev %s\n",
				     &daddr, &saddr, dev->name);
#endif

e_inval:
	err = -EINVAL;
	goto out;

e_nobufs:
	err = -ENOBUFS;
	goto out;

martian_source:
	ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
	goto out;
}
2184
David S. Millerc6cffba2012-07-26 11:14:38 +00002185int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2186 u8 tos, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002187{
David Ahern5510cdf2017-05-25 10:42:34 -07002188 struct fib_result res;
2189 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002190
Julian Anastasov6e280992017-02-26 17:14:35 +02002191 tos &= IPTOS_RT_MASK;
Eric Dumazet96d36222010-06-02 19:21:31 +00002192 rcu_read_lock();
David Ahern5510cdf2017-05-25 10:42:34 -07002193 err = ip_route_input_rcu(skb, daddr, saddr, tos, dev, &res);
2194 rcu_read_unlock();
Eric Dumazet96d36222010-06-02 19:21:31 +00002195
David Ahern5510cdf2017-05-25 10:42:34 -07002196 return err;
2197}
2198EXPORT_SYMBOL(ip_route_input_noref);
2199
/* Input route resolution entry point; called with rcu_read_lock held.
 *
 * Handles multicast destinations directly here (including checking the
 * L3 master device when the receiving device is an l3mdev slave) and
 * hands everything else to ip_route_input_slow().
 * Returns 0 on success or a negative errno.
 */
int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr,
		       u8 tos, struct net_device *dev, struct fib_result *res)
{
	/* Multicast recognition logic is moved from route cache to here.
	   The problem was that too many Ethernet cards have broken/missing
	   hardware multicast filters :-( As result the host on multicasting
	   network acquires a lot of useless route cache entries, sort of
	   SDR messages from all the world. Now we try to get rid of them.
	   Really, provided software IP multicast filter is organized
	   reasonably (at least, hashed), it does not result in a slowdown
	   comparing with route cache reject entries.
	   Note, that multicast routers are not affected, because
	   route cache entry is created eventually.
	 */
	if (ipv4_is_multicast(daddr)) {
		struct in_device *in_dev = __in_dev_get_rcu(dev);
		int our = 0;
		int err = -EINVAL;

		if (!in_dev)
			return err;
		our = ip_check_mc_rcu(in_dev, daddr, saddr,
				      ip_hdr(skb)->protocol);

		/* check l3 master if no match yet */
		if (!our && netif_is_l3_slave(dev)) {
			struct in_device *l3_in_dev;

			l3_in_dev = __in_dev_get_rcu(skb->dev);
			if (l3_in_dev)
				our = ip_check_mc_rcu(l3_in_dev, daddr, saddr,
						      ip_hdr(skb)->protocol);
		}

		/* Route the packet if it is addressed to us, or (with
		 * multicast routing compiled in) if it is a non-link-local
		 * group and multicast forwarding is enabled on the device.
		 */
		if (our
#ifdef CONFIG_IP_MROUTE
			||
		    (!ipv4_is_local_multicast(daddr) &&
		     IN_DEV_MFORWARD(in_dev))
#endif
		   ) {
			err = ip_route_input_mc(skb, daddr, saddr,
						tos, dev, our);
		}
		return err;
	}

	return ip_route_input_slow(skb, daddr, saddr, tos, dev, res);
}
2250
/* Build (or fetch from cache) the output rtable for a resolved route.
 *
 * Classifies the destination (broadcast/multicast/zeronet), decides
 * whether the resulting dst may be cached on the next hop, tries the
 * per-exception or per-cpu cached route first, and otherwise allocates
 * and initialises a fresh rtable.
 *
 * Returns the rtable or an ERR_PTR() on failure.
 * called with rcu_read_lock()
 */
static struct rtable *__mkroute_output(const struct fib_result *res,
				       const struct flowi4 *fl4, int orig_oif,
				       struct net_device *dev_out,
				       unsigned int flags)
{
	struct fib_info *fi = res->fi;
	struct fib_nh_exception *fnhe;
	struct in_device *in_dev;
	u16 type = res->type;
	struct rtable *rth;
	bool do_cache;

	in_dev = __in_dev_get_rcu(dev_out);
	if (!in_dev)
		return ERR_PTR(-EINVAL);

	/* A loopback source may only leave through a loopback or L3
	 * master device unless route_localnet is enabled.
	 */
	if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
		if (ipv4_is_loopback(fl4->saddr) &&
		    !(dev_out->flags & IFF_LOOPBACK) &&
		    !netif_is_l3_master(dev_out))
			return ERR_PTR(-EINVAL);

	if (ipv4_is_lbcast(fl4->daddr))
		type = RTN_BROADCAST;
	else if (ipv4_is_multicast(fl4->daddr))
		type = RTN_MULTICAST;
	else if (ipv4_is_zeronet(fl4->daddr))
		return ERR_PTR(-EINVAL);

	if (dev_out->flags & IFF_LOOPBACK)
		flags |= RTCF_LOCAL;

	do_cache = true;
	if (type == RTN_BROADCAST) {
		flags |= RTCF_BROADCAST | RTCF_LOCAL;
		fi = NULL;
	} else if (type == RTN_MULTICAST) {
		flags |= RTCF_MULTICAST | RTCF_LOCAL;
		if (!ip_check_mc_rcu(in_dev, fl4->daddr, fl4->saddr,
				     fl4->flowi4_proto))
			flags &= ~RTCF_LOCAL;
		else
			do_cache = false;
		/* If multicast route do not exist use
		 * default one, but do not gateway in this case.
		 * Yes, it is hack.
		 */
		if (fi && res->prefixlen < 4)
			fi = NULL;
	} else if ((type == RTN_LOCAL) && (orig_oif != 0) &&
		   (orig_oif != dev_out->ifindex)) {
		/* For local routes that require a particular output interface
		 * we do not want to cache the result.  Caching the result
		 * causes incorrect behaviour when there are multiple source
		 * addresses on the interface, the end result being that if the
		 * intended recipient is waiting on that interface for the
		 * packet he won't receive it because it will be delivered on
		 * the loopback interface and the IP_PKTINFO ipi_ifindex will
		 * be set to the loopback interface as well.
		 */
		do_cache = false;
	}

	fnhe = NULL;
	do_cache &= fi != NULL;
	if (fi) {
		struct fib_nh_common *nhc = FIB_RES_NHC(*res);
		struct rtable __rcu **prth;

		/* Prefer a PMTU/redirect exception route if one matches;
		 * otherwise fall back to the per-cpu cached output route.
		 */
		fnhe = find_exception(nhc, fl4->daddr);
		if (!do_cache)
			goto add;
		if (fnhe) {
			prth = &fnhe->fnhe_rth_output;
		} else {
			if (unlikely(fl4->flowi4_flags &
				     FLOWI_FLAG_KNOWN_NH &&
				     !(nhc->nhc_gw_family &&
				       nhc->nhc_scope == RT_SCOPE_LINK))) {
				do_cache = false;
				goto add;
			}
			prth = raw_cpu_ptr(nhc->nhc_pcpu_rth_output);
		}
		rth = rcu_dereference(*prth);
		if (rt_cache_valid(rth) && dst_hold_safe(&rth->dst))
			return rth;
	}

add:
	rth = rt_dst_alloc(dev_out, flags, type,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY),
			   IN_DEV_CONF_GET(in_dev, NOXFRM),
			   do_cache);
	if (!rth)
		return ERR_PTR(-ENOBUFS);

	rth->rt_iif = orig_oif;

	RT_CACHE_STAT_INC(out_slow_tot);

	if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
		if (flags & RTCF_LOCAL &&
		    !(dev_out->flags & IFF_LOOPBACK)) {
			rth->dst.output = ip_mc_output;
			RT_CACHE_STAT_INC(out_slow_mc);
		}
#ifdef CONFIG_IP_MROUTE
		if (type == RTN_MULTICAST) {
			if (IN_DEV_MFORWARD(in_dev) &&
			    !ipv4_is_local_multicast(fl4->daddr)) {
				rth->dst.input = ip_mr_input;
				rth->dst.output = ip_mc_output;
			}
		}
#endif
	}

	rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0, do_cache);
	lwtunnel_set_redirect(&rth->dst);

	return rth;
}
2375
Linus Torvalds1da177e2005-04-16 15:20:36 -07002376/*
2377 * Major route resolver routine.
2378 */
2379
David Ahern3abd1ade2017-05-25 10:42:33 -07002380struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
2381 const struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002382{
Julian Anastasovf61759e2011-12-02 11:39:42 +00002383 __u8 tos = RT_FL_TOS(fl4);
Eric Dumazetd0ea2b12018-04-07 13:42:42 -07002384 struct fib_result res = {
2385 .type = RTN_UNSPEC,
2386 .fi = NULL,
2387 .table = NULL,
2388 .tclassid = 0,
2389 };
David S. Miller5ada5522011-02-17 15:29:00 -08002390 struct rtable *rth;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002391
Pavel Emelyanov1fb94892012-08-08 21:53:36 +00002392 fl4->flowi4_iif = LOOPBACK_IFINDEX;
David S. Miller813b3b52011-04-28 14:48:42 -07002393 fl4->flowi4_tos = tos & IPTOS_RT_MASK;
2394 fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
2395 RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
David S. Miller44713b62011-03-04 21:24:47 -08002396
David S. Miller010c2702011-02-17 15:37:09 -08002397 rcu_read_lock();
David Ahern3abd1ade2017-05-25 10:42:33 -07002398 rth = ip_route_output_key_hash_rcu(net, fl4, &res, skb);
2399 rcu_read_unlock();
2400
2401 return rth;
2402}
2403EXPORT_SYMBOL_GPL(ip_route_output_key_hash);
2404
/* Major route resolver: resolve an output route under rcu_read_lock().
 *
 * Validates and, where missing, fills in the source address and output
 * interface of @fl4, performs the FIB lookup, handles the special
 * multicast/limited-broadcast and local-delivery cases, and finally
 * builds the rtable via __mkroute_output().
 *
 * @res is filled with the FIB lookup result for the caller.
 * Returns the rtable or an ERR_PTR() on failure.
 */
struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4,
					    struct fib_result *res,
					    const struct sk_buff *skb)
{
	struct net_device *dev_out = NULL;
	int orig_oif = fl4->flowi4_oif;
	unsigned int flags = 0;
	struct rtable *rth;
	int err = -ENETUNREACH;

	if (fl4->saddr) {
		/* A multicast, broadcast or zero source is never valid. */
		rth = ERR_PTR(-EINVAL);
		if (ipv4_is_multicast(fl4->saddr) ||
		    ipv4_is_lbcast(fl4->saddr) ||
		    ipv4_is_zeronet(fl4->saddr))
			goto out;

		/* I removed check for oif == dev_out->oif here.
		   It was wrong for two reasons:
		   1. ip_dev_find(net, saddr) can return wrong iface, if saddr
		      is assigned to multiple interfaces.
		   2. Moreover, we are allowed to send packets with saddr
		      of another iface. --ANK
		 */

		if (fl4->flowi4_oif == 0 &&
		    (ipv4_is_multicast(fl4->daddr) ||
		     ipv4_is_lbcast(fl4->daddr))) {
			/* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
			dev_out = __ip_dev_find(net, fl4->saddr, false);
			if (!dev_out)
				goto out;

			/* Special hack: user can direct multicasts
			   and limited broadcast via necessary interface
			   without fiddling with IP_MULTICAST_IF or IP_PKTINFO.
			   This hack is not just for fun, it allows
			   vic,vat and friends to work.
			   They bind socket to loopback, set ttl to zero
			   and expect that it will work.
			   From the viewpoint of routing cache they are broken,
			   because we are not allowed to build multicast path
			   with loopback source addr (look, routing cache
			   cannot know, that ttl is zero, so that packet
			   will not leave this host and route is valid).
			   Luckily, this hack is good workaround.
			 */

			fl4->flowi4_oif = dev_out->ifindex;
			goto make_route;
		}

		if (!(fl4->flowi4_flags & FLOWI_FLAG_ANYSRC)) {
			/* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
			if (!__ip_dev_find(net, fl4->saddr, false))
				goto out;
		}
	}


	if (fl4->flowi4_oif) {
		dev_out = dev_get_by_index_rcu(net, fl4->flowi4_oif);
		rth = ERR_PTR(-ENODEV);
		if (!dev_out)
			goto out;

		/* RACE: Check return value of inet_select_addr instead. */
		if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) {
			rth = ERR_PTR(-ENETUNREACH);
			goto out;
		}
		if (ipv4_is_local_multicast(fl4->daddr) ||
		    ipv4_is_lbcast(fl4->daddr) ||
		    fl4->flowi4_proto == IPPROTO_IGMP) {
			if (!fl4->saddr)
				fl4->saddr = inet_select_addr(dev_out, 0,
							      RT_SCOPE_LINK);
			goto make_route;
		}
		if (!fl4->saddr) {
			if (ipv4_is_multicast(fl4->daddr))
				fl4->saddr = inet_select_addr(dev_out, 0,
							      fl4->flowi4_scope);
			else if (!fl4->daddr)
				fl4->saddr = inet_select_addr(dev_out, 0,
							      RT_SCOPE_HOST);
		}
	}

	if (!fl4->daddr) {
		/* Neither address given: route to loopback. */
		fl4->daddr = fl4->saddr;
		if (!fl4->daddr)
			fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK);
		dev_out = net->loopback_dev;
		fl4->flowi4_oif = LOOPBACK_IFINDEX;
		res->type = RTN_LOCAL;
		flags |= RTCF_LOCAL;
		goto make_route;
	}

	err = fib_lookup(net, fl4, res, 0);
	if (err) {
		res->fi = NULL;
		res->table = NULL;
		if (fl4->flowi4_oif &&
		    (ipv4_is_multicast(fl4->daddr) ||
		    !netif_index_is_l3_master(net, fl4->flowi4_oif))) {
			/* Apparently, routing tables are wrong. Assume,
			   that the destination is on link.

			   WHY? DW.
			   Because we are allowed to send to iface
			   even if it has NO routes and NO assigned
			   addresses. When oif is specified, routing
			   tables are looked up with only one purpose:
			   to catch if destination is gatewayed, rather than
			   direct. Moreover, if MSG_DONTROUTE is set,
			   we send packet, ignoring both routing tables
			   and ifaddr state. --ANK


			   We could make it even if oif is unknown,
			   likely IPv6, but we do not.
			 */

			if (fl4->saddr == 0)
				fl4->saddr = inet_select_addr(dev_out, 0,
							      RT_SCOPE_LINK);
			res->type = RTN_UNICAST;
			goto make_route;
		}
		rth = ERR_PTR(err);
		goto out;
	}

	if (res->type == RTN_LOCAL) {
		if (!fl4->saddr) {
			if (res->fi->fib_prefsrc)
				fl4->saddr = res->fi->fib_prefsrc;
			else
				fl4->saddr = fl4->daddr;
		}

		/* L3 master device is the loopback for that domain */
		dev_out = l3mdev_master_dev_rcu(FIB_RES_DEV(*res)) ? :
			net->loopback_dev;

		/* make sure orig_oif points to fib result device even
		 * though packet rx/tx happens over loopback or l3mdev
		 */
		orig_oif = FIB_RES_OIF(*res);

		fl4->flowi4_oif = dev_out->ifindex;
		flags |= RTCF_LOCAL;
		goto make_route;
	}

	fib_select_path(net, res, fl4, skb);

	dev_out = FIB_RES_DEV(*res);
	fl4->flowi4_oif = dev_out->ifindex;


make_route:
	rth = __mkroute_output(res, fl4, orig_oif, dev_out, flags);

out:
	return rth;
}
Arnaldo Carvalho de Melod8c97a92005-08-09 20:12:12 -07002574
/* dst_ops->check for blackhole routes: always report the entry as invalid
 * (NULL) so a cached blackhole dst is never revalidated and reused.
 */
static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie)
{
	return NULL;
}
2579
Steffen Klassertebb762f2011-11-23 02:12:51 +00002580static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst)
Roland Dreierec831ea2011-01-31 13:16:00 -08002581{
Steffen Klassert618f9bc2011-11-23 02:13:31 +00002582 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
2583
2584 return mtu ? : dst->dev->mtu;
Roland Dreierec831ea2011-01-31 13:16:00 -08002585}
2586
/* dst_ops->update_pmtu for blackhole routes: intentionally a no-op, since
 * path-MTU state is meaningless for a route that discards all traffic.
 */
static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
					  struct sk_buff *skb, u32 mtu)
{
}
2591
/* dst_ops->redirect for blackhole routes: intentionally a no-op; ICMP
 * redirects are ignored for a route that discards all traffic.
 */
static void ipv4_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				       struct sk_buff *skb)
{
}
2596
/* dst_ops->cow_metrics for blackhole routes: refuse to allocate a private
 * (copy-on-write) metrics block by returning NULL.
 */
static u32 *ipv4_rt_blackhole_cow_metrics(struct dst_entry *dst,
					  unsigned long old)
{
	return NULL;
}
2602
/* dst_ops vtable for blackhole routes created by ipv4_blackhole_route():
 * every callback is either a no-op or a "never valid / never allocate"
 * stub, so the dst silently absorbs all traffic and state updates.
 */
static struct dst_ops ipv4_dst_blackhole_ops = {
	.family			=	AF_INET,
	.check			=	ipv4_blackhole_dst_check,
	.mtu			=	ipv4_blackhole_mtu,
	.default_advmss		=	ipv4_default_advmss,
	.update_pmtu		=	ipv4_rt_blackhole_update_pmtu,
	.redirect		=	ipv4_rt_blackhole_redirect,
	.cow_metrics		=	ipv4_rt_blackhole_cow_metrics,
	.neigh_lookup		=	ipv4_neigh_lookup,
};
2613
/* Replace @dst_orig with a "blackhole" clone that drops every packet.
 *
 * The clone copies the routing identity of the original (iif, flags, type,
 * gateway, pmtu state) but wires input/output to dst_discard*, attaches the
 * loopback device and uses ipv4_dst_blackhole_ops, so it behaves like the
 * original route for lookups while silently discarding traffic.
 *
 * Always releases the reference on @dst_orig. Returns the new dst, or
 * ERR_PTR(-ENOMEM) if allocation failed.
 */
struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
	struct rtable *ort = (struct rtable *) dst_orig;
	struct rtable *rt;

	/* DST_OBSOLETE_DEAD: the entry is born obsolete; ->check (which
	 * returns NULL) will be consulted before any reuse.
	 */
	rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_DEAD, 0);
	if (rt) {
		struct dst_entry *new = &rt->dst;

		new->__use = 1;
		/* Both directions discard. */
		new->input = dst_discard;
		new->output = dst_discard_out;

		new->dev = net->loopback_dev;
		if (new->dev)
			dev_hold(new->dev);

		/* Preserve the original route's identity fields. */
		rt->rt_is_input = ort->rt_is_input;
		rt->rt_iif = ort->rt_iif;
		rt->rt_pmtu = ort->rt_pmtu;
		rt->rt_mtu_locked = ort->rt_mtu_locked;

		rt->rt_genid = rt_genid_ipv4(net);
		rt->rt_flags = ort->rt_flags;
		rt->rt_type = ort->rt_type;
		rt->rt_gw_family = ort->rt_gw_family;
		if (rt->rt_gw_family == AF_INET)
			rt->rt_gw4 = ort->rt_gw4;
		else if (rt->rt_gw_family == AF_INET6)
			rt->rt_gw6 = ort->rt_gw6;

		INIT_LIST_HEAD(&rt->rt_uncached);
	}

	/* Ownership of dst_orig's reference is consumed here. */
	dst_release(dst_orig);

	return rt ? &rt->dst : ERR_PTR(-ENOMEM);
}
2652
David S. Miller9d6ec932011-03-12 01:12:47 -05002653struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
Eric Dumazet6f9c9612015-09-25 07:39:10 -07002654 const struct sock *sk)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002655{
David S. Miller9d6ec932011-03-12 01:12:47 -05002656 struct rtable *rt = __ip_route_output_key(net, flp4);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002657
David S. Millerb23dd4f2011-03-02 14:31:35 -08002658 if (IS_ERR(rt))
2659 return rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002660
David S. Miller56157872011-05-02 14:37:45 -07002661 if (flp4->flowi4_proto)
Steffen Klassertf92ee612014-09-16 10:08:40 +02002662 rt = (struct rtable *)xfrm_lookup_route(net, &rt->dst,
2663 flowi4_to_flowi(flp4),
2664 sk, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002665
David S. Millerb23dd4f2011-03-02 14:31:35 -08002666 return rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002667}
Arnaldo Carvalho de Melod8c97a92005-08-09 20:12:12 -07002668EXPORT_SYMBOL_GPL(ip_route_output_flow);
2669
/* Fill a netlink RTM_NEWROUTE message describing route @rt into @skb.
 *
 * Called with rcu_read_lock held. @dst/@src are the addresses being
 * reported, @fl4 is the flow used for the lookup, @portid/@seq address the
 * reply. Returns 0 on success, -EMSGSIZE if the skb ran out of room (all
 * attribute failures funnel through nla_put_failure, which cancels the
 * partially built message).
 */
static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
			struct rtable *rt, u32 table_id, struct flowi4 *fl4,
			struct sk_buff *skb, u32 portid, u32 seq)
{
	struct rtmsg *r;
	struct nlmsghdr *nlh;
	unsigned long expires = 0;
	u32 error;
	u32 metrics[RTAX_MAX];

	nlh = nlmsg_put(skb, portid, seq, RTM_NEWROUTE, sizeof(*r), 0);
	if (!nlh)
		return -EMSGSIZE;

	r = nlmsg_data(nlh);
	r->rtm_family	 = AF_INET;
	r->rtm_dst_len	= 32;
	r->rtm_src_len	= 0;
	r->rtm_tos	= fl4->flowi4_tos;
	/* Legacy 8-bit header field can't hold tables >= 256; the real id
	 * always goes out in the RTA_TABLE attribute below.
	 */
	r->rtm_table	= table_id < 256 ? table_id : RT_TABLE_COMPAT;
	if (nla_put_u32(skb, RTA_TABLE, table_id))
		goto nla_put_failure;
	r->rtm_type	= rt->rt_type;
	r->rtm_scope	= RT_SCOPE_UNIVERSE;
	r->rtm_protocol = RTPROT_UNSPEC;
	r->rtm_flags	= (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
	if (rt->rt_flags & RTCF_NOTIFY)
		r->rtm_flags |= RTM_F_NOTIFY;
	if (IPCB(skb)->flags & IPSKB_DOREDIRECT)
		r->rtm_flags |= RTCF_DOREDIRECT;

	if (nla_put_in_addr(skb, RTA_DST, dst))
		goto nla_put_failure;
	if (src) {
		r->rtm_src_len = 32;
		if (nla_put_in_addr(skb, RTA_SRC, src))
			goto nla_put_failure;
	}
	if (rt->dst.dev &&
	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
		goto nla_put_failure;
#ifdef CONFIG_IP_ROUTE_CLASSID
	if (rt->dst.tclassid &&
	    nla_put_u32(skb, RTA_FLOW, rt->dst.tclassid))
		goto nla_put_failure;
#endif
	/* Report the preferred source only for output routes where it
	 * differs from the requested source address.
	 */
	if (!rt_is_input_route(rt) &&
	    fl4->saddr != src) {
		if (nla_put_in_addr(skb, RTA_PREFSRC, fl4->saddr))
			goto nla_put_failure;
	}
	/* IPv4 gateway goes out as RTA_GATEWAY; an IPv6 gateway (e.g. via a
	 * nexthop object) is encoded as an RTA_VIA attribute instead.
	 */
	if (rt->rt_gw_family == AF_INET &&
	    nla_put_in_addr(skb, RTA_GATEWAY, rt->rt_gw4)) {
		goto nla_put_failure;
	} else if (rt->rt_gw_family == AF_INET6) {
		int alen = sizeof(struct in6_addr);
		struct nlattr *nla;
		struct rtvia *via;

		nla = nla_reserve(skb, RTA_VIA, alen + 2);
		if (!nla)
			goto nla_put_failure;

		via = nla_data(nla);
		via->rtvia_family = AF_INET6;
		memcpy(via->rtvia_addr, &rt->rt_gw6, alen);
	}

	/* Convert the absolute expiry time to a remaining-jiffies delta
	 * for userspace; already-expired entries report 0.
	 */
	expires = rt->dst.expires;
	if (expires) {
		unsigned long now = jiffies;

		if (time_before(now, expires))
			expires -= now;
		else
			expires = 0;
	}

	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
	/* A learned PMTU is only reported (and marked locked) while the
	 * expiry timer is still running.
	 */
	if (rt->rt_pmtu && expires)
		metrics[RTAX_MTU - 1] = rt->rt_pmtu;
	if (rt->rt_mtu_locked && expires)
		metrics[RTAX_LOCK - 1] |= BIT(RTAX_MTU);
	if (rtnetlink_put_metrics(skb, metrics) < 0)
		goto nla_put_failure;

	if (fl4->flowi4_mark &&
	    nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark))
		goto nla_put_failure;

	if (!uid_eq(fl4->flowi4_uid, INVALID_UID) &&
	    nla_put_u32(skb, RTA_UID,
			from_kuid_munged(current_user_ns(), fl4->flowi4_uid)))
		goto nla_put_failure;

	error = rt->dst.error;

	if (rt_is_input_route(rt)) {
#ifdef CONFIG_IP_MROUTE
		/* Non-local multicast with forwarding enabled: let the
		 * multicast router dump the (S,G) state. ipmr_get_route()
		 * returning 0 means it queued an async answer itself.
		 */
		if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) &&
		    IPV4_DEVCONF_ALL(net, MC_FORWARDING)) {
			int err = ipmr_get_route(net, skb,
						 fl4->saddr, fl4->daddr,
						 r, portid);

			if (err <= 0) {
				if (err == 0)
					return 0;
				goto nla_put_failure;
			}
		} else
#endif
			if (nla_put_u32(skb, RTA_IIF, fl4->flowi4_iif))
				goto nla_put_failure;
	}

	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, error) < 0)
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2797
/* Build a dummy skb carrying a minimal IPv4 header (plus a UDP/TCP/ICMP
 * stub when @ip_proto matches) so an RTM_GETROUTE request can be pushed
 * through the real input routing path as if it were a received packet.
 *
 * Returns the skb, or NULL on allocation failure. NOTE: header-reset
 * calls and skb_put ordering define the final layout — keep the sequence.
 */
static struct sk_buff *inet_rtm_getroute_build_skb(__be32 src, __be32 dst,
						   u8 ip_proto, __be16 sport,
						   __be16 dport)
{
	struct sk_buff *skb;
	struct iphdr *iph;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return NULL;

	/* Reserve room for dummy headers, this skb can pass
	 * through good chunk of routing engine.
	 */
	skb_reset_mac_header(skb);
	skb_reset_network_header(skb);
	skb->protocol = htons(ETH_P_IP);
	iph = skb_put(skb, sizeof(struct iphdr));
	iph->protocol = ip_proto;
	iph->saddr = src;
	iph->daddr = dst;
	iph->version = 0x4;
	iph->frag_off = 0;
	iph->ihl = 0x5;
	skb_set_transport_header(skb, skb->len);

	switch (iph->protocol) {
	case IPPROTO_UDP: {
		struct udphdr *udph;

		udph = skb_put_zero(skb, sizeof(struct udphdr));
		udph->source = sport;
		udph->dest = dport;
		udph->len = sizeof(struct udphdr);
		udph->check = 0;
		break;
	}
	case IPPROTO_TCP: {
		struct tcphdr *tcph;

		tcph = skb_put_zero(skb, sizeof(struct tcphdr));
		tcph->source	= sport;
		tcph->dest	= dport;
		tcph->doff	= sizeof(struct tcphdr) / 4;
		tcph->rst = 1;
		/* Partial checksum over the pseudo-header only; enough for
		 * a packet that is never transmitted.
		 */
		tcph->check = ~tcp_v4_check(sizeof(struct tcphdr),
					    src, dst, 0);
		break;
	}
	case IPPROTO_ICMP: {
		struct icmphdr *icmph;

		icmph = skb_put_zero(skb, sizeof(struct icmphdr));
		icmph->type = ICMP_ECHO;
		icmph->code = 0;
	}
	}

	return skb;
}
2858
/* Validate an RTM_GETROUTE request header and attributes.
 *
 * For sockets that opted into strict checking, enforce that reserved
 * rtmsg fields are zero, prefix lengths are 0 or 32, flags are from the
 * supported set, and only the whitelisted attributes appear; otherwise
 * fall back to a plain (lenient) attribute parse. Parsed attributes are
 * returned in @tb. Returns 0 or a negative errno with @extack set.
 */
static int inet_rtm_valid_getroute_req(struct sk_buff *skb,
				       const struct nlmsghdr *nlh,
				       struct nlattr **tb,
				       struct netlink_ext_ack *extack)
{
	struct rtmsg *rtm;
	int i, err;

	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
		NL_SET_ERR_MSG(extack,
			       "ipv4: Invalid header for route get request");
		return -EINVAL;
	}

	/* Legacy callers get the permissive parse and no further checks. */
	if (!netlink_strict_get_check(skb))
		return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
					      rtm_ipv4_policy, extack);

	rtm = nlmsg_data(nlh);
	if ((rtm->rtm_src_len && rtm->rtm_src_len != 32) ||
	    (rtm->rtm_dst_len && rtm->rtm_dst_len != 32) ||
	    rtm->rtm_table || rtm->rtm_protocol ||
	    rtm->rtm_scope || rtm->rtm_type) {
		NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for route get request");
		return -EINVAL;
	}

	if (rtm->rtm_flags & ~(RTM_F_NOTIFY |
			       RTM_F_LOOKUP_TABLE |
			       RTM_F_FIB_MATCH)) {
		NL_SET_ERR_MSG(extack, "ipv4: Unsupported rtm_flags for route get request");
		return -EINVAL;
	}

	err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
					    rtm_ipv4_policy, extack);
	if (err)
		return err;

	if ((tb[RTA_SRC] && !rtm->rtm_src_len) ||
	    (tb[RTA_DST] && !rtm->rtm_dst_len)) {
		NL_SET_ERR_MSG(extack, "ipv4: rtm_src_len and rtm_dst_len must be 32 for IPv4");
		return -EINVAL;
	}

	/* Reject any attribute outside the GETROUTE whitelist. */
	for (i = 0; i <= RTA_MAX; i++) {
		if (!tb[i])
			continue;

		switch (i) {
		case RTA_IIF:
		case RTA_OIF:
		case RTA_SRC:
		case RTA_DST:
		case RTA_IP_PROTO:
		case RTA_SPORT:
		case RTA_DPORT:
		case RTA_MARK:
		case RTA_UID:
			break;
		default:
			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in route get request");
			return -EINVAL;
		}
	}

	return 0;
}
2927
/* RTM_GETROUTE handler: resolve the route a packet with the requested
 * parameters would take and reply with an RTM_NEWROUTE message.
 *
 * With RTA_IIF the lookup goes through the input path against a dummy skb
 * built by inet_rtm_getroute_build_skb(); otherwise the output path is
 * used. The same skb is then reset and reused for the netlink reply
 * (either FIB-entry info if RTM_F_FIB_MATCH was set, or the resolved
 * route via rt_fill_info()). Returns 0 or a negative errno.
 */
static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
			     struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(in_skb->sk);
	struct nlattr *tb[RTA_MAX+1];
	u32 table_id = RT_TABLE_MAIN;
	__be16 sport = 0, dport = 0;
	struct fib_result res = {};
	u8 ip_proto = IPPROTO_UDP;
	struct rtable *rt = NULL;
	struct sk_buff *skb;
	struct rtmsg *rtm;
	struct flowi4 fl4 = {};
	__be32 dst = 0;
	__be32 src = 0;
	kuid_t uid;
	u32 iif;
	int err;
	int mark;

	err = inet_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
	if (err < 0)
		return err;

	rtm = nlmsg_data(nlh);
	src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0;
	dst = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0;
	iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
	mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0;
	if (tb[RTA_UID])
		uid = make_kuid(current_user_ns(), nla_get_u32(tb[RTA_UID]));
	else
		/* Input lookups ignore uid; output lookups default to the
		 * requester's uid.
		 */
		uid = (iif ? INVALID_UID : current_uid());

	if (tb[RTA_IP_PROTO]) {
		err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO],
						  &ip_proto, AF_INET, extack);
		if (err)
			return err;
	}

	if (tb[RTA_SPORT])
		sport = nla_get_be16(tb[RTA_SPORT]);

	if (tb[RTA_DPORT])
		dport = nla_get_be16(tb[RTA_DPORT]);

	skb = inet_rtm_getroute_build_skb(src, dst, ip_proto, sport, dport);
	if (!skb)
		return -ENOBUFS;

	fl4.daddr = dst;
	fl4.saddr = src;
	fl4.flowi4_tos = rtm->rtm_tos;
	fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0;
	fl4.flowi4_mark = mark;
	fl4.flowi4_uid = uid;
	if (sport)
		fl4.fl4_sport = sport;
	if (dport)
		fl4.fl4_dport = dport;
	fl4.flowi4_proto = ip_proto;

	rcu_read_lock();

	if (iif) {
		/* Input-path lookup: treat the dummy skb as if it arrived
		 * on the requested interface.
		 */
		struct net_device *dev;

		dev = dev_get_by_index_rcu(net, iif);
		if (!dev) {
			err = -ENODEV;
			goto errout_rcu;
		}

		fl4.flowi4_iif = iif; /* for rt_fill_info */
		skb->dev	= dev;
		skb->mark	= mark;
		err = ip_route_input_rcu(skb, dst, src, rtm->rtm_tos,
					 dev, &res);

		rt = skb_rtable(skb);
		if (err == 0 && rt->dst.error)
			err = -rt->dst.error;
	} else {
		fl4.flowi4_iif = LOOPBACK_IFINDEX;
		skb->dev = net->loopback_dev;
		rt = ip_route_output_key_hash_rcu(net, &fl4, &res, skb);
		err = 0;
		if (IS_ERR(rt))
			err = PTR_ERR(rt);
		else
			skb_dst_set(skb, &rt->dst);
	}

	if (err)
		goto errout_rcu;

	if (rtm->rtm_flags & RTM_F_NOTIFY)
		rt->rt_flags |= RTCF_NOTIFY;

	if (rtm->rtm_flags & RTM_F_LOOKUP_TABLE)
		table_id = res.table ? res.table->tb_id : 0;

	/* reset skb for netlink reply msg */
	skb_trim(skb, 0);
	skb_reset_network_header(skb);
	skb_reset_transport_header(skb);
	skb_reset_mac_header(skb);

	if (rtm->rtm_flags & RTM_F_FIB_MATCH) {
		if (!res.fi) {
			err = fib_props[res.type].error;
			if (!err)
				err = -EHOSTUNREACH;
			goto errout_rcu;
		}
		err = fib_dump_info(skb, NETLINK_CB(in_skb).portid,
				    nlh->nlmsg_seq, RTM_NEWROUTE, table_id,
				    rt->rt_type, res.prefix, res.prefixlen,
				    fl4.flowi4_tos, res.fi, 0);
	} else {
		err = rt_fill_info(net, dst, src, rt, table_id, &fl4, skb,
				   NETLINK_CB(in_skb).portid, nlh->nlmsg_seq);
	}
	if (err < 0)
		goto errout_rcu;

	rcu_read_unlock();

	/* rtnl_unicast() consumes skb on both success and failure. */
	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);

errout_free:
	return err;
errout_rcu:
	rcu_read_unlock();
	kfree_skb(skb);
	goto errout_free;
}
3066
/* Multicast configuration changed on @in_dev: flush the route cache of
 * the owning network namespace.
 */
void ip_rt_multicast_event(struct in_device *in_dev)
{
	rt_cache_flush(dev_net(in_dev->dev));
}
3071
3072#ifdef CONFIG_SYSCTL
/* Tunables exposed below in ipv4_route_table (net.ipv4.route.*).
 * ip_min_valid_pmtu is the lower bound enforced on min_pmtu writes.
 */
static int ip_rt_gc_interval __read_mostly  = 60 * HZ;
static int ip_rt_gc_min_interval __read_mostly	= HZ / 2;
static int ip_rt_gc_elasticity __read_mostly	= 8;
static int ip_min_valid_pmtu __read_mostly	= IPV4_MIN_MTU;
Gao feng082c7ca2013-02-19 00:43:12 +00003077
Joe Perchesfe2c6332013-06-11 23:04:25 -07003078static int ipv4_sysctl_rtcache_flush(struct ctl_table *__ctl, int write,
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07003079 void __user *buffer,
Linus Torvalds1da177e2005-04-16 15:20:36 -07003080 size_t *lenp, loff_t *ppos)
3081{
Timo Teräs5aad1de2013-05-27 20:46:33 +00003082 struct net *net = (struct net *)__ctl->extra1;
3083
Linus Torvalds1da177e2005-04-16 15:20:36 -07003084 if (write) {
Timo Teräs5aad1de2013-05-27 20:46:33 +00003085 rt_cache_flush(net);
3086 fnhe_genid_bump(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003087 return 0;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09003088 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07003089
3090 return -EINVAL;
3091}
3092
/* net.ipv4.route.* sysctl table (global, not per-netns). Most entries are
 * plain integers; jiffies-valued knobs use the *_jiffies handlers, and
 * min_pmtu is clamped to ip_min_valid_pmtu via proc_dointvec_minmax.
 */
static struct ctl_table ipv4_route_table[] = {
	{
		.procname	= "gc_thresh",
		.data		= &ipv4_dst_ops.gc_thresh,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "max_size",
		.data		= &ip_rt_max_size,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		/* Deprecated. Use gc_min_interval_ms */

		.procname	= "gc_min_interval",
		.data		= &ip_rt_gc_min_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "gc_min_interval_ms",
		.data		= &ip_rt_gc_min_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_ms_jiffies,
	},
	{
		.procname	= "gc_timeout",
		.data		= &ip_rt_gc_timeout,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "gc_interval",
		.data		= &ip_rt_gc_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "redirect_load",
		.data		= &ip_rt_redirect_load,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "redirect_number",
		.data		= &ip_rt_redirect_number,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "redirect_silence",
		.data		= &ip_rt_redirect_silence,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "error_cost",
		.data		= &ip_rt_error_cost,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "error_burst",
		.data		= &ip_rt_error_burst,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "gc_elasticity",
		.data		= &ip_rt_gc_elasticity,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "mtu_expires",
		.data		= &ip_rt_mtu_expires,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "min_pmtu",
		.data		= &ip_rt_min_pmtu,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &ip_min_valid_pmtu,
	},
	{
		.procname	= "min_adv_mss",
		.data		= &ip_rt_min_advmss,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{ }
};
Denis V. Lunev39a23e72008-07-05 19:02:33 -07003204
/* Per-netns net.ipv4.route.flush entry; write-only (mode 0200). extra1 is
 * pointed at the owning struct net by sysctl_route_net_init().
 */
static struct ctl_table ipv4_route_flush_table[] = {
	{
		.procname	= "flush",
		.maxlen		= sizeof(int),
		.mode		= 0200,
		.proc_handler	= ipv4_sysctl_rtcache_flush,
	},
	{ },
};
3214
/*
 * Register the per-netns /proc/sys/net/ipv4/route/ sysctl directory.
 *
 * The initial namespace registers the static ipv4_route_flush_table
 * directly; every other namespace gets a private kmemdup() copy so each
 * table can carry its own ->extra1 back-pointer to the owning net.
 * Returns 0 on success, -ENOMEM on allocation or registration failure.
 */
static __net_init int sysctl_route_net_init(struct net *net)
{
	struct ctl_table *tbl;

	tbl = ipv4_route_flush_table;
	if (!net_eq(net, &init_net)) {
		tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL);
		if (!tbl)
			goto err_dup;

		/* Don't export sysctls to unprivileged users */
		if (net->user_ns != &init_user_ns)
			tbl[0].procname = NULL;
	}
	/* Tell the flush handler which namespace this entry belongs to. */
	tbl[0].extra1 = net;

	net->ipv4.route_hdr = register_net_sysctl(net, "net/ipv4/route", tbl);
	if (!net->ipv4.route_hdr)
		goto err_reg;
	return 0;

err_reg:
	/* Only free a private copy, never the shared static table. */
	if (tbl != ipv4_route_flush_table)
		kfree(tbl);
err_dup:
	return -ENOMEM;
}
3242
3243static __net_exit void sysctl_route_net_exit(struct net *net)
3244{
3245 struct ctl_table *tbl;
3246
3247 tbl = net->ipv4.route_hdr->ctl_table_arg;
3248 unregister_net_sysctl_table(net->ipv4.route_hdr);
3249 BUG_ON(tbl == ipv4_route_flush_table);
3250 kfree(tbl);
3251}
3252
/* Tie route-sysctl setup/teardown to network-namespace lifetime. */
static __net_initdata struct pernet_operations sysctl_route_ops = {
	.init = sysctl_route_net_init,
	.exit = sysctl_route_net_exit,
};
Linus Torvalds1da177e2005-04-16 15:20:36 -07003257#endif
3258
/*
 * Initialize this namespace's route/fnhe generation counters to zero and
 * seed dev_addr_genid with a random value so it differs across boots and
 * namespaces.
 */
static __net_init int rt_genid_init(struct net *net)
{
	atomic_set(&net->ipv4.rt_genid, 0);
	atomic_set(&net->fnhe_genid, 0);
	atomic_set(&net->ipv4.dev_addr_genid, get_random_int());
	return 0;
}
3266
/* No .exit hook: the genid counters need no per-netns teardown. */
static __net_initdata struct pernet_operations rt_genid_ops = {
	.init = rt_genid_init,
};
3270
David S. Millerc3426b42012-06-09 16:27:05 -07003271static int __net_init ipv4_inetpeer_init(struct net *net)
3272{
3273 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3274
3275 if (!bp)
3276 return -ENOMEM;
3277 inet_peer_base_init(bp);
3278 net->ipv4.peers = bp;
3279 return 0;
3280}
3281
3282static void __net_exit ipv4_inetpeer_exit(struct net *net)
3283{
3284 struct inet_peer_base *bp = net->ipv4.peers;
3285
3286 net->ipv4.peers = NULL;
David S. Miller56a6b242012-06-09 16:32:41 -07003287 inetpeer_invalidate_tree(bp);
David S. Millerc3426b42012-06-09 16:27:05 -07003288 kfree(bp);
3289}
3290
/* Tie inet_peer base setup/teardown to network-namespace lifetime. */
static __net_initdata struct pernet_operations ipv4_inetpeer_ops = {
	.init = ipv4_inetpeer_init,
	.exit = ipv4_inetpeer_exit,
};
Denis V. Lunev9f5e97e2008-07-05 19:02:59 -07003295
#ifdef CONFIG_IP_ROUTE_CLASSID
/*
 * Per-cpu route accounting counters; allocated in ip_rt_init() as 256
 * ip_rt_acct slots per cpu (presumably indexed by classid — see users
 * outside this chunk to confirm).
 */
struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
#endif /* CONFIG_IP_ROUTE_CLASSID */
Linus Torvalds1da177e2005-04-16 15:20:36 -07003299
/*
 * One-time boot initialization of the IPv4 routing subsystem: IP-ID
 * generator arrays, per-cpu uncached route lists, dst slab caches and
 * counters, /proc files, the RTM_GETROUTE netlink handler and the
 * per-netns sysctl/genid/inetpeer hooks.  Allocation failures here are
 * fatal (panic) since routing cannot work without them.
 */
int __init ip_rt_init(void)
{
	int cpu;

	/* ip_idents/ip_tstamps back IP identification generation. */
	ip_idents = kmalloc_array(IP_IDENTS_SZ, sizeof(*ip_idents),
				  GFP_KERNEL);
	if (!ip_idents)
		panic("IP: failed to allocate ip_idents\n");

	/* Randomize initial IDs so they are not predictable at boot. */
	prandom_bytes(ip_idents, IP_IDENTS_SZ * sizeof(*ip_idents));

	ip_tstamps = kcalloc(IP_IDENTS_SZ, sizeof(*ip_tstamps), GFP_KERNEL);
	if (!ip_tstamps)
		panic("IP: failed to allocate ip_tstamps\n");

	/* Set up the per-cpu lists of uncached routes. */
	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu);

		INIT_LIST_HEAD(&ul->head);
		spin_lock_init(&ul->lock);
	}
#ifdef CONFIG_IP_ROUTE_CLASSID
	/* 256 accounting slots per cpu. */
	ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct));
	if (!ip_rt_acct)
		panic("IP: failed to allocate ip_rt_acct\n");
#endif

	ipv4_dst_ops.kmem_cachep =
		kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0,
				  SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);

	/* Blackhole dsts share the regular rtable slab cache. */
	ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep;

	if (dst_entries_init(&ipv4_dst_ops) < 0)
		panic("IP: failed to allocate ipv4_dst_ops counter\n");

	if (dst_entries_init(&ipv4_dst_blackhole_ops) < 0)
		panic("IP: failed to allocate ipv4_dst_blackhole_ops counter\n");

	/* Effectively no gc threshold / size cap on dst entries. */
	ipv4_dst_ops.gc_thresh = ~0;
	ip_rt_max_size = INT_MAX;

	devinet_init();
	ip_fib_init();

	/* proc files are nice to have but not essential: warn, don't fail. */
	if (ip_rt_proc_init())
		pr_err("Unable to create route proc files\n");
#ifdef CONFIG_XFRM
	xfrm_init();
	xfrm4_init();
#endif
	rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL,
		      RTNL_FLAG_DOIT_UNLOCKED);

#ifdef CONFIG_SYSCTL
	register_pernet_subsys(&sysctl_route_ops);
#endif
	register_pernet_subsys(&rt_genid_ops);
	register_pernet_subsys(&ipv4_inetpeer_ops);
	return 0;
}
3361
#ifdef CONFIG_SYSCTL
/*
 * We really need to sanitize the damn ipv4 init order, then all
 * this nonsense will go away.
 */
void __init ip_static_sysctl_init(void)
{
	/* Register init_net's static net/ipv4/route sysctl table early. */
	register_net_sysctl(&init_net, "net/ipv4/route", ipv4_route_table);
}
#endif