/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		ROUTE - implementation of the IP router.
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *		Linus Torvalds, <Linus.Torvalds@helsinki.fi>
 *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Fixes:
 *		Alan Cox	:	Verify area fixes.
 *		Alan Cox	:	cli() protects routing changes
 *		Rui Oliveira	:	ICMP routing table updates
 *		(rco@di.uminho.pt)	Routing table insertion and update
 *		Linus Torvalds	:	Rewrote bits to be sensible
 *		Alan Cox	:	Added BSD route gw semantics
 *		Alan Cox	:	Super /proc >4K
 *		Alan Cox	:	MTU in route table
 *		Alan Cox	:	MSS actually. Also added the window
 *					clamper.
 *		Sam Lantinga	:	Fixed route matching in rt_del()
 *		Alan Cox	:	Routing cache support.
 *		Alan Cox	:	Removed compatibility cruft.
 *		Alan Cox	:	RTF_REJECT support.
 *		Alan Cox	:	TCP irtt support.
 *		Jonathan Naylor	:	Added Metric support.
 *	Miquel van Smoorenburg	:	BSD API fixes.
 *	Miquel van Smoorenburg	:	Metrics.
 *		Alan Cox	:	Use __u32 properly
 *		Alan Cox	:	Aligned routing errors more closely with BSD
 *					our system is still very different.
 *		Alan Cox	:	Faster /proc handling
 *	Alexey Kuznetsov	:	Massive rework to support tree based routing,
 *					routing caches and better behaviour.
 *
 *		Olaf Erb	:	irtt wasn't being copied right.
 *		Bjorn Ekwall	:	Kerneld route support.
 *		Alan Cox	:	Multicast fixed (I hope)
 *		Pavel Krauz	:	Limited broadcast fixed
 *		Mike McLagan	:	Routing by source
 *	Alexey Kuznetsov	:	End of old history. Split to fib.c and
 *					route.c and rewritten from scratch.
 *		Andi Kleen	:	Load-limit warning messages.
 *	Vitaly E. Lavrov	:	Transparent proxy revived after year coma.
 *	Vitaly E. Lavrov	:	Race condition in ip_route_input_slow.
 *	Tobias Ringstrom	:	Uninitialized res.type in ip_route_output_slow.
 *	Vladimir V. Ivanov	:	IP rule info (flowid) is really useful.
 *		Marc Boucher	:	routing by fwmark
 *	Robert Olsson		:	Added rt_cache statistics
 *	Arnaldo C. Melo		:	Convert proc stuff to seq_file
 *	Eric Dumazet		:	hashed spinlocks and rt_check_expire() fixes.
 *	Ilia Sotnikov		:	Ignore TOS on PMTUD and Redirect
 *	Ilia Sotnikov		:	Removed TOS from hash calculations
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */

#define pr_fmt(fmt) "IPv4: " fmt

#include <linux/module.h>
#include <linux/uaccess.h>
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/pkt_sched.h>
#include <linux/mroute.h>
#include <linux/netfilter_ipv4.h>
#include <linux/random.h>
#include <linux/rcupdate.h>
#include <linux/times.h>
#include <linux/slab.h>
#include <linux/jhash.h>
#include <net/dst.h>
#include <net/dst_metadata.h>
#include <net/net_namespace.h>
#include <net/protocol.h>
#include <net/ip.h>
#include <net/route.h>
#include <net/inetpeer.h>
#include <net/sock.h>
#include <net/ip_fib.h>
#include <net/arp.h>
#include <net/tcp.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/lwtunnel.h>
#include <net/netevent.h>
#include <net/rtnetlink.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
#include <net/secure_seq.h>
#include <net/ip_tunnels.h>
#include <net/l3mdev.h>

#include "fib_lookup.h"

#define RT_FL_TOS(oldflp4) \
	((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))

#define RT_GC_TIMEOUT (300*HZ)

static int ip_rt_max_size;
static int ip_rt_redirect_number __read_mostly	= 9;
static int ip_rt_redirect_load __read_mostly	= HZ / 50;
static int ip_rt_redirect_silence __read_mostly	= ((HZ / 50) << (9 + 1));
static int ip_rt_error_cost __read_mostly	= HZ;
static int ip_rt_error_burst __read_mostly	= 5 * HZ;
static int ip_rt_mtu_expires __read_mostly	= 10 * 60 * HZ;
static u32 ip_rt_min_pmtu __read_mostly		= 512 + 20 + 20;
static int ip_rt_min_advmss __read_mostly	= 256;

static int ip_rt_gc_timeout __read_mostly	= RT_GC_TIMEOUT;

/*
 *	Interface to generic destination cache.
 */

static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int	 ipv4_default_advmss(const struct dst_entry *dst);
static unsigned int	 ipv4_mtu(const struct dst_entry *dst);
static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
static void		 ipv4_link_failure(struct sk_buff *skb);
static void		 ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
					   struct sk_buff *skb, u32 mtu);
static void		 ip_do_redirect(struct dst_entry *dst, struct sock *sk,
					struct sk_buff *skb);
static void		 ipv4_dst_destroy(struct dst_entry *dst);

static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
{
	WARN_ON(1);
	return NULL;
}

static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
					   struct sk_buff *skb,
					   const void *daddr);
static void		 ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr);

static struct dst_ops ipv4_dst_ops = {
	.family =		AF_INET,
	.check =		ipv4_dst_check,
	.default_advmss =	ipv4_default_advmss,
	.mtu =			ipv4_mtu,
	.cow_metrics =		ipv4_cow_metrics,
	.destroy =		ipv4_dst_destroy,
	.negative_advice =	ipv4_negative_advice,
	.link_failure =		ipv4_link_failure,
	.update_pmtu =		ip_rt_update_pmtu,
	.redirect =		ip_do_redirect,
	.local_out =		__ip_local_out,
	.neigh_lookup =		ipv4_neigh_lookup,
	.confirm_neigh =	ipv4_confirm_neigh,
};

#define ECN_OR_COST(class)	TC_PRIO_##class

const __u8 ip_tos2prio[16] = {
	TC_PRIO_BESTEFFORT,
	ECN_OR_COST(BESTEFFORT),
	TC_PRIO_BESTEFFORT,
	ECN_OR_COST(BESTEFFORT),
	TC_PRIO_BULK,
	ECN_OR_COST(BULK),
	TC_PRIO_BULK,
	ECN_OR_COST(BULK),
	TC_PRIO_INTERACTIVE,
	ECN_OR_COST(INTERACTIVE),
	TC_PRIO_INTERACTIVE,
	ECN_OR_COST(INTERACTIVE),
	TC_PRIO_INTERACTIVE_BULK,
	ECN_OR_COST(INTERACTIVE_BULK),
	TC_PRIO_INTERACTIVE_BULK,
	ECN_OR_COST(INTERACTIVE_BULK)
};
EXPORT_SYMBOL(ip_tos2prio);
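
/* Example: callers index this table with the legacy TOS nibble, e.g.
 * rt_tos2priority() in <net/route.h> does ip_tos2prio[IPTOS_TOS(tos) >> 1],
 * so each adjacent pair of entries covers one TOS class, the ECN_OR_COST()
 * entry being picked when the low bit is set.
 */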

static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
#define RT_CACHE_STAT_INC(field) raw_cpu_inc(rt_cache_stat.field)

#ifdef CONFIG_PROC_FS
static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
{
	if (*pos)
		return NULL;
	return SEQ_START_TOKEN;
}

static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;
	return NULL;
}

static void rt_cache_seq_stop(struct seq_file *seq, void *v)
{
}

static int rt_cache_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_printf(seq, "%-127s\n",
			   "Iface\tDestination\tGateway \tFlags\t\tRefCnt\tUse\t"
			   "Metric\tSource\t\tMTU\tWindow\tIRTT\tTOS\tHHRef\t"
			   "HHUptod\tSpecDst");
	return 0;
}

static const struct seq_operations rt_cache_seq_ops = {
	.start  = rt_cache_seq_start,
	.next   = rt_cache_seq_next,
	.stop   = rt_cache_seq_stop,
	.show   = rt_cache_seq_show,
};

static int rt_cache_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &rt_cache_seq_ops);
}

static const struct file_operations rt_cache_seq_fops = {
	.open	 = rt_cache_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release,
};


static void *rt_cpu_seq_start(struct seq_file *seq, loff_t *pos)
{
	int cpu;

	if (*pos == 0)
		return SEQ_START_TOKEN;

	for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
		if (!cpu_possible(cpu))
			continue;
		*pos = cpu+1;
		return &per_cpu(rt_cache_stat, cpu);
	}
	return NULL;
}

static void *rt_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	int cpu;

	for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
		if (!cpu_possible(cpu))
			continue;
		*pos = cpu+1;
		return &per_cpu(rt_cache_stat, cpu);
	}
	return NULL;

}

static void rt_cpu_seq_stop(struct seq_file *seq, void *v)
{

}

static int rt_cpu_seq_show(struct seq_file *seq, void *v)
{
	struct rt_cache_stat *st = v;

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "entries  in_hit   in_slow_tot in_slow_mc in_no_route in_brd in_martian_dst in_martian_src out_hit  out_slow_tot out_slow_mc gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n");
		return 0;
	}

	seq_printf(seq,"%08x  %08x %08x %08x %08x %08x %08x %08x "
		   " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n",
		   dst_entries_get_slow(&ipv4_dst_ops),
		   0, /* st->in_hit */
		   st->in_slow_tot,
		   st->in_slow_mc,
		   st->in_no_route,
		   st->in_brd,
		   st->in_martian_dst,
		   st->in_martian_src,

		   0, /* st->out_hit */
		   st->out_slow_tot,
		   st->out_slow_mc,

		   0, /* st->gc_total */
		   0, /* st->gc_ignored */
		   0, /* st->gc_goal_miss */
		   0, /* st->gc_dst_overflow */
		   0, /* st->in_hlist_search */
		   0  /* st->out_hlist_search */
		);
	return 0;
}
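
/* Illustrative read of /proc/net/stat/rt_cache: one header line, then one
 * hex-formatted line per possible CPU.  Only the first field (the dst entry
 * count from dst_entries_get_slow()) is global; the gc_* and *_hlist_search
 * columns are emitted as zeros to keep the old routing-cache column layout.
 */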

static const struct seq_operations rt_cpu_seq_ops = {
	.start  = rt_cpu_seq_start,
	.next   = rt_cpu_seq_next,
	.stop   = rt_cpu_seq_stop,
	.show   = rt_cpu_seq_show,
};


static int rt_cpu_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &rt_cpu_seq_ops);
}

static const struct file_operations rt_cpu_seq_fops = {
	.open	 = rt_cpu_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release,
};

#ifdef CONFIG_IP_ROUTE_CLASSID
static int rt_acct_proc_show(struct seq_file *m, void *v)
{
	struct ip_rt_acct *dst, *src;
	unsigned int i, j;

	dst = kcalloc(256, sizeof(struct ip_rt_acct), GFP_KERNEL);
	if (!dst)
		return -ENOMEM;

	for_each_possible_cpu(i) {
		src = (struct ip_rt_acct *)per_cpu_ptr(ip_rt_acct, i);
		for (j = 0; j < 256; j++) {
			dst[j].o_bytes   += src[j].o_bytes;
			dst[j].o_packets += src[j].o_packets;
			dst[j].i_bytes   += src[j].i_bytes;
			dst[j].i_packets += src[j].i_packets;
		}
	}

	seq_write(m, dst, 256 * sizeof(struct ip_rt_acct));
	kfree(dst);
	return 0;
}
#endif

static int __net_init ip_rt_do_proc_init(struct net *net)
{
	struct proc_dir_entry *pde;

	pde = proc_create("rt_cache", 0444, net->proc_net,
			  &rt_cache_seq_fops);
	if (!pde)
		goto err1;

	pde = proc_create("rt_cache", 0444,
			  net->proc_net_stat, &rt_cpu_seq_fops);
	if (!pde)
		goto err2;

#ifdef CONFIG_IP_ROUTE_CLASSID
	pde = proc_create_single("rt_acct", 0, net->proc_net,
			rt_acct_proc_show);
	if (!pde)
		goto err3;
#endif
	return 0;

#ifdef CONFIG_IP_ROUTE_CLASSID
err3:
	remove_proc_entry("rt_cache", net->proc_net_stat);
#endif
err2:
	remove_proc_entry("rt_cache", net->proc_net);
err1:
	return -ENOMEM;
}

static void __net_exit ip_rt_do_proc_exit(struct net *net)
{
	remove_proc_entry("rt_cache", net->proc_net_stat);
	remove_proc_entry("rt_cache", net->proc_net);
#ifdef CONFIG_IP_ROUTE_CLASSID
	remove_proc_entry("rt_acct", net->proc_net);
#endif
}

static struct pernet_operations ip_rt_proc_ops __net_initdata =	{
	.init = ip_rt_do_proc_init,
	.exit = ip_rt_do_proc_exit,
};

static int __init ip_rt_proc_init(void)
{
	return register_pernet_subsys(&ip_rt_proc_ops);
}

#else
static inline int ip_rt_proc_init(void)
{
	return 0;
}
#endif /* CONFIG_PROC_FS */

static inline bool rt_is_expired(const struct rtable *rth)
{
	return rth->rt_genid != rt_genid_ipv4(dev_net(rth->dst.dev));
}

void rt_cache_flush(struct net *net)
{
	rt_genid_bump_ipv4(net);
}
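
/* Note: flushing does not walk any cache.  Each cached rtable records the
 * generation id it was created under, so bumping the per-namespace counter
 * above is enough for rt_is_expired() to reject every older entry lazily.
 */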

static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
					   struct sk_buff *skb,
					   const void *daddr)
{
	const struct rtable *rt = container_of(dst, struct rtable, dst);
	struct net_device *dev = dst->dev;
	struct neighbour *n;

	rcu_read_lock_bh();

	if (likely(rt->rt_gw_family == AF_INET)) {
		n = ip_neigh_gw4(dev, rt->rt_gw4);
	} else if (rt->rt_gw_family == AF_INET6) {
		n = ip_neigh_gw6(dev, &rt->rt_gw6);
	} else {
		__be32 pkey;

		pkey = skb ? ip_hdr(skb)->daddr : *((__be32 *) daddr);
		n = ip_neigh_gw4(dev, pkey);
	}

	if (n && !refcount_inc_not_zero(&n->refcnt))
		n = NULL;

	rcu_read_unlock_bh();

	return n;
}

static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr)
{
	const struct rtable *rt = container_of(dst, struct rtable, dst);
	struct net_device *dev = dst->dev;
	const __be32 *pkey = daddr;

	if (rt->rt_gw_family == AF_INET) {
		pkey = (const __be32 *)&rt->rt_gw4;
	} else if (rt->rt_gw_family == AF_INET6) {
		return __ipv6_confirm_neigh_stub(dev, &rt->rt_gw6);
	} else if (!daddr ||
		   (rt->rt_flags &
		    (RTCF_MULTICAST | RTCF_BROADCAST | RTCF_LOCAL))) {
		return;
	}
	__ipv4_confirm_neigh(dev, *(__force u32 *)pkey);
}

#define IP_IDENTS_SZ 2048u

static atomic_t *ip_idents __read_mostly;
static u32 *ip_tstamps __read_mostly;

/* In order to protect privacy, we add a perturbation to identifiers
 * if one generator is seldom used. This makes it hard for an attacker
 * to infer how many packets were sent between two points in time.
 */
u32 ip_idents_reserve(u32 hash, int segs)
{
	u32 *p_tstamp = ip_tstamps + hash % IP_IDENTS_SZ;
	atomic_t *p_id = ip_idents + hash % IP_IDENTS_SZ;
	u32 old = READ_ONCE(*p_tstamp);
	u32 now = (u32)jiffies;
	u32 new, delta = 0;

	if (old != now && cmpxchg(p_tstamp, old, now) == old)
		delta = prandom_u32_max(now - old);

	/* Do not use atomic_add_return() as it makes UBSAN unhappy */
	do {
		old = (u32)atomic_read(p_id);
		new = old + delta + segs;
	} while (atomic_cmpxchg(p_id, old, new) != old);

	return new - segs;
}
EXPORT_SYMBOL(ip_idents_reserve);
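
/* Worked example (illustrative): with IP_IDENTS_SZ == 2048, a packet whose
 * flow hashes to a bucket last used 500 jiffies ago reserves segs
 * consecutive ids starting at old + delta, where delta is random in
 * [0, 500).  The perturbation hides how many ids were handed out while the
 * bucket sat idle.
 */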

void __ip_select_ident(struct net *net, struct iphdr *iph, int segs)
{
	u32 hash, id;

	/* Note the following code is not safe, but this is okay. */
	if (unlikely(siphash_key_is_zero(&net->ipv4.ip_id_key)))
		get_random_bytes(&net->ipv4.ip_id_key,
				 sizeof(net->ipv4.ip_id_key));

	hash = siphash_3u32((__force u32)iph->daddr,
			    (__force u32)iph->saddr,
			    iph->protocol,
			    &net->ipv4.ip_id_key);
	id = ip_idents_reserve(hash, segs);
	iph->id = htons(id);
}
EXPORT_SYMBOL(__ip_select_ident);

static void __build_flow_key(const struct net *net, struct flowi4 *fl4,
			     const struct sock *sk,
			     const struct iphdr *iph,
			     int oif, u8 tos,
			     u8 prot, u32 mark, int flow_flags)
{
	if (sk) {
		const struct inet_sock *inet = inet_sk(sk);

		oif = sk->sk_bound_dev_if;
		mark = sk->sk_mark;
		tos = RT_CONN_FLAGS(sk);
		prot = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol;
	}
	flowi4_init_output(fl4, oif, mark, tos,
			   RT_SCOPE_UNIVERSE, prot,
			   flow_flags,
			   iph->daddr, iph->saddr, 0, 0,
			   sock_net_uid(net, sk));
}

static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb,
			       const struct sock *sk)
{
	const struct net *net = dev_net(skb->dev);
	const struct iphdr *iph = ip_hdr(skb);
	int oif = skb->dev->ifindex;
	u8 tos = RT_TOS(iph->tos);
	u8 prot = iph->protocol;
	u32 mark = skb->mark;

	__build_flow_key(net, fl4, sk, iph, oif, tos, prot, mark, 0);
}

static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
{
	const struct inet_sock *inet = inet_sk(sk);
	const struct ip_options_rcu *inet_opt;
	__be32 daddr = inet->inet_daddr;

	rcu_read_lock();
	inet_opt = rcu_dereference(inet->inet_opt);
	if (inet_opt && inet_opt->opt.srr)
		daddr = inet_opt->opt.faddr;
	flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
			   inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
			   inet_sk_flowi_flags(sk),
			   daddr, inet->inet_saddr, 0, 0, sk->sk_uid);
	rcu_read_unlock();
}

static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk,
				 const struct sk_buff *skb)
{
	if (skb)
		build_skb_flow_key(fl4, skb, sk);
	else
		build_sk_flow_key(fl4, sk);
}

static DEFINE_SPINLOCK(fnhe_lock);

static void fnhe_flush_routes(struct fib_nh_exception *fnhe)
{
	struct rtable *rt;

	rt = rcu_dereference(fnhe->fnhe_rth_input);
	if (rt) {
		RCU_INIT_POINTER(fnhe->fnhe_rth_input, NULL);
		dst_dev_put(&rt->dst);
		dst_release(&rt->dst);
	}
	rt = rcu_dereference(fnhe->fnhe_rth_output);
	if (rt) {
		RCU_INIT_POINTER(fnhe->fnhe_rth_output, NULL);
		dst_dev_put(&rt->dst);
		dst_release(&rt->dst);
	}
}

static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
{
	struct fib_nh_exception *fnhe, *oldest;

	oldest = rcu_dereference(hash->chain);
	for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe;
	     fnhe = rcu_dereference(fnhe->fnhe_next)) {
		if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp))
			oldest = fnhe;
	}
	fnhe_flush_routes(oldest);
	return oldest;
}

static inline u32 fnhe_hashfun(__be32 daddr)
{
	static u32 fnhe_hashrnd __read_mostly;
	u32 hval;

	net_get_random_once(&fnhe_hashrnd, sizeof(fnhe_hashrnd));
	hval = jhash_1word((__force u32) daddr, fnhe_hashrnd);
	return hash_32(hval, FNHE_HASH_SHIFT);
}
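
/* The jhash seed above is generated once at first use, so an off-host
 * attacker cannot predict which exception bucket a given destination lands
 * in; hash_32() then folds the hash value down to FNHE_HASH_SHIFT bits.
 */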

static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe)
{
	rt->rt_pmtu = fnhe->fnhe_pmtu;
	rt->rt_mtu_locked = fnhe->fnhe_mtu_locked;
	rt->dst.expires = fnhe->fnhe_expires;

	if (fnhe->fnhe_gw) {
		rt->rt_flags |= RTCF_REDIRECTED;
		rt->rt_gw_family = AF_INET;
		rt->rt_gw4 = fnhe->fnhe_gw;
	}
}

static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
				  u32 pmtu, bool lock, unsigned long expires)
{
	struct fnhe_hash_bucket *hash;
	struct fib_nh_exception *fnhe;
	struct rtable *rt;
	u32 genid, hval;
	unsigned int i;
	int depth;

	genid = fnhe_genid(dev_net(nh->fib_nh_dev));
	hval = fnhe_hashfun(daddr);

	spin_lock_bh(&fnhe_lock);

	hash = rcu_dereference(nh->nh_exceptions);
	if (!hash) {
		hash = kcalloc(FNHE_HASH_SIZE, sizeof(*hash), GFP_ATOMIC);
		if (!hash)
			goto out_unlock;
		rcu_assign_pointer(nh->nh_exceptions, hash);
	}

	hash += hval;

	depth = 0;
	for (fnhe = rcu_dereference(hash->chain); fnhe;
	     fnhe = rcu_dereference(fnhe->fnhe_next)) {
		if (fnhe->fnhe_daddr == daddr)
			break;
		depth++;
	}

	if (fnhe) {
		if (fnhe->fnhe_genid != genid)
			fnhe->fnhe_genid = genid;
		if (gw)
			fnhe->fnhe_gw = gw;
		if (pmtu) {
			fnhe->fnhe_pmtu = pmtu;
			fnhe->fnhe_mtu_locked = lock;
		}
		fnhe->fnhe_expires = max(1UL, expires);
		/* Update all cached dsts too */
		rt = rcu_dereference(fnhe->fnhe_rth_input);
		if (rt)
			fill_route_from_fnhe(rt, fnhe);
		rt = rcu_dereference(fnhe->fnhe_rth_output);
		if (rt)
			fill_route_from_fnhe(rt, fnhe);
	} else {
		if (depth > FNHE_RECLAIM_DEPTH)
			fnhe = fnhe_oldest(hash);
		else {
			fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC);
			if (!fnhe)
				goto out_unlock;

			fnhe->fnhe_next = hash->chain;
			rcu_assign_pointer(hash->chain, fnhe);
		}
		fnhe->fnhe_genid = genid;
		fnhe->fnhe_daddr = daddr;
		fnhe->fnhe_gw = gw;
		fnhe->fnhe_pmtu = pmtu;
		fnhe->fnhe_mtu_locked = lock;
		fnhe->fnhe_expires = max(1UL, expires);

		/* Exception created; mark the cached routes for the nexthop
		 * stale, so anyone caching it rechecks if this exception
		 * applies to them.
		 */
		rt = rcu_dereference(nh->nh_rth_input);
		if (rt)
			rt->dst.obsolete = DST_OBSOLETE_KILL;

		for_each_possible_cpu(i) {
			struct rtable __rcu **prt;
			prt = per_cpu_ptr(nh->nh_pcpu_rth_output, i);
			rt = rcu_dereference(*prt);
			if (rt)
				rt->dst.obsolete = DST_OBSOLETE_KILL;
		}
	}

	fnhe->fnhe_stamp = jiffies;

out_unlock:
	spin_unlock_bh(&fnhe_lock);
}
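
/* Summary of the path above: exceptions live in a per-nexthop hash keyed by
 * fnhe_hashfun(daddr).  An existing entry is refreshed in place; a new one
 * either recycles the oldest entry of an over-deep chain (fnhe_oldest) or
 * is allocated atomically, after which every route already cached against
 * the nexthop is marked DST_OBSOLETE_KILL so its next dst_check() forces a
 * relookup that can pick the exception up.
 */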

static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4,
			     bool kill_route)
{
	__be32 new_gw = icmp_hdr(skb)->un.gateway;
	__be32 old_gw = ip_hdr(skb)->saddr;
	struct net_device *dev = skb->dev;
	struct in_device *in_dev;
	struct fib_result res;
	struct neighbour *n;
	struct net *net;

	switch (icmp_hdr(skb)->code & 7) {
	case ICMP_REDIR_NET:
	case ICMP_REDIR_NETTOS:
	case ICMP_REDIR_HOST:
	case ICMP_REDIR_HOSTTOS:
		break;

	default:
		return;
	}

	if (rt->rt_gw_family != AF_INET || rt->rt_gw4 != old_gw)
		return;

	in_dev = __in_dev_get_rcu(dev);
	if (!in_dev)
		return;

	net = dev_net(dev);
	if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) ||
	    ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw) ||
	    ipv4_is_zeronet(new_gw))
		goto reject_redirect;

	if (!IN_DEV_SHARED_MEDIA(in_dev)) {
		if (!inet_addr_onlink(in_dev, new_gw, old_gw))
			goto reject_redirect;
		if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev))
			goto reject_redirect;
	} else {
		if (inet_addr_type(net, new_gw) != RTN_UNICAST)
			goto reject_redirect;
	}

	n = __ipv4_neigh_lookup(rt->dst.dev, new_gw);
	if (!n)
		n = neigh_create(&arp_tbl, &new_gw, rt->dst.dev);
	if (!IS_ERR(n)) {
		if (!(n->nud_state & NUD_VALID)) {
			neigh_event_send(n, NULL);
		} else {
			if (fib_lookup(net, fl4, &res, 0) == 0) {
				struct fib_nh_common *nhc = FIB_RES_NHC(res);
				struct fib_nh *nh;

				nh = container_of(nhc, struct fib_nh, nh_common);
				update_or_create_fnhe(nh, fl4->daddr, new_gw,
						0, false,
						jiffies + ip_rt_gc_timeout);
			}
			if (kill_route)
				rt->dst.obsolete = DST_OBSOLETE_KILL;
			call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n);
		}
		neigh_release(n);
	}
	return;

reject_redirect:
#ifdef CONFIG_IP_ROUTE_VERBOSE
	if (IN_DEV_LOG_MARTIANS(in_dev)) {
		const struct iphdr *iph = (const struct iphdr *) skb->data;
		__be32 daddr = iph->daddr;
		__be32 saddr = iph->saddr;

		net_info_ratelimited("Redirect from %pI4 on %s about %pI4 ignored\n"
				     "  Advised path = %pI4 -> %pI4\n",
				     &old_gw, dev->name, &new_gw,
				     &saddr, &daddr);
	}
#endif
	;
}

static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
{
	struct rtable *rt;
	struct flowi4 fl4;
	const struct iphdr *iph = (const struct iphdr *) skb->data;
	struct net *net = dev_net(skb->dev);
	int oif = skb->dev->ifindex;
	u8 tos = RT_TOS(iph->tos);
	u8 prot = iph->protocol;
	u32 mark = skb->mark;

	rt = (struct rtable *) dst;

	__build_flow_key(net, &fl4, sk, iph, oif, tos, prot, mark, 0);
	__ip_do_redirect(rt, skb, &fl4, true);
}

static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
{
	struct rtable *rt = (struct rtable *)dst;
	struct dst_entry *ret = dst;

	if (rt) {
		if (dst->obsolete > 0) {
			ip_rt_put(rt);
			ret = NULL;
		} else if ((rt->rt_flags & RTCF_REDIRECTED) ||
			   rt->dst.expires) {
			ip_rt_put(rt);
			ret = NULL;
		}
	}
	return ret;
}

/*
 * Algorithm:
 *	1. The first ip_rt_redirect_number redirects are sent
 *	   with exponential backoff, then we stop sending them at all,
 *	   assuming that the host ignores our redirects.
 *	2. If we did not see packets requiring redirects
 *	   during ip_rt_redirect_silence, we assume that the host
 *	   forgot the redirected route and start to send redirects again.
 *
 * This algorithm is much cheaper and more intelligent than dumb load limiting
 * in icmp.c.
 *
 * NOTE. Do not forget to inhibit load limiting for redirects (redundant)
 * and "frag. need" (breaks PMTU discovery) in icmp.c.
 */

void ip_rt_send_redirect(struct sk_buff *skb)
{
	struct rtable *rt = skb_rtable(skb);
	struct in_device *in_dev;
	struct inet_peer *peer;
	struct net *net;
	int log_martians;
	int vif;

	rcu_read_lock();
	in_dev = __in_dev_get_rcu(rt->dst.dev);
	if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) {
		rcu_read_unlock();
		return;
	}
	log_martians = IN_DEV_LOG_MARTIANS(in_dev);
	vif = l3mdev_master_ifindex_rcu(rt->dst.dev);
	rcu_read_unlock();

	net = dev_net(rt->dst.dev);
	peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, vif, 1);
	if (!peer) {
		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST,
			  rt_nexthop(rt, ip_hdr(skb)->daddr));
		return;
	}

	/* No redirected packets during ip_rt_redirect_silence;
	 * reset the algorithm.
	 */
	if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence)) {
		peer->rate_tokens = 0;
		peer->n_redirects = 0;
	}

	/* Too many ignored redirects; do not send anything and
	 * set dst.rate_last to the last seen redirected packet.
	 */
	if (peer->n_redirects >= ip_rt_redirect_number) {
		peer->rate_last = jiffies;
		goto out_put_peer;
	}

	/* Check for load limit; set rate_last to the latest sent
	 * redirect.
	 */
	if (peer->rate_tokens == 0 ||
	    time_after(jiffies,
		       (peer->rate_last +
			(ip_rt_redirect_load << peer->rate_tokens)))) {
		__be32 gw = rt_nexthop(rt, ip_hdr(skb)->daddr);

		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw);
		peer->rate_last = jiffies;
		++peer->rate_tokens;
		++peer->n_redirects;
#ifdef CONFIG_IP_ROUTE_VERBOSE
		if (log_martians &&
		    peer->rate_tokens == ip_rt_redirect_number)
			net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n",
					     &ip_hdr(skb)->saddr, inet_iif(skb),
					     &ip_hdr(skb)->daddr, &gw);
#endif
	}
out_put_peer:
	inet_putpeer(peer);
}

static int ip_error(struct sk_buff *skb)
{
	struct rtable *rt = skb_rtable(skb);
	struct net_device *dev = skb->dev;
	struct in_device *in_dev;
	struct inet_peer *peer;
	unsigned long now;
	struct net *net;
	bool send;
	int code;

	if (netif_is_l3_master(skb->dev)) {
		dev = __dev_get_by_index(dev_net(skb->dev), IPCB(skb)->iif);
		if (!dev)
			goto out;
	}

	in_dev = __in_dev_get_rcu(dev);

	/* IP on this device is disabled. */
	if (!in_dev)
		goto out;

	net = dev_net(rt->dst.dev);
	if (!IN_DEV_FORWARD(in_dev)) {
		switch (rt->dst.error) {
		case EHOSTUNREACH:
			__IP_INC_STATS(net, IPSTATS_MIB_INADDRERRORS);
			break;

		case ENETUNREACH:
			__IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES);
			break;
		}
		goto out;
	}

	switch (rt->dst.error) {
	case EINVAL:
	default:
		goto out;
	case EHOSTUNREACH:
		code = ICMP_HOST_UNREACH;
		break;
	case ENETUNREACH:
		code = ICMP_NET_UNREACH;
		__IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES);
		break;
	case EACCES:
		code = ICMP_PKT_FILTERED;
		break;
	}

	peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr,
			       l3mdev_master_ifindex(skb->dev), 1);

	send = true;
	if (peer) {
		now = jiffies;
		peer->rate_tokens += now - peer->rate_last;
		if (peer->rate_tokens > ip_rt_error_burst)
			peer->rate_tokens = ip_rt_error_burst;
		peer->rate_last = now;
		if (peer->rate_tokens >= ip_rt_error_cost)
			peer->rate_tokens -= ip_rt_error_cost;
		else
			send = false;
		inet_putpeer(peer);
	}
	if (send)
		icmp_send(skb, ICMP_DEST_UNREACH, code, 0);

out:	kfree_skb(skb);
	return 0;
}
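
/* The peer rate limit above is a token bucket: tokens accrue one per jiffy
 * up to ip_rt_error_burst (5 * HZ) and each ICMP error costs
 * ip_rt_error_cost (HZ), i.e. a steady rate of about one error per second
 * per source with bursts of up to five.
 */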

static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
{
	struct dst_entry *dst = &rt->dst;
	u32 old_mtu = ipv4_mtu(dst);
	struct fib_result res;
	bool lock = false;

	if (ip_mtu_locked(dst))
		return;

	if (old_mtu < mtu)
		return;

	if (mtu < ip_rt_min_pmtu) {
		lock = true;
		mtu = min(old_mtu, ip_rt_min_pmtu);
	}

	if (rt->rt_pmtu == mtu && !lock &&
	    time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2))
		return;

	rcu_read_lock();
	if (fib_lookup(dev_net(dst->dev), fl4, &res, 0) == 0) {
		struct fib_nh_common *nhc = FIB_RES_NHC(res);
		struct fib_nh *nh;

		nh = container_of(nhc, struct fib_nh, nh_common);
		update_or_create_fnhe(nh, fl4->daddr, 0, mtu, lock,
				      jiffies + ip_rt_mtu_expires);
	}
	rcu_read_unlock();
}
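
/* Example of the clamping above: a forged ICMP_FRAG_NEEDED advertising
 * mtu = 68 is not taken at face value; the learned value is clamped to
 * min(old_mtu, ip_rt_min_pmtu) (552 = 512 + 20 + 20 by default) and locked,
 * and even an accepted PMTU only lives ip_rt_mtu_expires (10 minutes)
 * unless refreshed.
 */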
1050
David S. Miller4895c772012-07-17 04:19:00 -07001051static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1052 struct sk_buff *skb, u32 mtu)
1053{
1054 struct rtable *rt = (struct rtable *) dst;
1055 struct flowi4 fl4;
1056
1057 ip_rt_build_flow_key(&fl4, sk, skb);
Steffen Klassertd851c122012-10-07 22:47:25 +00001058 __ip_rt_update_pmtu(rt, &fl4, mtu);
David S. Miller4895c772012-07-17 04:19:00 -07001059}
1060
David S. Miller36393392012-06-14 22:21:46 -07001061void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
Maciej Żenczykowskid888f392018-09-25 20:56:26 -07001062 int oif, u8 protocol)
David S. Miller36393392012-06-14 22:21:46 -07001063{
David S. Miller4895c772012-07-17 04:19:00 -07001064 const struct iphdr *iph = (const struct iphdr *) skb->data;
David S. Miller36393392012-06-14 22:21:46 -07001065 struct flowi4 fl4;
1066 struct rtable *rt;
Maciej Żenczykowskid888f392018-09-25 20:56:26 -07001067 u32 mark = IP4_REPLY_MARK(net, skb->mark);
Lorenzo Colitti1b3c61d2014-05-13 10:17:34 -07001068
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09001069 __build_flow_key(net, &fl4, NULL, iph, oif,
Maciej Żenczykowskid888f392018-09-25 20:56:26 -07001070 RT_TOS(iph->tos), protocol, mark, 0);
David S. Miller36393392012-06-14 22:21:46 -07001071 rt = __ip_route_output_key(net, &fl4);
1072 if (!IS_ERR(rt)) {
David S. Miller4895c772012-07-17 04:19:00 -07001073 __ip_rt_update_pmtu(rt, &fl4, mtu);
David S. Miller36393392012-06-14 22:21:46 -07001074 ip_rt_put(rt);
1075 }
1076}
1077EXPORT_SYMBOL_GPL(ipv4_update_pmtu);
1078
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001079static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
David S. Miller36393392012-06-14 22:21:46 -07001080{
David S. Miller4895c772012-07-17 04:19:00 -07001081 const struct iphdr *iph = (const struct iphdr *) skb->data;
1082 struct flowi4 fl4;
1083 struct rtable *rt;
David S. Miller36393392012-06-14 22:21:46 -07001084
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09001085 __build_flow_key(sock_net(sk), &fl4, sk, iph, 0, 0, 0, 0, 0);
Lorenzo Colitti1b3c61d2014-05-13 10:17:34 -07001086
1087 if (!fl4.flowi4_mark)
1088 fl4.flowi4_mark = IP4_REPLY_MARK(sock_net(sk), skb->mark);
1089
David S. Miller4895c772012-07-17 04:19:00 -07001090 rt = __ip_route_output_key(sock_net(sk), &fl4);
1091 if (!IS_ERR(rt)) {
1092 __ip_rt_update_pmtu(rt, &fl4, mtu);
1093 ip_rt_put(rt);
1094 }
David S. Miller36393392012-06-14 22:21:46 -07001095}
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001096
1097void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
1098{
1099 const struct iphdr *iph = (const struct iphdr *) skb->data;
1100 struct flowi4 fl4;
1101 struct rtable *rt;
Eric Dumazet7f502362014-06-30 01:26:23 -07001102 struct dst_entry *odst = NULL;
Steffen Klassertb44108d2013-01-22 00:01:28 +00001103 bool new = false;
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09001104 struct net *net = sock_net(sk);
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001105
1106 bh_lock_sock(sk);
Hannes Frederic Sowa482fc602013-11-05 02:24:17 +01001107
1108 if (!ip_sk_accept_pmtu(sk))
1109 goto out;
1110
Eric Dumazet7f502362014-06-30 01:26:23 -07001111 odst = sk_dst_get(sk);
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001112
Eric Dumazet7f502362014-06-30 01:26:23 -07001113 if (sock_owned_by_user(sk) || !odst) {
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001114 __ipv4_sk_update_pmtu(skb, sk, mtu);
1115 goto out;
1116 }
1117
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09001118 __build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0);
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001119
Eric Dumazet7f502362014-06-30 01:26:23 -07001120 rt = (struct rtable *)odst;
Ian Morris51456b22015-04-03 09:17:26 +01001121 if (odst->obsolete && !odst->ops->check(odst, 0)) {
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001122 rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
1123 if (IS_ERR(rt))
1124 goto out;
Steffen Klassertb44108d2013-01-22 00:01:28 +00001125
1126 new = true;
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001127 }
1128
David Miller0f6c4802017-11-28 15:40:46 -05001129 __ip_rt_update_pmtu((struct rtable *) xfrm_dst_path(&rt->dst), &fl4, mtu);
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001130
Eric Dumazet7f502362014-06-30 01:26:23 -07001131 if (!dst_check(&rt->dst, 0)) {
Steffen Klassertb44108d2013-01-22 00:01:28 +00001132 if (new)
1133 dst_release(&rt->dst);
1134
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001135 rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
1136 if (IS_ERR(rt))
1137 goto out;
1138
Steffen Klassertb44108d2013-01-22 00:01:28 +00001139 new = true;
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001140 }
1141
Steffen Klassertb44108d2013-01-22 00:01:28 +00001142 if (new)
Eric Dumazet7f502362014-06-30 01:26:23 -07001143 sk_dst_set(sk, &rt->dst);
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001144
1145out:
1146 bh_unlock_sock(sk);
Eric Dumazet7f502362014-06-30 01:26:23 -07001147 dst_release(odst);
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001148}
David S. Miller36393392012-06-14 22:21:46 -07001149EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu);
David S. Millerf39925d2011-02-09 22:00:16 -08001150
void ipv4_redirect(struct sk_buff *skb, struct net *net,
		   int oif, u8 protocol)
{
	const struct iphdr *iph = (const struct iphdr *) skb->data;
	struct flowi4 fl4;
	struct rtable *rt;

	__build_flow_key(net, &fl4, NULL, iph, oif,
			 RT_TOS(iph->tos), protocol, 0, 0);
	rt = __ip_route_output_key(net, &fl4);
	if (!IS_ERR(rt)) {
		__ip_do_redirect(rt, skb, &fl4, false);
		ip_rt_put(rt);
	}
}
EXPORT_SYMBOL_GPL(ipv4_redirect);

void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk)
{
	const struct iphdr *iph = (const struct iphdr *) skb->data;
	struct flowi4 fl4;
	struct rtable *rt;
	struct net *net = sock_net(sk);

	__build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0);
	rt = __ip_route_output_key(net, &fl4);
	if (!IS_ERR(rt)) {
		__ip_do_redirect(rt, skb, &fl4, false);
		ip_rt_put(rt);
	}
}
EXPORT_SYMBOL_GPL(ipv4_sk_redirect);

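/* dst_ops ->check hook for IPv4: a cached dst is still usable only
 * while it is marked DST_OBSOLETE_FORCE_CHK and its generation id has
 * not expired; anything else means the route was invalidated.
 */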
static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
{
	struct rtable *rt = (struct rtable *) dst;

	/* All IPV4 dsts are created with ->obsolete set to the value
	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
	 * into this function always.
	 *
	 * When a PMTU/redirect information update invalidates a route,
	 * this is indicated by setting obsolete to DST_OBSOLETE_KILL or
	 * DST_OBSOLETE_DEAD.
	 */
	if (dst->obsolete != DST_OBSOLETE_FORCE_CHK || rt_is_expired(rt))
		return NULL;
	return dst;
}

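/* Used as the dst_ops ->link_failure hook: report the destination as
 * unreachable to the sender and expire the route attached to the skb.
 */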
static void ipv4_link_failure(struct sk_buff *skb)
{
	struct rtable *rt;

	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);

	rt = skb_rtable(skb);
	if (rt)
		dst_set_expires(&rt->dst, 0);
}

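/* Catch-all output handler installed on dsts that must never transmit
 * (for example, input routes); reaching it indicates a routing bug,
 * hence the WARN_ON() below.
 */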
static int ip_rt_bug(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	pr_debug("%s: %pI4 -> %pI4, %s\n",
		 __func__, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr,
		 skb->dev ? skb->dev->name : "?");
	kfree_skb(skb);
	WARN_ON(1);
	return 0;
}

/*
   We do not cache the source address of the outgoing interface,
   because it is used only by the IP RR, TS and SRR options,
   so it is out of the fast path.

   BTW remember: "addr" is not guaranteed to be aligned
   in IP options!
 */

void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt)
{
	__be32 src;

	if (rt_is_output_route(rt))
		src = ip_hdr(skb)->saddr;
	else {
		struct fib_result res;
		struct iphdr *iph = ip_hdr(skb);
		struct flowi4 fl4 = {
			.daddr = iph->daddr,
			.saddr = iph->saddr,
			.flowi4_tos = RT_TOS(iph->tos),
			.flowi4_oif = rt->dst.dev->ifindex,
			.flowi4_iif = skb->dev->ifindex,
			.flowi4_mark = skb->mark,
		};

		rcu_read_lock();
		if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res, 0) == 0)
			src = fib_result_prefsrc(dev_net(rt->dst.dev), &res);
		else
			src = inet_select_addr(rt->dst.dev,
					       rt_nexthop(rt, iph->daddr),
					       RT_SCOPE_UNIVERSE);
		rcu_read_unlock();
	}
	memcpy(addr, &src, 4);
}

#ifdef CONFIG_IP_ROUTE_CLASSID
static void set_class_tag(struct rtable *rt, u32 tag)
{
	if (!(rt->dst.tclassid & 0xFFFF))
		rt->dst.tclassid |= tag & 0xFFFF;
	if (!(rt->dst.tclassid & 0xFFFF0000))
		rt->dst.tclassid |= tag & 0xFFFF0000;
}
#endif

static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
{
	unsigned int header_size = sizeof(struct tcphdr) + sizeof(struct iphdr);
	unsigned int advmss = max_t(unsigned int, ipv4_mtu(dst) - header_size,
				    ip_rt_min_advmss);

	return min(advmss, IPV4_MAX_PMTU - header_size);
}

static unsigned int ipv4_mtu(const struct dst_entry *dst)
{
	const struct rtable *rt = (const struct rtable *) dst;
	unsigned int mtu = rt->rt_pmtu;

	if (!mtu || time_after_eq(jiffies, rt->dst.expires))
		mtu = dst_metric_raw(dst, RTAX_MTU);

	if (mtu)
		return mtu;

	mtu = READ_ONCE(dst->dev->mtu);

	if (unlikely(ip_mtu_locked(dst))) {
		if (rt->rt_gw_family && mtu > 576)
			mtu = 576;
	}

	mtu = min_t(unsigned int, mtu, IP_MAX_MTU);

	return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
}

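/* Unlink and free the nexthop exception for @daddr, if one exists.
 * Runs under fnhe_lock; the entry itself is freed via RCU after its
 * cached routes have been flushed.
 */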
static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)
{
	struct fnhe_hash_bucket *hash;
	struct fib_nh_exception *fnhe, __rcu **fnhe_p;
	u32 hval = fnhe_hashfun(daddr);

	spin_lock_bh(&fnhe_lock);

	hash = rcu_dereference_protected(nh->nh_exceptions,
					 lockdep_is_held(&fnhe_lock));
	hash += hval;

	fnhe_p = &hash->chain;
	fnhe = rcu_dereference_protected(*fnhe_p, lockdep_is_held(&fnhe_lock));
	while (fnhe) {
		if (fnhe->fnhe_daddr == daddr) {
			rcu_assign_pointer(*fnhe_p, rcu_dereference_protected(
				fnhe->fnhe_next, lockdep_is_held(&fnhe_lock)));
			/* set fnhe_daddr to 0 to ensure it won't bind with
			 * new dsts in rt_bind_exception().
			 */
			fnhe->fnhe_daddr = 0;
			fnhe_flush_routes(fnhe);
			kfree_rcu(fnhe, rcu);
			break;
		}
		fnhe_p = &fnhe->fnhe_next;
		fnhe = rcu_dereference_protected(fnhe->fnhe_next,
						 lockdep_is_held(&fnhe_lock));
	}

	spin_unlock_bh(&fnhe_lock);
}

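/* Look up the nexthop exception for @daddr in @nh's hash table.
 * Expired entries are deleted on the fly, so callers only ever see
 * live PMTU/redirect state.
 */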
static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
{
	struct fnhe_hash_bucket *hash = rcu_dereference(nh->nh_exceptions);
	struct fib_nh_exception *fnhe;
	u32 hval;

	if (!hash)
		return NULL;

	hval = fnhe_hashfun(daddr);

	for (fnhe = rcu_dereference(hash[hval].chain); fnhe;
	     fnhe = rcu_dereference(fnhe->fnhe_next)) {
		if (fnhe->fnhe_daddr == daddr) {
			if (fnhe->fnhe_expires &&
			    time_after(jiffies, fnhe->fnhe_expires)) {
				ip_del_fnhe(nh, daddr);
				break;
			}
			return fnhe;
		}
	}
	return NULL;
}

/* MTU selection:
 * 1. mtu on route is locked - use it
 * 2. mtu from nexthop exception
 * 3. mtu from egress device
 */

u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr)
{
	struct fib_nh_common *nhc = res->nhc;
	struct net_device *dev = nhc->nhc_dev;
	struct fib_info *fi = res->fi;
	u32 mtu = 0;

	if (dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu ||
	    fi->fib_metrics->metrics[RTAX_LOCK - 1] & (1 << RTAX_MTU))
		mtu = fi->fib_mtu;

	if (likely(!mtu)) {
		struct fib_nh *nh = container_of(nhc, struct fib_nh, nh_common);
		struct fib_nh_exception *fnhe;

		fnhe = find_exception(nh, daddr);
		if (fnhe && !time_after_eq(jiffies, fnhe->fnhe_expires))
			mtu = fnhe->fnhe_pmtu;
	}

	if (likely(!mtu))
		mtu = min(READ_ONCE(dev->mtu), IP_MAX_MTU);

	return mtu - lwtunnel_headroom(nhc->nhc_lwtstate, mtu);
}

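/* Bind @rt to the nexthop exception @fnhe: refresh the route with the
 * exception's PMTU/redirect data and, when @do_cache is set, park it
 * in the exception's input or output slot.  Returns true if the route
 * was cached.
 */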
static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
			      __be32 daddr, const bool do_cache)
{
	bool ret = false;

	spin_lock_bh(&fnhe_lock);

	if (daddr == fnhe->fnhe_daddr) {
		struct rtable __rcu **porig;
		struct rtable *orig;
		int genid = fnhe_genid(dev_net(rt->dst.dev));

		if (rt_is_input_route(rt))
			porig = &fnhe->fnhe_rth_input;
		else
			porig = &fnhe->fnhe_rth_output;
		orig = rcu_dereference(*porig);

		if (fnhe->fnhe_genid != genid) {
			fnhe->fnhe_genid = genid;
			fnhe->fnhe_gw = 0;
			fnhe->fnhe_pmtu = 0;
			fnhe->fnhe_expires = 0;
			fnhe->fnhe_mtu_locked = false;
			fnhe_flush_routes(fnhe);
			orig = NULL;
		}
		fill_route_from_fnhe(rt, fnhe);
		if (!rt->rt_gw4) {
			rt->rt_gw4 = daddr;
			rt->rt_gw_family = AF_INET;
		}

		if (do_cache) {
			dst_hold(&rt->dst);
			rcu_assign_pointer(*porig, rt);
			if (orig) {
				dst_dev_put(&orig->dst);
				dst_release(&orig->dst);
			}
			ret = true;
		}

		fnhe->fnhe_stamp = jiffies;
	}
	spin_unlock_bh(&fnhe_lock);

	return ret;
}

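/* Publish @rt in the nexthop's per-direction cache slot via cmpxchg(),
 * releasing whatever route used to live there.  Returns false when we
 * lose the race and the route could not be cached.
 */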
static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt)
{
	struct rtable *orig, *prev, **p;
	bool ret = true;

	if (rt_is_input_route(rt)) {
		p = (struct rtable **)&nh->nh_rth_input;
	} else {
		p = (struct rtable **)raw_cpu_ptr(nh->nh_pcpu_rth_output);
	}
	orig = *p;

	/* hold dst before doing cmpxchg() to avoid race condition
	 * on this dst
	 */
	dst_hold(&rt->dst);
	prev = cmpxchg(p, orig, rt);
	if (prev == orig) {
		if (orig) {
			dst_dev_put(&orig->dst);
			dst_release(&orig->dst);
		}
	} else {
		dst_release(&rt->dst);
		ret = false;
	}

	return ret;
}

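/* Routes that could not be cached on a nexthop are strung on per-CPU
 * "uncached" lists so that rt_flush_dev() below can still find them
 * and retarget them at the loopback device when their device goes
 * away.
 */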
struct uncached_list {
	spinlock_t lock;
	struct list_head head;
};

static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt_uncached_list);

void rt_add_uncached_list(struct rtable *rt)
{
	struct uncached_list *ul = raw_cpu_ptr(&rt_uncached_list);

	rt->rt_uncached_list = ul;

	spin_lock_bh(&ul->lock);
	list_add_tail(&rt->rt_uncached, &ul->head);
	spin_unlock_bh(&ul->lock);
}

void rt_del_uncached_list(struct rtable *rt)
{
	if (!list_empty(&rt->rt_uncached)) {
		struct uncached_list *ul = rt->rt_uncached_list;

		spin_lock_bh(&ul->lock);
		list_del(&rt->rt_uncached);
		spin_unlock_bh(&ul->lock);
	}
}

static void ipv4_dst_destroy(struct dst_entry *dst)
{
	struct rtable *rt = (struct rtable *)dst;

	ip_dst_metrics_put(dst);
	rt_del_uncached_list(rt);
}

void rt_flush_dev(struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct rtable *rt;
	int cpu;

	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu);

		spin_lock_bh(&ul->lock);
		list_for_each_entry(rt, &ul->head, rt_uncached) {
			if (rt->dst.dev != dev)
				continue;
			rt->dst.dev = net->loopback_dev;
			dev_hold(rt->dst.dev);
			dev_put(dev);
		}
		spin_unlock_bh(&ul->lock);
	}
}

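/* A cached route may be reused only while it is still marked
 * DST_OBSOLETE_FORCE_CHK and belongs to the current routing
 * generation.
 */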
static bool rt_cache_valid(const struct rtable *rt)
{
	return rt &&
	       rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
	       !rt_is_expired(rt);
}

static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
			   const struct fib_result *res,
			   struct fib_nh_exception *fnhe,
			   struct fib_info *fi, u16 type, u32 itag,
			   const bool do_cache)
{
	bool cached = false;

	if (fi) {
		struct fib_nh_common *nhc = FIB_RES_NHC(*res);
		struct fib_nh *nh;

		if (nhc->nhc_gw_family && nhc->nhc_scope == RT_SCOPE_LINK) {
			rt->rt_gw_family = nhc->nhc_gw_family;
			/* only INET and INET6 are supported */
			if (likely(nhc->nhc_gw_family == AF_INET))
				rt->rt_gw4 = nhc->nhc_gw.ipv4;
			else
				rt->rt_gw6 = nhc->nhc_gw.ipv6;
		}

		ip_dst_init_metrics(&rt->dst, fi->fib_metrics);

		nh = container_of(nhc, struct fib_nh, nh_common);
#ifdef CONFIG_IP_ROUTE_CLASSID
		rt->dst.tclassid = nh->nh_tclassid;
#endif
		rt->dst.lwtstate = lwtstate_get(nh->fib_nh_lws);
		if (unlikely(fnhe))
			cached = rt_bind_exception(rt, fnhe, daddr, do_cache);
		else if (do_cache)
			cached = rt_cache_route(nh, rt);
		if (unlikely(!cached)) {
			/* Routes we intend to cache in nexthop exception or
			 * FIB nexthop have the DST_NOCACHE bit clear.
			 * However, if we are unsuccessful at storing this
			 * route into the cache we really need to set it.
			 */
			if (!rt->rt_gw4) {
				rt->rt_gw_family = AF_INET;
				rt->rt_gw4 = daddr;
			}
			rt_add_uncached_list(rt);
		}
	} else
		rt_add_uncached_list(rt);

#ifdef CONFIG_IP_ROUTE_CLASSID
#ifdef CONFIG_IP_MULTIPLE_TABLES
	set_class_tag(rt, res->tclassid);
#endif
	set_class_tag(rt, itag);
#endif
}

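/* Allocate and minimally initialize a dst for an IPv4 route.  DST_HOST
 * is set only when the route will not be cached, and @nopolicy/@noxfrm
 * map to DST_NOPOLICY/DST_NOXFRM; callers fill in the remaining
 * routing fields afterwards.
 */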
struct rtable *rt_dst_alloc(struct net_device *dev,
			    unsigned int flags, u16 type,
			    bool nopolicy, bool noxfrm, bool will_cache)
{
	struct rtable *rt;

	rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
		       (will_cache ? 0 : DST_HOST) |
		       (nopolicy ? DST_NOPOLICY : 0) |
		       (noxfrm ? DST_NOXFRM : 0));

	if (rt) {
		rt->rt_genid = rt_genid_ipv4(dev_net(dev));
		rt->rt_flags = flags;
		rt->rt_type = type;
		rt->rt_is_input = 0;
		rt->rt_iif = 0;
		rt->rt_pmtu = 0;
		rt->rt_mtu_locked = 0;
		rt->rt_gw_family = 0;
		rt->rt_gw4 = 0;
		INIT_LIST_HEAD(&rt->rt_uncached);

		rt->dst.output = ip_output;
		if (flags & RTCF_LOCAL)
			rt->dst.input = ip_local_deliver;
	}

	return rt;
}
EXPORT_SYMBOL(rt_dst_alloc);

/* called in rcu_read_lock() section */
int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
			  u8 tos, struct net_device *dev,
			  struct in_device *in_dev, u32 *itag)
{
	int err;

	/* Primary sanity checks. */
	if (!in_dev)
		return -EINVAL;

	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
	    skb->protocol != htons(ETH_P_IP))
		return -EINVAL;

	if (ipv4_is_loopback(saddr) && !IN_DEV_ROUTE_LOCALNET(in_dev))
		return -EINVAL;

	if (ipv4_is_zeronet(saddr)) {
		if (!ipv4_is_local_multicast(daddr) &&
		    ip_hdr(skb)->protocol != IPPROTO_IGMP)
			return -EINVAL;
	} else {
		err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
					  in_dev, itag);
		if (err < 0)
			return err;
	}
	return 0;
}

/* called in rcu_read_lock() section */
static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
			     u8 tos, struct net_device *dev, int our)
{
	struct in_device *in_dev = __in_dev_get_rcu(dev);
	unsigned int flags = RTCF_MULTICAST;
	struct rtable *rth;
	u32 itag = 0;
	int err;

	err = ip_mc_validate_source(skb, daddr, saddr, tos, dev, in_dev, &itag);
	if (err)
		return err;

	if (our)
		flags |= RTCF_LOCAL;

	rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false);
	if (!rth)
		return -ENOBUFS;

#ifdef CONFIG_IP_ROUTE_CLASSID
	rth->dst.tclassid = itag;
#endif
	rth->dst.output = ip_rt_bug;
	rth->rt_is_input = 1;

#ifdef CONFIG_IP_MROUTE
	if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev))
		rth->dst.input = ip_mr_input;
#endif
	RT_CACHE_STAT_INC(in_slow_mc);

	skb_dst_set(skb, &rth->dst);
	return 0;
}


static void ip_handle_martian_source(struct net_device *dev,
				     struct in_device *in_dev,
				     struct sk_buff *skb,
				     __be32 daddr,
				     __be32 saddr)
{
	RT_CACHE_STAT_INC(in_martian_src);
#ifdef CONFIG_IP_ROUTE_VERBOSE
	if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) {
		/*
		 * RFC1812 recommendation: if the source is martian,
		 * the only hint we have is the MAC header.
		 */
		pr_warn("martian source %pI4 from %pI4, on dev %s\n",
			&daddr, &saddr, dev->name);
		if (dev->hard_header_len && skb_mac_header_was_set(skb)) {
			print_hex_dump(KERN_WARNING, "ll header: ",
				       DUMP_PREFIX_OFFSET, 16, 1,
				       skb_mac_header(skb),
				       dev->hard_header_len, false);
		}
	}
#endif
}

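/* Build and attach the forwarding dst for an input route: validate the
 * source address, decide whether an ICMP redirect should be sent, and
 * reuse or create the cached nexthop route.
 */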
/* called in rcu_read_lock() section */
static int __mkroute_input(struct sk_buff *skb,
			   const struct fib_result *res,
			   struct in_device *in_dev,
			   __be32 daddr, __be32 saddr, u32 tos)
{
	struct fib_nh_common *nhc = FIB_RES_NHC(*res);
	struct net_device *dev = nhc->nhc_dev;
	struct fib_nh_exception *fnhe;
	struct rtable *rth;
	struct fib_nh *nh;
	int err;
	struct in_device *out_dev;
	bool do_cache;
	u32 itag = 0;

	/* get a working reference to the output device */
	out_dev = __in_dev_get_rcu(dev);
	if (!out_dev) {
		net_crit_ratelimited("Bug in ip_route_input_slow(). Please report.\n");
		return -EINVAL;
	}

	err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res),
				  in_dev->dev, in_dev, &itag);
	if (err < 0) {
		ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
					 saddr);

		goto cleanup;
	}

	do_cache = res->fi && !itag;
	if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) &&
	    skb->protocol == htons(ETH_P_IP)) {
		__be32 gw;

		gw = nhc->nhc_gw_family == AF_INET ? nhc->nhc_gw.ipv4 : 0;
		if (IN_DEV_SHARED_MEDIA(out_dev) ||
		    inet_addr_onlink(out_dev, saddr, gw))
			IPCB(skb)->flags |= IPSKB_DOREDIRECT;
	}

	if (skb->protocol != htons(ETH_P_IP)) {
		/* Not IP (i.e. ARP).  Do not create a route if it is
		 * invalid for proxy arp.  DNAT routes are always valid.
		 *
		 * The proxy arp feature has been extended to allow ARP
		 * replies back out of the same interface, to support
		 * private VLAN switch technologies.  See arp.c.
		 */
		if (out_dev == in_dev &&
		    IN_DEV_PROXY_ARP_PVLAN(in_dev) == 0) {
			err = -EINVAL;
			goto cleanup;
		}
	}

	nh = container_of(nhc, struct fib_nh, nh_common);
	fnhe = find_exception(nh, daddr);
	if (do_cache) {
		if (fnhe)
			rth = rcu_dereference(fnhe->fnhe_rth_input);
		else
			rth = rcu_dereference(nh->nh_rth_input);
		if (rt_cache_valid(rth)) {
			skb_dst_set_noref(skb, &rth->dst);
			goto out;
		}
	}

	rth = rt_dst_alloc(out_dev->dev, 0, res->type,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY),
			   IN_DEV_CONF_GET(out_dev, NOXFRM), do_cache);
	if (!rth) {
		err = -ENOBUFS;
		goto cleanup;
	}

	rth->rt_is_input = 1;
	RT_CACHE_STAT_INC(in_slow_tot);

	rth->dst.input = ip_forward;

	rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag,
		       do_cache);
	lwtunnel_set_redirect(&rth->dst);
	skb_dst_set(skb, &rth->dst);
out:
	err = 0;
 cleanup:
	return err;
}

#ifdef CONFIG_IP_ROUTE_MULTIPATH
/* To make ICMP packets follow the right flow, the multipath hash is
 * calculated from the inner IP addresses.
 */
static void ip_multipath_l3_keys(const struct sk_buff *skb,
				 struct flow_keys *hash_keys)
{
	const struct iphdr *outer_iph = ip_hdr(skb);
	const struct iphdr *key_iph = outer_iph;
	const struct iphdr *inner_iph;
	const struct icmphdr *icmph;
	struct iphdr _inner_iph;
	struct icmphdr _icmph;

	if (likely(outer_iph->protocol != IPPROTO_ICMP))
		goto out;

	if (unlikely((outer_iph->frag_off & htons(IP_OFFSET)) != 0))
		goto out;

	icmph = skb_header_pointer(skb, outer_iph->ihl * 4, sizeof(_icmph),
				   &_icmph);
	if (!icmph)
		goto out;

	if (icmph->type != ICMP_DEST_UNREACH &&
	    icmph->type != ICMP_REDIRECT &&
	    icmph->type != ICMP_TIME_EXCEEDED &&
	    icmph->type != ICMP_PARAMETERPROB)
		goto out;

	inner_iph = skb_header_pointer(skb,
				       outer_iph->ihl * 4 + sizeof(_icmph),
				       sizeof(_inner_iph), &_inner_iph);
	if (!inner_iph)
		goto out;

	key_iph = inner_iph;
out:
	hash_keys->addrs.v4addrs.src = key_iph->saddr;
	hash_keys->addrs.v4addrs.dst = key_iph->daddr;
}

/* if skb is set it will be used and fl4 can be NULL */
int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
		       const struct sk_buff *skb, struct flow_keys *flkeys)
{
	u32 multipath_hash = fl4 ? fl4->flowi4_multipath_hash : 0;
	struct flow_keys hash_keys;
	u32 mhash;

	switch (net->ipv4.sysctl_fib_multipath_hash_policy) {
	case 0:
		memset(&hash_keys, 0, sizeof(hash_keys));
		hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
		if (skb) {
			ip_multipath_l3_keys(skb, &hash_keys);
		} else {
			hash_keys.addrs.v4addrs.src = fl4->saddr;
			hash_keys.addrs.v4addrs.dst = fl4->daddr;
		}
		break;
	case 1:
		/* skb is currently provided only when forwarding */
		if (skb) {
			unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
			struct flow_keys keys;

			/* short-circuit if we already have L4 hash present */
			if (skb->l4_hash)
				return skb_get_hash_raw(skb) >> 1;

			memset(&hash_keys, 0, sizeof(hash_keys));

			if (!flkeys) {
				skb_flow_dissect_flow_keys(skb, &keys, flag);
				flkeys = &keys;
			}

			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
			hash_keys.addrs.v4addrs.src = flkeys->addrs.v4addrs.src;
			hash_keys.addrs.v4addrs.dst = flkeys->addrs.v4addrs.dst;
			hash_keys.ports.src = flkeys->ports.src;
			hash_keys.ports.dst = flkeys->ports.dst;
			hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
		} else {
			memset(&hash_keys, 0, sizeof(hash_keys));
			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
			hash_keys.addrs.v4addrs.src = fl4->saddr;
			hash_keys.addrs.v4addrs.dst = fl4->daddr;
			hash_keys.ports.src = fl4->fl4_sport;
			hash_keys.ports.dst = fl4->fl4_dport;
			hash_keys.basic.ip_proto = fl4->flowi4_proto;
		}
		break;
	}
	mhash = flow_hash_from_keys(&hash_keys);

	if (multipath_hash)
		mhash = jhash_2words(mhash, multipath_hash, 0);

	return mhash >> 1;
}
#endif /* CONFIG_IP_ROUTE_MULTIPATH */
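/* A usage sketch (not taken from this file): the policy read above is
 * normally exposed through sysctl, assuming the usual proc mapping of
 * the net.ipv4 namespace:
 *
 *	echo 0 > /proc/sys/net/ipv4/fib_multipath_hash_policy	# L3 hash
 *	echo 1 > /proc/sys/net/ipv4/fib_multipath_hash_policy	# L4 hash
 */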

static int ip_mkroute_input(struct sk_buff *skb,
			    struct fib_result *res,
			    struct in_device *in_dev,
			    __be32 daddr, __be32 saddr, u32 tos,
			    struct flow_keys *hkeys)
{
#ifdef CONFIG_IP_ROUTE_MULTIPATH
	if (res->fi && res->fi->fib_nhs > 1) {
		int h = fib_multipath_hash(res->fi->fib_net, NULL, skb, hkeys);

		fib_select_multipath(res, h);
	}
#endif

	/* create a routing cache entry */
	return __mkroute_input(skb, res, in_dev, daddr, saddr, tos);
}

/*
 *	NOTE. We drop all packets that have a local source
 *	address, because every properly looped back packet
 *	must have the correct destination already attached by the
 *	output routine.
 *
 *	Such an approach solves two big problems:
 *	1. Non-simplex devices are handled properly.
 *	2. IP spoofing attempts are filtered with a 100% guarantee.
 *	called with rcu_read_lock()
 */

static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
			       u8 tos, struct net_device *dev,
			       struct fib_result *res)
{
	struct in_device *in_dev = __in_dev_get_rcu(dev);
	struct flow_keys *flkeys = NULL, _flkeys;
	struct net *net = dev_net(dev);
	struct ip_tunnel_info *tun_info;
	int err = -EINVAL;
	unsigned int flags = 0;
	u32 itag = 0;
	struct rtable *rth;
	struct flowi4 fl4;
	bool do_cache;

	/* IP on this device is disabled. */

	if (!in_dev)
		goto out;

	/* Check for the most weird martians, which may not be detected
	 * by fib_lookup.
	 */

	tun_info = skb_tunnel_info(skb);
	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
		fl4.flowi4_tun_key.tun_id = tun_info->key.tun_id;
	else
		fl4.flowi4_tun_key.tun_id = 0;
	skb_dst_drop(skb);

	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
		goto martian_source;

	res->fi = NULL;
	res->table = NULL;
	if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0))
		goto brd_input;

	/* Accept zero addresses only to limited broadcast;
	 * I am not even sure whether to fix this or not.  Waiting for
	 * complaints :-)
	 */
	if (ipv4_is_zeronet(saddr))
		goto martian_source;

	if (ipv4_is_zeronet(daddr))
		goto martian_destination;

	/* The following code tries to avoid calling
	 * IN_DEV_NET_ROUTE_LOCALNET(), and calls it at most once when
	 * daddr and/or saddr is a loopback address.
	 */
	if (ipv4_is_loopback(daddr)) {
		if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
			goto martian_destination;
	} else if (ipv4_is_loopback(saddr)) {
		if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
			goto martian_source;
	}

	/*
	 *	Now we are ready to route the packet.
	 */
	fl4.flowi4_oif = 0;
	fl4.flowi4_iif = dev->ifindex;
	fl4.flowi4_mark = skb->mark;
	fl4.flowi4_tos = tos;
	fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
	fl4.flowi4_flags = 0;
	fl4.daddr = daddr;
	fl4.saddr = saddr;
	fl4.flowi4_uid = sock_net_uid(net, NULL);

	if (fib4_rules_early_flow_dissect(net, skb, &fl4, &_flkeys)) {
		flkeys = &_flkeys;
	} else {
		fl4.flowi4_proto = 0;
		fl4.fl4_sport = 0;
		fl4.fl4_dport = 0;
	}

	err = fib_lookup(net, &fl4, res, 0);
	if (err != 0) {
		if (!IN_DEV_FORWARD(in_dev))
			err = -EHOSTUNREACH;
		goto no_route;
	}

	if (res->type == RTN_BROADCAST) {
		if (IN_DEV_BFORWARD(in_dev))
			goto make_route;
		goto brd_input;
	}

	if (res->type == RTN_LOCAL) {
		err = fib_validate_source(skb, saddr, daddr, tos,
					  0, dev, in_dev, &itag);
		if (err < 0)
			goto martian_source;
		goto local_input;
	}

	if (!IN_DEV_FORWARD(in_dev)) {
		err = -EHOSTUNREACH;
		goto no_route;
	}
	if (res->type != RTN_UNICAST)
		goto martian_destination;

make_route:
	err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos, flkeys);
out:	return err;

brd_input:
	if (skb->protocol != htons(ETH_P_IP))
		goto e_inval;

	if (!ipv4_is_zeronet(saddr)) {
		err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
					  in_dev, &itag);
		if (err < 0)
			goto martian_source;
	}
	flags |= RTCF_BROADCAST;
	res->type = RTN_BROADCAST;
	RT_CACHE_STAT_INC(in_brd);

local_input:
	do_cache = false;
	if (res->fi) {
		if (!itag) {
			struct fib_nh_common *nhc = FIB_RES_NHC(*res);
			struct fib_nh *nh;

			nh = container_of(nhc, struct fib_nh, nh_common);
			rth = rcu_dereference(nh->nh_rth_input);
			if (rt_cache_valid(rth)) {
				skb_dst_set_noref(skb, &rth->dst);
				err = 0;
				goto out;
			}
			do_cache = true;
		}
	}

	rth = rt_dst_alloc(l3mdev_master_dev_rcu(dev) ? : net->loopback_dev,
			   flags | RTCF_LOCAL, res->type,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache);
	if (!rth)
		goto e_nobufs;

	rth->dst.output = ip_rt_bug;
#ifdef CONFIG_IP_ROUTE_CLASSID
	rth->dst.tclassid = itag;
#endif
	rth->rt_is_input = 1;

	RT_CACHE_STAT_INC(in_slow_tot);
	if (res->type == RTN_UNREACHABLE) {
		rth->dst.input = ip_error;
		rth->dst.error = -err;
		rth->rt_flags &= ~RTCF_LOCAL;
	}

	if (do_cache) {
		struct fib_nh_common *nhc = FIB_RES_NHC(*res);
		struct fib_nh *nh;

		rth->dst.lwtstate = lwtstate_get(nhc->nhc_lwtstate);
		if (lwtunnel_input_redirect(rth->dst.lwtstate)) {
			WARN_ON(rth->dst.input == lwtunnel_input);
			rth->dst.lwtstate->orig_input = rth->dst.input;
			rth->dst.input = lwtunnel_input;
		}

		nh = container_of(nhc, struct fib_nh, nh_common);
		if (unlikely(!rt_cache_route(nh, rth)))
			rt_add_uncached_list(rth);
	}
	skb_dst_set(skb, &rth->dst);
	err = 0;
	goto out;

no_route:
	RT_CACHE_STAT_INC(in_no_route);
	res->type = RTN_UNREACHABLE;
	res->fi = NULL;
	res->table = NULL;
	goto local_input;

	/*
	 *	Do not cache martian addresses: they should be logged (RFC1812)
	 */
martian_destination:
	RT_CACHE_STAT_INC(in_martian_dst);
#ifdef CONFIG_IP_ROUTE_VERBOSE
	if (IN_DEV_LOG_MARTIANS(in_dev))
		net_warn_ratelimited("martian destination %pI4 from %pI4, dev %s\n",
				     &daddr, &saddr, dev->name);
#endif

e_inval:
	err = -EINVAL;
	goto out;

e_nobufs:
	err = -ENOBUFS;
	goto out;

martian_source:
	ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
	goto out;
}

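/* Public wrapper for input route resolution (used, for example, on the
 * ip_rcv_finish() receive path): masks @tos down to the route-relevant
 * bits and performs the lookup under the RCU read lock.
 */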
int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
			 u8 tos, struct net_device *dev)
{
	struct fib_result res;
	int err;

	tos &= IPTOS_RT_MASK;
	rcu_read_lock();
	err = ip_route_input_rcu(skb, daddr, saddr, tos, dev, &res);
	rcu_read_unlock();

	return err;
}
EXPORT_SYMBOL(ip_route_input_noref);

/* called with rcu_read_lock held */
int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr,
		       u8 tos, struct net_device *dev, struct fib_result *res)
{
	/* Multicast recognition logic was moved from the route cache to
	   here.  The problem was that too many Ethernet cards have
	   broken/missing hardware multicast filters :-(  As a result, a
	   host on a multicast network acquires a lot of useless route
	   cache entries, sort of SDR messages from all the world.  Now we
	   try to get rid of them.  Really, provided the software IP
	   multicast filter is organized reasonably (at least, hashed), it
	   does not result in a slowdown compared with route cache reject
	   entries.
	   Note that multicast routers are not affected, because a
	   route cache entry is created eventually.
	 */
	if (ipv4_is_multicast(daddr)) {
		struct in_device *in_dev = __in_dev_get_rcu(dev);
		int our = 0;
		int err = -EINVAL;

		if (!in_dev)
			return err;
		our = ip_check_mc_rcu(in_dev, daddr, saddr,
				      ip_hdr(skb)->protocol);

		/* check l3 master if no match yet */
		if (!our && netif_is_l3_slave(dev)) {
			struct in_device *l3_in_dev;

			l3_in_dev = __in_dev_get_rcu(skb->dev);
			if (l3_in_dev)
				our = ip_check_mc_rcu(l3_in_dev, daddr, saddr,
						      ip_hdr(skb)->protocol);
		}

		if (our
#ifdef CONFIG_IP_MROUTE
			||
		    (!ipv4_is_local_multicast(daddr) &&
		     IN_DEV_MFORWARD(in_dev))
#endif
		   ) {
			err = ip_route_input_mc(skb, daddr, saddr,
						tos, dev, our);
		}
		return err;
	}

	return ip_route_input_slow(skb, daddr, saddr, tos, dev, res);
}

/* called with rcu_read_lock() */
static struct rtable *__mkroute_output(const struct fib_result *res,
				       const struct flowi4 *fl4, int orig_oif,
				       struct net_device *dev_out,
				       unsigned int flags)
{
	struct fib_info *fi = res->fi;
	struct fib_nh_exception *fnhe;
	struct in_device *in_dev;
	u16 type = res->type;
	struct rtable *rth;
	bool do_cache;

	in_dev = __in_dev_get_rcu(dev_out);
	if (!in_dev)
		return ERR_PTR(-EINVAL);

	if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
		if (ipv4_is_loopback(fl4->saddr) &&
		    !(dev_out->flags & IFF_LOOPBACK) &&
		    !netif_is_l3_master(dev_out))
			return ERR_PTR(-EINVAL);

	if (ipv4_is_lbcast(fl4->daddr))
		type = RTN_BROADCAST;
	else if (ipv4_is_multicast(fl4->daddr))
		type = RTN_MULTICAST;
	else if (ipv4_is_zeronet(fl4->daddr))
		return ERR_PTR(-EINVAL);

	if (dev_out->flags & IFF_LOOPBACK)
		flags |= RTCF_LOCAL;

	do_cache = true;
	if (type == RTN_BROADCAST) {
		flags |= RTCF_BROADCAST | RTCF_LOCAL;
		fi = NULL;
	} else if (type == RTN_MULTICAST) {
		flags |= RTCF_MULTICAST | RTCF_LOCAL;
		if (!ip_check_mc_rcu(in_dev, fl4->daddr, fl4->saddr,
				     fl4->flowi4_proto))
			flags &= ~RTCF_LOCAL;
		else
			do_cache = false;
		/* If the multicast route does not exist, use
		 * the default one, but do not gateway in this case.
		 * Yes, it is a hack.
		 */
David S. Miller982721f2011-02-16 21:44:24 -08002276 if (fi && res->prefixlen < 4)
2277 fi = NULL;
Chris Friesend6d5e992016-04-08 15:21:30 -06002278 } else if ((type == RTN_LOCAL) && (orig_oif != 0) &&
2279 (orig_oif != dev_out->ifindex)) {
2280 /* For local routes that require a particular output interface
2281 * we do not want to cache the result. Caching the result
2282 * causes incorrect behaviour when there are multiple source
2283 * addresses on the interface, the end result being that if the
2284 * intended recipient is waiting on that interface for the
2285 * packet he won't receive it because it will be delivered on
2286 * the loopback interface and the IP_PKTINFO ipi_ifindex will
2287 * be set to the loopback interface as well.
2288 */
Julian Anastasov94720e32018-05-02 09:41:19 +03002289 do_cache = false;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002290 }

	fnhe = NULL;
	do_cache &= fi != NULL;
	if (fi) {
		struct fib_nh_common *nhc = FIB_RES_NHC(*res);
		struct fib_nh *nh = container_of(nhc, struct fib_nh, nh_common);
		struct rtable __rcu **prth;

		fnhe = find_exception(nh, fl4->daddr);
		if (!do_cache)
			goto add;
		if (fnhe) {
			prth = &fnhe->fnhe_rth_output;
		} else {
			if (unlikely(fl4->flowi4_flags &
				     FLOWI_FLAG_KNOWN_NH &&
				     !(nhc->nhc_gw_family &&
				       nhc->nhc_scope == RT_SCOPE_LINK))) {
				do_cache = false;
				goto add;
			}
			prth = raw_cpu_ptr(nh->nh_pcpu_rth_output);
		}
		rth = rcu_dereference(*prth);
		if (rt_cache_valid(rth) && dst_hold_safe(&rth->dst))
			return rth;
	}

add:
	rth = rt_dst_alloc(dev_out, flags, type,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY),
			   IN_DEV_CONF_GET(in_dev, NOXFRM),
			   do_cache);
	if (!rth)
		return ERR_PTR(-ENOBUFS);

	rth->rt_iif = orig_oif;

	RT_CACHE_STAT_INC(out_slow_tot);

	if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
		if (flags & RTCF_LOCAL &&
		    !(dev_out->flags & IFF_LOOPBACK)) {
			rth->dst.output = ip_mc_output;
			RT_CACHE_STAT_INC(out_slow_mc);
		}
#ifdef CONFIG_IP_MROUTE
		if (type == RTN_MULTICAST) {
			if (IN_DEV_MFORWARD(in_dev) &&
			    !ipv4_is_local_multicast(fl4->daddr)) {
				rth->dst.input = ip_mr_input;
				rth->dst.output = ip_mc_output;
			}
		}
#endif
	}

	rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0, do_cache);
	lwtunnel_set_redirect(&rth->dst);

	return rth;
}
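
/* Caching summary for __mkroute_output() above: a cacheable route is
 * stored either in the matching nexthop exception (fnhe->fnhe_rth_output,
 * when a PMTU/redirect exception exists for the destination) or in the
 * nexthop's per-CPU output cache (nh->nh_pcpu_rth_output).  When do_cache
 * ends up false, the route is left uncached and is freed once its last
 * reference is dropped.
 */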

/*
 * Major route resolver routine.
 */

struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
					const struct sk_buff *skb)
{
	__u8 tos = RT_FL_TOS(fl4);
	struct fib_result res = {
		.type		= RTN_UNSPEC,
		.fi		= NULL,
		.table		= NULL,
		.tclassid	= 0,
	};
	struct rtable *rth;

	fl4->flowi4_iif = LOOPBACK_IFINDEX;
	fl4->flowi4_tos = tos & IPTOS_RT_MASK;
	fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
			     RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);

	rcu_read_lock();
	rth = ip_route_output_key_hash_rcu(net, fl4, &res, skb);
	rcu_read_unlock();

	return rth;
}
EXPORT_SYMBOL_GPL(ip_route_output_key_hash);
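
/* Illustrative sketch (not part of this file; values are placeholders):
 * most callers reach the resolver above through the ip_route_output_key()
 * wrapper.
 *
 *	struct flowi4 fl4 = {
 *		.daddr		= daddr,
 *		.flowi4_oif	= 0,
 *		.flowi4_tos	= RT_TOS(tos),
 *	};
 *	struct rtable *rt = ip_route_output_key(net, &fl4);
 *
 *	if (IS_ERR(rt))
 *		return PTR_ERR(rt);
 *	...
 *	ip_rt_put(rt);
 */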

struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4,
					    struct fib_result *res,
					    const struct sk_buff *skb)
{
	struct net_device *dev_out = NULL;
	int orig_oif = fl4->flowi4_oif;
	unsigned int flags = 0;
	struct rtable *rth;
	int err = -ENETUNREACH;

	if (fl4->saddr) {
		rth = ERR_PTR(-EINVAL);
		if (ipv4_is_multicast(fl4->saddr) ||
		    ipv4_is_lbcast(fl4->saddr) ||
		    ipv4_is_zeronet(fl4->saddr))
			goto out;

		/* The check for oif == dev_out->oif was removed here.
		   It was wrong for two reasons:
		   1. ip_dev_find(net, saddr) can return the wrong interface
		      if saddr is assigned to multiple interfaces.
		   2. Moreover, we are allowed to send packets with the saddr
		      of another interface. --ANK
		 */

		if (fl4->flowi4_oif == 0 &&
		    (ipv4_is_multicast(fl4->daddr) ||
		     ipv4_is_lbcast(fl4->daddr))) {
			/* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
			dev_out = __ip_dev_find(net, fl4->saddr, false);
			if (!dev_out)
				goto out;

			/* Special hack: the user can direct multicasts
			   and limited broadcast via the necessary interface
			   without fiddling with IP_MULTICAST_IF or IP_PKTINFO.
			   This hack is not just for fun, it allows
			   vic, vat and friends to work.
			   They bind a socket to loopback, set ttl to zero
			   and expect that it will work.
			   From the viewpoint of the routing cache they are
			   broken, because we are not allowed to build a
			   multicast path with a loopback source address
			   (the routing cache cannot know that ttl is zero,
			   so the packet will not leave this host and the
			   route is valid).
			   Luckily, this hack is a good workaround.
			 */

			fl4->flowi4_oif = dev_out->ifindex;
			goto make_route;
		}

		if (!(fl4->flowi4_flags & FLOWI_FLAG_ANYSRC)) {
			/* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
			if (!__ip_dev_find(net, fl4->saddr, false))
				goto out;
		}
	}

	if (fl4->flowi4_oif) {
		dev_out = dev_get_by_index_rcu(net, fl4->flowi4_oif);
		rth = ERR_PTR(-ENODEV);
		if (!dev_out)
			goto out;

		/* RACE: Check return value of inet_select_addr instead. */
		if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) {
			rth = ERR_PTR(-ENETUNREACH);
			goto out;
		}
		if (ipv4_is_local_multicast(fl4->daddr) ||
		    ipv4_is_lbcast(fl4->daddr) ||
		    fl4->flowi4_proto == IPPROTO_IGMP) {
			if (!fl4->saddr)
				fl4->saddr = inet_select_addr(dev_out, 0,
							      RT_SCOPE_LINK);
			goto make_route;
		}
		if (!fl4->saddr) {
			if (ipv4_is_multicast(fl4->daddr))
				fl4->saddr = inet_select_addr(dev_out, 0,
							      fl4->flowi4_scope);
			else if (!fl4->daddr)
				fl4->saddr = inet_select_addr(dev_out, 0,
							      RT_SCOPE_HOST);
		}
	}

	if (!fl4->daddr) {
		fl4->daddr = fl4->saddr;
		if (!fl4->daddr)
			fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK);
		dev_out = net->loopback_dev;
		fl4->flowi4_oif = LOOPBACK_IFINDEX;
		res->type = RTN_LOCAL;
		flags |= RTCF_LOCAL;
		goto make_route;
	}

	err = fib_lookup(net, fl4, res, 0);
	if (err) {
		res->fi = NULL;
		res->table = NULL;
		if (fl4->flowi4_oif &&
		    (ipv4_is_multicast(fl4->daddr) ||
		     !netif_index_is_l3_master(net, fl4->flowi4_oif))) {
			/* Apparently, the routing tables are wrong.  Assume
			   that the destination is on-link.

			   WHY? DW.
			   Because we are allowed to send to an interface
			   even if it has NO routes and NO assigned
			   addresses.  When oif is specified, the routing
			   tables are looked up with only one purpose:
			   to catch whether the destination is gatewayed,
			   rather than direct.  Moreover, if MSG_DONTROUTE
			   is set, we send the packet ignoring both the
			   routing tables and the ifaddr state. --ANK

			   We could make this work even when oif is unknown,
			   as IPv6 likely does, but we do not.
			 */

			if (fl4->saddr == 0)
				fl4->saddr = inet_select_addr(dev_out, 0,
							      RT_SCOPE_LINK);
			res->type = RTN_UNICAST;
			goto make_route;
		}
		rth = ERR_PTR(err);
		goto out;
	}

	if (res->type == RTN_LOCAL) {
		if (!fl4->saddr) {
			if (res->fi->fib_prefsrc)
				fl4->saddr = res->fi->fib_prefsrc;
			else
				fl4->saddr = fl4->daddr;
		}

		/* L3 master device is the loopback for that domain */
		dev_out = l3mdev_master_dev_rcu(FIB_RES_DEV(*res)) ? :
			net->loopback_dev;

		/* make sure orig_oif points to fib result device even
		 * though packet rx/tx happens over loopback or l3mdev
		 */
		orig_oif = FIB_RES_OIF(*res);

		fl4->flowi4_oif = dev_out->ifindex;
		flags |= RTCF_LOCAL;
		goto make_route;
	}

	fib_select_path(net, res, fl4, skb);

	dev_out = FIB_RES_DEV(*res);
	fl4->flowi4_oif = dev_out->ifindex;

make_route:
	rth = __mkroute_output(res, fl4, orig_oif, dev_out, flags);

out:
	return rth;
}

static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie)
{
	return NULL;
}

static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst)
{
	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);

	return mtu ? : dst->dev->mtu;
}

static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
					  struct sk_buff *skb, u32 mtu)
{
}

static void ipv4_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				       struct sk_buff *skb)
{
}

static u32 *ipv4_rt_blackhole_cow_metrics(struct dst_entry *dst,
					  unsigned long old)
{
	return NULL;
}

static struct dst_ops ipv4_dst_blackhole_ops = {
	.family			= AF_INET,
	.check			= ipv4_blackhole_dst_check,
	.mtu			= ipv4_blackhole_mtu,
	.default_advmss		= ipv4_default_advmss,
	.update_pmtu		= ipv4_rt_blackhole_update_pmtu,
	.redirect		= ipv4_rt_blackhole_redirect,
	.cow_metrics		= ipv4_rt_blackhole_cow_metrics,
	.neigh_lookup		= ipv4_neigh_lookup,
};

struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
	struct rtable *ort = (struct rtable *) dst_orig;
	struct rtable *rt;

	rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_DEAD, 0);
	if (rt) {
		struct dst_entry *new = &rt->dst;

		new->__use = 1;
		new->input = dst_discard;
		new->output = dst_discard_out;

		new->dev = net->loopback_dev;
		if (new->dev)
			dev_hold(new->dev);

		rt->rt_is_input = ort->rt_is_input;
		rt->rt_iif = ort->rt_iif;
		rt->rt_pmtu = ort->rt_pmtu;
		rt->rt_mtu_locked = ort->rt_mtu_locked;

		rt->rt_genid = rt_genid_ipv4(net);
		rt->rt_flags = ort->rt_flags;
		rt->rt_type = ort->rt_type;
		rt->rt_gw_family = ort->rt_gw_family;
		if (rt->rt_gw_family == AF_INET)
			rt->rt_gw4 = ort->rt_gw4;
		else if (rt->rt_gw_family == AF_INET6)
			rt->rt_gw6 = ort->rt_gw6;

		INIT_LIST_HEAD(&rt->rt_uncached);
	}

	dst_release(dst_orig);

	return rt ? &rt->dst : ERR_PTR(-ENOMEM);
}

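/* ipv4_blackhole_route() is used on the xfrm path when traffic must not
 * leak while state resolution is pending: the original dst is traded for
 * one whose input/output handlers silently discard.  A sketch of a
 * hypothetical call site:
 *
 *	dst = ipv4_blackhole_route(net, dst);
 *	if (IS_ERR(dst))
 *		return PTR_ERR(dst);
 *	// packets sent via dst are now dropped by dst_discard{,_out}
 */
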
struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
				    const struct sock *sk)
{
	struct rtable *rt = __ip_route_output_key(net, flp4);

	if (IS_ERR(rt))
		return rt;

	if (flp4->flowi4_proto)
		rt = (struct rtable *)xfrm_lookup_route(net, &rt->dst,
							flowi4_to_flowi(flp4),
							sk, 0);

	return rt;
}
EXPORT_SYMBOL_GPL(ip_route_output_flow);

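/* Illustrative sketch (placeholder values, not a verbatim call site):
 * protocols usually reach ip_route_output_flow() via helpers such as
 * ip_route_connect() or ip_route_output_ports(); spelled out by hand:
 *
 *	struct flowi4 fl4;
 *
 *	flowi4_init_output(&fl4, oif, sk->sk_mark, RT_CONN_FLAGS(sk),
 *			   RT_SCOPE_UNIVERSE, IPPROTO_UDP, 0,
 *			   daddr, saddr, dport, sport, sk->sk_uid);
 *	rt = ip_route_output_flow(net, &fl4, sk);
 *	if (IS_ERR(rt))
 *		return PTR_ERR(rt);
 */
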
/* called with rcu_read_lock held */
static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
			struct rtable *rt, u32 table_id, struct flowi4 *fl4,
			struct sk_buff *skb, u32 portid, u32 seq)
{
	struct rtmsg *r;
	struct nlmsghdr *nlh;
	unsigned long expires = 0;
	u32 error;
	u32 metrics[RTAX_MAX];

	nlh = nlmsg_put(skb, portid, seq, RTM_NEWROUTE, sizeof(*r), 0);
	if (!nlh)
		return -EMSGSIZE;

	r = nlmsg_data(nlh);
	r->rtm_family = AF_INET;
	r->rtm_dst_len = 32;
	r->rtm_src_len = 0;
	r->rtm_tos = fl4->flowi4_tos;
	r->rtm_table = table_id < 256 ? table_id : RT_TABLE_COMPAT;
	if (nla_put_u32(skb, RTA_TABLE, table_id))
		goto nla_put_failure;
	r->rtm_type = rt->rt_type;
	r->rtm_scope = RT_SCOPE_UNIVERSE;
	r->rtm_protocol = RTPROT_UNSPEC;
	r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
	if (rt->rt_flags & RTCF_NOTIFY)
		r->rtm_flags |= RTM_F_NOTIFY;
	if (IPCB(skb)->flags & IPSKB_DOREDIRECT)
		r->rtm_flags |= RTCF_DOREDIRECT;

	if (nla_put_in_addr(skb, RTA_DST, dst))
		goto nla_put_failure;
	if (src) {
		r->rtm_src_len = 32;
		if (nla_put_in_addr(skb, RTA_SRC, src))
			goto nla_put_failure;
	}
	if (rt->dst.dev &&
	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
		goto nla_put_failure;
#ifdef CONFIG_IP_ROUTE_CLASSID
	if (rt->dst.tclassid &&
	    nla_put_u32(skb, RTA_FLOW, rt->dst.tclassid))
		goto nla_put_failure;
#endif
	if (!rt_is_input_route(rt) &&
	    fl4->saddr != src) {
		if (nla_put_in_addr(skb, RTA_PREFSRC, fl4->saddr))
			goto nla_put_failure;
	}
	if (rt->rt_gw_family == AF_INET &&
	    nla_put_in_addr(skb, RTA_GATEWAY, rt->rt_gw4)) {
		goto nla_put_failure;
	} else if (rt->rt_gw_family == AF_INET6) {
		int alen = sizeof(struct in6_addr);
		struct nlattr *nla;
		struct rtvia *via;

		nla = nla_reserve(skb, RTA_VIA, alen + 2);
		if (!nla)
			goto nla_put_failure;

		via = nla_data(nla);
		via->rtvia_family = AF_INET6;
		memcpy(via->rtvia_addr, &rt->rt_gw6, alen);
	}

	expires = rt->dst.expires;
	if (expires) {
		unsigned long now = jiffies;

		if (time_before(now, expires))
			expires -= now;
		else
			expires = 0;
	}

	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
	if (rt->rt_pmtu && expires)
		metrics[RTAX_MTU - 1] = rt->rt_pmtu;
	if (rt->rt_mtu_locked && expires)
		metrics[RTAX_LOCK - 1] |= BIT(RTAX_MTU);
	if (rtnetlink_put_metrics(skb, metrics) < 0)
		goto nla_put_failure;

	if (fl4->flowi4_mark &&
	    nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark))
		goto nla_put_failure;

	if (!uid_eq(fl4->flowi4_uid, INVALID_UID) &&
	    nla_put_u32(skb, RTA_UID,
			from_kuid_munged(current_user_ns(), fl4->flowi4_uid)))
		goto nla_put_failure;

	error = rt->dst.error;

	if (rt_is_input_route(rt)) {
#ifdef CONFIG_IP_MROUTE
		if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) &&
		    IPV4_DEVCONF_ALL(net, MC_FORWARDING)) {
			int err = ipmr_get_route(net, skb,
						 fl4->saddr, fl4->daddr,
						 r, portid);

			if (err <= 0) {
				if (err == 0)
					return 0;
				goto nla_put_failure;
			}
		} else
#endif
			if (nla_put_u32(skb, RTA_IIF, fl4->flowi4_iif))
				goto nla_put_failure;
	}

	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, error) < 0)
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
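
/* For reference, the RTM_NEWROUTE reply built above may carry: RTA_TABLE,
 * RTA_DST, RTA_SRC, RTA_OIF, RTA_FLOW (CONFIG_IP_ROUTE_CLASSID only),
 * RTA_PREFSRC, RTA_GATEWAY or RTA_VIA (IPv6 gateway), the metrics block,
 * RTA_MARK, RTA_UID, RTA_IIF (input routes only) and the cacheinfo
 * trailer with expiry and error state.
 */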

static struct sk_buff *inet_rtm_getroute_build_skb(__be32 src, __be32 dst,
						   u8 ip_proto, __be16 sport,
						   __be16 dport)
{
	struct sk_buff *skb;
	struct iphdr *iph;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return NULL;

	/* Reserve room for dummy headers; this skb can pass
	 * through a good chunk of the routing engine.
	 */
	skb_reset_mac_header(skb);
	skb_reset_network_header(skb);
	skb->protocol = htons(ETH_P_IP);
	iph = skb_put(skb, sizeof(struct iphdr));
	iph->protocol = ip_proto;
	iph->saddr = src;
	iph->daddr = dst;
	iph->version = 0x4;
	iph->frag_off = 0;
	iph->ihl = 0x5;
	skb_set_transport_header(skb, skb->len);

	switch (iph->protocol) {
	case IPPROTO_UDP: {
		struct udphdr *udph;

		udph = skb_put_zero(skb, sizeof(struct udphdr));
		udph->source = sport;
		udph->dest = dport;
		udph->len = sizeof(struct udphdr);
		udph->check = 0;
		break;
	}
	case IPPROTO_TCP: {
		struct tcphdr *tcph;

		tcph = skb_put_zero(skb, sizeof(struct tcphdr));
		tcph->source = sport;
		tcph->dest = dport;
		tcph->doff = sizeof(struct tcphdr) / 4;
		tcph->rst = 1;
		tcph->check = ~tcp_v4_check(sizeof(struct tcphdr),
					    src, dst, 0);
		break;
	}
	case IPPROTO_ICMP: {
		struct icmphdr *icmph;

		icmph = skb_put_zero(skb, sizeof(struct icmphdr));
		icmph->type = ICMP_ECHO;
		icmph->code = 0;
	}
	}

	return skb;
}
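
/* The headers built above are dummies: this skb never hits the wire, it
 * only parameterizes the lookup so that fib rules and multipath hashing
 * can see the protocol and L4 ports.  With a reasonably recent iproute2,
 * this path is exercised by e.g.:
 *
 *	$ ip route get 10.0.0.1 ipproto tcp sport 12345 dport 80
 */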

static int inet_rtm_valid_getroute_req(struct sk_buff *skb,
				       const struct nlmsghdr *nlh,
				       struct nlattr **tb,
				       struct netlink_ext_ack *extack)
{
	struct rtmsg *rtm;
	int i, err;

	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
		NL_SET_ERR_MSG(extack,
			       "ipv4: Invalid header for route get request");
		return -EINVAL;
	}

	if (!netlink_strict_get_check(skb))
		return nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX,
				   rtm_ipv4_policy, extack);

	rtm = nlmsg_data(nlh);
	if ((rtm->rtm_src_len && rtm->rtm_src_len != 32) ||
	    (rtm->rtm_dst_len && rtm->rtm_dst_len != 32) ||
	    rtm->rtm_table || rtm->rtm_protocol ||
	    rtm->rtm_scope || rtm->rtm_type) {
		NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for route get request");
		return -EINVAL;
	}

	if (rtm->rtm_flags & ~(RTM_F_NOTIFY |
			       RTM_F_LOOKUP_TABLE |
			       RTM_F_FIB_MATCH)) {
		NL_SET_ERR_MSG(extack, "ipv4: Unsupported rtm_flags for route get request");
		return -EINVAL;
	}

	err = nlmsg_parse_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
				 rtm_ipv4_policy, extack);
	if (err)
		return err;

	if ((tb[RTA_SRC] && !rtm->rtm_src_len) ||
	    (tb[RTA_DST] && !rtm->rtm_dst_len)) {
		NL_SET_ERR_MSG(extack, "ipv4: rtm_src_len and rtm_dst_len must be 32 for IPv4");
		return -EINVAL;
	}

	for (i = 0; i <= RTA_MAX; i++) {
		if (!tb[i])
			continue;

		switch (i) {
		case RTA_IIF:
		case RTA_OIF:
		case RTA_SRC:
		case RTA_DST:
		case RTA_IP_PROTO:
		case RTA_SPORT:
		case RTA_DPORT:
		case RTA_MARK:
		case RTA_UID:
			break;
		default:
			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in route get request");
			return -EINVAL;
		}
	}

	return 0;
}

static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
			     struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(in_skb->sk);
	struct nlattr *tb[RTA_MAX+1];
	u32 table_id = RT_TABLE_MAIN;
	__be16 sport = 0, dport = 0;
	struct fib_result res = {};
	u8 ip_proto = IPPROTO_UDP;
	struct rtable *rt = NULL;
	struct sk_buff *skb;
	struct rtmsg *rtm;
	struct flowi4 fl4 = {};
	__be32 dst = 0;
	__be32 src = 0;
	kuid_t uid;
	u32 iif;
	int err;
	int mark;

	err = inet_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
	if (err < 0)
		return err;

	rtm = nlmsg_data(nlh);
	src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0;
	dst = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0;
	iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
	mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0;
	if (tb[RTA_UID])
		uid = make_kuid(current_user_ns(), nla_get_u32(tb[RTA_UID]));
	else
		uid = (iif ? INVALID_UID : current_uid());

	if (tb[RTA_IP_PROTO]) {
		err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO],
						  &ip_proto, AF_INET, extack);
		if (err)
			return err;
	}

	if (tb[RTA_SPORT])
		sport = nla_get_be16(tb[RTA_SPORT]);

	if (tb[RTA_DPORT])
		dport = nla_get_be16(tb[RTA_DPORT]);

	skb = inet_rtm_getroute_build_skb(src, dst, ip_proto, sport, dport);
	if (!skb)
		return -ENOBUFS;

	fl4.daddr = dst;
	fl4.saddr = src;
	fl4.flowi4_tos = rtm->rtm_tos;
	fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0;
	fl4.flowi4_mark = mark;
	fl4.flowi4_uid = uid;
	if (sport)
		fl4.fl4_sport = sport;
	if (dport)
		fl4.fl4_dport = dport;
	fl4.flowi4_proto = ip_proto;

	rcu_read_lock();

	if (iif) {
		struct net_device *dev;

		dev = dev_get_by_index_rcu(net, iif);
		if (!dev) {
			err = -ENODEV;
			goto errout_rcu;
		}

		fl4.flowi4_iif = iif; /* for rt_fill_info */
		skb->dev = dev;
		skb->mark = mark;
		err = ip_route_input_rcu(skb, dst, src, rtm->rtm_tos,
					 dev, &res);

		rt = skb_rtable(skb);
		if (err == 0 && rt->dst.error)
			err = -rt->dst.error;
	} else {
		fl4.flowi4_iif = LOOPBACK_IFINDEX;
		skb->dev = net->loopback_dev;
		rt = ip_route_output_key_hash_rcu(net, &fl4, &res, skb);
		err = 0;
		if (IS_ERR(rt))
			err = PTR_ERR(rt);
		else
			skb_dst_set(skb, &rt->dst);
	}

	if (err)
		goto errout_rcu;

	if (rtm->rtm_flags & RTM_F_NOTIFY)
		rt->rt_flags |= RTCF_NOTIFY;

	if (rtm->rtm_flags & RTM_F_LOOKUP_TABLE)
		table_id = res.table ? res.table->tb_id : 0;

	/* reset skb for netlink reply msg */
	skb_trim(skb, 0);
	skb_reset_network_header(skb);
	skb_reset_transport_header(skb);
	skb_reset_mac_header(skb);

	if (rtm->rtm_flags & RTM_F_FIB_MATCH) {
		if (!res.fi) {
			err = fib_props[res.type].error;
			if (!err)
				err = -EHOSTUNREACH;
			goto errout_rcu;
		}
		err = fib_dump_info(skb, NETLINK_CB(in_skb).portid,
				    nlh->nlmsg_seq, RTM_NEWROUTE, table_id,
				    rt->rt_type, res.prefix, res.prefixlen,
				    fl4.flowi4_tos, res.fi, 0);
	} else {
		err = rt_fill_info(net, dst, src, rt, table_id, &fl4, skb,
				   NETLINK_CB(in_skb).portid, nlh->nlmsg_seq);
	}
	if (err < 0)
		goto errout_rcu;

	rcu_read_unlock();

	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);

errout_free:
	return err;
errout_rcu:
	rcu_read_unlock();
	kfree_skb(skb);
	goto errout_free;
}
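
/* RTM_GETROUTE is the message behind `ip route get`.  Typical userspace
 * invocations that land in inet_rtm_getroute():
 *
 *	$ ip route get 8.8.8.8
 *	$ ip route get 8.8.8.8 from 10.0.0.1 iif eth0
 *	$ ip route get fibmatch 8.8.8.8		(sets RTM_F_FIB_MATCH)
 */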

void ip_rt_multicast_event(struct in_device *in_dev)
{
	rt_cache_flush(dev_net(in_dev->dev));
}

#ifdef CONFIG_SYSCTL
static int ip_rt_gc_interval __read_mostly = 60 * HZ;
static int ip_rt_gc_min_interval __read_mostly = HZ / 2;
static int ip_rt_gc_elasticity __read_mostly = 8;
static int ip_min_valid_pmtu __read_mostly = IPV4_MIN_MTU;

static int ipv4_sysctl_rtcache_flush(struct ctl_table *__ctl, int write,
				     void __user *buffer,
				     size_t *lenp, loff_t *ppos)
{
	struct net *net = (struct net *)__ctl->extra1;

	if (write) {
		rt_cache_flush(net);
		fnhe_genid_bump(net);
		return 0;
	}

	return -EINVAL;
}

static struct ctl_table ipv4_route_table[] = {
	{
		.procname	= "gc_thresh",
		.data		= &ipv4_dst_ops.gc_thresh,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "max_size",
		.data		= &ip_rt_max_size,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		/* Deprecated. Use gc_min_interval_ms */

		.procname	= "gc_min_interval",
		.data		= &ip_rt_gc_min_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "gc_min_interval_ms",
		.data		= &ip_rt_gc_min_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_ms_jiffies,
	},
	{
		.procname	= "gc_timeout",
		.data		= &ip_rt_gc_timeout,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "gc_interval",
		.data		= &ip_rt_gc_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "redirect_load",
		.data		= &ip_rt_redirect_load,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "redirect_number",
		.data		= &ip_rt_redirect_number,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "redirect_silence",
		.data		= &ip_rt_redirect_silence,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "error_cost",
		.data		= &ip_rt_error_cost,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "error_burst",
		.data		= &ip_rt_error_burst,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "gc_elasticity",
		.data		= &ip_rt_gc_elasticity,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "mtu_expires",
		.data		= &ip_rt_mtu_expires,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "min_pmtu",
		.data		= &ip_rt_min_pmtu,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &ip_min_valid_pmtu,
	},
	{
		.procname	= "min_adv_mss",
		.data		= &ip_rt_min_advmss,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{ }
};

static struct ctl_table ipv4_route_flush_table[] = {
	{
		.procname	= "flush",
		.maxlen		= sizeof(int),
		.mode		= 0200,
		.proc_handler	= ipv4_sysctl_rtcache_flush,
	},
	{ },
};
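
/* These knobs are exposed under /proc/sys/net/ipv4/route; writing the
 * per-netns "flush" entry invokes ipv4_sysctl_rtcache_flush() above:
 *
 *	# echo 1 > /proc/sys/net/ipv4/route/flush
 *	# sysctl net.ipv4.route.min_pmtu=552
 */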

static __net_init int sysctl_route_net_init(struct net *net)
{
	struct ctl_table *tbl;

	tbl = ipv4_route_flush_table;
	if (!net_eq(net, &init_net)) {
		tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL);
		if (!tbl)
			goto err_dup;

		/* Don't export sysctls to unprivileged users */
		if (net->user_ns != &init_user_ns)
			tbl[0].procname = NULL;
	}
	tbl[0].extra1 = net;

	net->ipv4.route_hdr = register_net_sysctl(net, "net/ipv4/route", tbl);
	if (!net->ipv4.route_hdr)
		goto err_reg;
	return 0;

err_reg:
	if (tbl != ipv4_route_flush_table)
		kfree(tbl);
err_dup:
	return -ENOMEM;
}

static __net_exit void sysctl_route_net_exit(struct net *net)
{
	struct ctl_table *tbl;

	tbl = net->ipv4.route_hdr->ctl_table_arg;
	unregister_net_sysctl_table(net->ipv4.route_hdr);
	BUG_ON(tbl == ipv4_route_flush_table);
	kfree(tbl);
}

static __net_initdata struct pernet_operations sysctl_route_ops = {
	.init = sysctl_route_net_init,
	.exit = sysctl_route_net_exit,
};
#endif

static __net_init int rt_genid_init(struct net *net)
{
	atomic_set(&net->ipv4.rt_genid, 0);
	atomic_set(&net->fnhe_genid, 0);
	atomic_set(&net->ipv4.dev_addr_genid, get_random_int());
	return 0;
}
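
/* rt_genid is the per-netns route generation counter: bumping it (as
 * rt_cache_flush() does) lazily invalidates every cached dst, because
 * validity checks compare a route's recorded genid with the current
 * value rather than eagerly walking and freeing entries.
 */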

static __net_initdata struct pernet_operations rt_genid_ops = {
	.init = rt_genid_init,
};

static int __net_init ipv4_inetpeer_init(struct net *net)
{
	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);

	if (!bp)
		return -ENOMEM;
	inet_peer_base_init(bp);
	net->ipv4.peers = bp;
	return 0;
}

static void __net_exit ipv4_inetpeer_exit(struct net *net)
{
	struct inet_peer_base *bp = net->ipv4.peers;

	net->ipv4.peers = NULL;
	inetpeer_invalidate_tree(bp);
	kfree(bp);
}

static __net_initdata struct pernet_operations ipv4_inetpeer_ops = {
	.init	= ipv4_inetpeer_init,
	.exit	= ipv4_inetpeer_exit,
};

#ifdef CONFIG_IP_ROUTE_CLASSID
struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
#endif /* CONFIG_IP_ROUTE_CLASSID */

int __init ip_rt_init(void)
{
	int cpu;

	ip_idents = kmalloc_array(IP_IDENTS_SZ, sizeof(*ip_idents),
				  GFP_KERNEL);
	if (!ip_idents)
		panic("IP: failed to allocate ip_idents\n");

	prandom_bytes(ip_idents, IP_IDENTS_SZ * sizeof(*ip_idents));

	ip_tstamps = kcalloc(IP_IDENTS_SZ, sizeof(*ip_tstamps), GFP_KERNEL);
	if (!ip_tstamps)
		panic("IP: failed to allocate ip_tstamps\n");

	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu);

		INIT_LIST_HEAD(&ul->head);
		spin_lock_init(&ul->lock);
	}
#ifdef CONFIG_IP_ROUTE_CLASSID
	ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct));
	if (!ip_rt_acct)
		panic("IP: failed to allocate ip_rt_acct\n");
#endif

	ipv4_dst_ops.kmem_cachep =
		kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0,
				  SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);

	ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep;

	if (dst_entries_init(&ipv4_dst_ops) < 0)
		panic("IP: failed to allocate ipv4_dst_ops counter\n");

	if (dst_entries_init(&ipv4_dst_blackhole_ops) < 0)
		panic("IP: failed to allocate ipv4_dst_blackhole_ops counter\n");

	ipv4_dst_ops.gc_thresh = ~0;
	ip_rt_max_size = INT_MAX;

	devinet_init();
	ip_fib_init();

	if (ip_rt_proc_init())
		pr_err("Unable to create route proc files\n");
#ifdef CONFIG_XFRM
	xfrm_init();
	xfrm4_init();
#endif
	rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL,
		      RTNL_FLAG_DOIT_UNLOCKED);

#ifdef CONFIG_SYSCTL
	register_pernet_subsys(&sysctl_route_ops);
#endif
	register_pernet_subsys(&rt_genid_ops);
	register_pernet_subsys(&ipv4_inetpeer_ops);
	return 0;
}

#ifdef CONFIG_SYSCTL
/*
 * We really need to sanitize the damn ipv4 init order, then all
 * this nonsense will go away.
 */
void __init ip_static_sysctl_init(void)
{
	register_net_sysctl(&init_net, "net/ipv4/route", ipv4_route_table);
}
#endif