blob: 8322e479f2997b8f0a97b7d6c87434e91471a941 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * ROUTE - implementation of the IP router.
7 *
Jesper Juhl02c30a82005-05-05 16:16:16 -07008 * Authors: Ross Biro
Linus Torvalds1da177e2005-04-16 15:20:36 -07009 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
10 * Alan Cox, <gw4pts@gw4pts.ampr.org>
11 * Linus Torvalds, <Linus.Torvalds@helsinki.fi>
12 * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13 *
14 * Fixes:
15 * Alan Cox : Verify area fixes.
16 * Alan Cox : cli() protects routing changes
17 * Rui Oliveira : ICMP routing table updates
18 * (rco@di.uminho.pt) Routing table insertion and update
19 * Linus Torvalds : Rewrote bits to be sensible
20 * Alan Cox : Added BSD route gw semantics
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090021 * Alan Cox : Super /proc >4K
Linus Torvalds1da177e2005-04-16 15:20:36 -070022 * Alan Cox : MTU in route table
23 * Alan Cox : MSS actually. Also added the window
24 * clamper.
25 * Sam Lantinga : Fixed route matching in rt_del()
26 * Alan Cox : Routing cache support.
27 * Alan Cox : Removed compatibility cruft.
28 * Alan Cox : RTF_REJECT support.
29 * Alan Cox : TCP irtt support.
30 * Jonathan Naylor : Added Metric support.
31 * Miquel van Smoorenburg : BSD API fixes.
32 * Miquel van Smoorenburg : Metrics.
33 * Alan Cox : Use __u32 properly
34 * Alan Cox : Aligned routing errors more closely with BSD
35 * our system is still very different.
36 * Alan Cox : Faster /proc handling
37 * Alexey Kuznetsov : Massive rework to support tree based routing,
38 * routing caches and better behaviour.
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090039 *
Linus Torvalds1da177e2005-04-16 15:20:36 -070040 * Olaf Erb : irtt wasn't being copied right.
41 * Bjorn Ekwall : Kerneld route support.
42 * Alan Cox : Multicast fixed (I hope)
43 * Pavel Krauz : Limited broadcast fixed
44 * Mike McLagan : Routing by source
45 * Alexey Kuznetsov : End of old history. Split to fib.c and
46 * route.c and rewritten from scratch.
47 * Andi Kleen : Load-limit warning messages.
48 * Vitaly E. Lavrov : Transparent proxy revived after year coma.
49 * Vitaly E. Lavrov : Race condition in ip_route_input_slow.
50 * Tobias Ringstrom : Uninitialized res.type in ip_route_output_slow.
51 * Vladimir V. Ivanov : IP rule info (flowid) is really useful.
52 * Marc Boucher : routing by fwmark
53 * Robert Olsson : Added rt_cache statistics
54 * Arnaldo C. Melo : Convert proc stuff to seq_file
Eric Dumazetbb1d23b2005-07-05 15:00:32 -070055 * Eric Dumazet : hashed spinlocks and rt_check_expire() fixes.
Ilia Sotnikovcef26852006-03-25 01:38:55 -080056 * Ilia Sotnikov : Ignore TOS on PMTUD and Redirect
57 * Ilia Sotnikov : Removed TOS from hash calculations
Linus Torvalds1da177e2005-04-16 15:20:36 -070058 *
59 * This program is free software; you can redistribute it and/or
60 * modify it under the terms of the GNU General Public License
61 * as published by the Free Software Foundation; either version
62 * 2 of the License, or (at your option) any later version.
63 */
64
Joe Perchesafd465032012-03-12 07:03:32 +000065#define pr_fmt(fmt) "IPv4: " fmt
66
Linus Torvalds1da177e2005-04-16 15:20:36 -070067#include <linux/module.h>
Linus Torvalds7c0f6ba2016-12-24 11:46:01 -080068#include <linux/uaccess.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070069#include <linux/bitops.h>
70#include <linux/types.h>
71#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070072#include <linux/mm.h>
73#include <linux/string.h>
74#include <linux/socket.h>
75#include <linux/sockios.h>
76#include <linux/errno.h>
77#include <linux/in.h>
78#include <linux/inet.h>
79#include <linux/netdevice.h>
80#include <linux/proc_fs.h>
81#include <linux/init.h>
82#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070083#include <linux/inetdevice.h>
84#include <linux/igmp.h>
85#include <linux/pkt_sched.h>
86#include <linux/mroute.h>
87#include <linux/netfilter_ipv4.h>
88#include <linux/random.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070089#include <linux/rcupdate.h>
90#include <linux/times.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090091#include <linux/slab.h>
Eric Dumazet73f156a2014-06-02 05:26:03 -070092#include <linux/jhash.h>
Herbert Xu352e5122007-11-13 21:34:06 -080093#include <net/dst.h>
Thomas Graf1b7179d2015-07-21 10:43:59 +020094#include <net/dst_metadata.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020095#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070096#include <net/protocol.h>
97#include <net/ip.h>
98#include <net/route.h>
99#include <net/inetpeer.h>
100#include <net/sock.h>
101#include <net/ip_fib.h>
102#include <net/arp.h>
103#include <net/tcp.h>
104#include <net/icmp.h>
105#include <net/xfrm.h>
Roopa Prabhu571e7222015-07-21 10:43:47 +0200106#include <net/lwtunnel.h>
Tom Tucker8d717402006-07-30 20:43:36 -0700107#include <net/netevent.h>
Thomas Graf63f34442007-03-22 11:55:17 -0700108#include <net/rtnetlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700109#ifdef CONFIG_SYSCTL
110#include <linux/sysctl.h>
Shan Wei7426a562012-04-18 18:05:46 +0000111#include <linux/kmemleak.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700112#endif
David S. Miller6e5714e2011-08-03 20:50:44 -0700113#include <net/secure_seq.h>
Thomas Graf1b7179d2015-07-21 10:43:59 +0200114#include <net/ip_tunnels.h>
David Ahern385add92015-09-29 20:07:13 -0700115#include <net/l3mdev.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700116
Roopa Prabhub6179812017-05-25 10:42:39 -0700117#include "fib_lookup.h"
118
David S. Miller68a5e3d2011-03-11 20:07:33 -0500119#define RT_FL_TOS(oldflp4) \
Julian Anastasovf61759e2011-12-02 11:39:42 +0000120 ((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700121
Linus Torvalds1da177e2005-04-16 15:20:36 -0700122#define RT_GC_TIMEOUT (300*HZ)
123
Linus Torvalds1da177e2005-04-16 15:20:36 -0700124static int ip_rt_max_size;
Stephen Hemminger817bc4d2008-03-22 17:43:59 -0700125static int ip_rt_redirect_number __read_mostly = 9;
126static int ip_rt_redirect_load __read_mostly = HZ / 50;
127static int ip_rt_redirect_silence __read_mostly = ((HZ / 50) << (9 + 1));
128static int ip_rt_error_cost __read_mostly = HZ;
129static int ip_rt_error_burst __read_mostly = 5 * HZ;
Stephen Hemminger817bc4d2008-03-22 17:43:59 -0700130static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ;
Sabrina Dubrocac7272c22018-02-26 16:13:43 +0100131static u32 ip_rt_min_pmtu __read_mostly = 512 + 20 + 20;
Stephen Hemminger817bc4d2008-03-22 17:43:59 -0700132static int ip_rt_min_advmss __read_mostly = 256;
Eric Dumazet9f28a2f2011-12-21 15:47:16 -0500133
Xin Longdeed49d2016-02-18 21:21:19 +0800134static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT;
Sabrina Dubrocac7272c22018-02-26 16:13:43 +0100135
Linus Torvalds1da177e2005-04-16 15:20:36 -0700136/*
137 * Interface to generic destination cache.
138 */
139
140static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
David S. Miller0dbaee32010-12-13 12:52:14 -0800141static unsigned int ipv4_default_advmss(const struct dst_entry *dst);
Steffen Klassertebb762f2011-11-23 02:12:51 +0000142static unsigned int ipv4_mtu(const struct dst_entry *dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700143static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
144static void ipv4_link_failure(struct sk_buff *skb);
David S. Miller6700c272012-07-17 03:29:28 -0700145static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
146 struct sk_buff *skb, u32 mtu);
147static void ip_do_redirect(struct dst_entry *dst, struct sock *sk,
148 struct sk_buff *skb);
David S. Millercaacf052012-07-31 15:06:50 -0700149static void ipv4_dst_destroy(struct dst_entry *dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700150
David S. Miller62fa8a82011-01-26 20:51:05 -0800151static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
152{
David S. Miller31248732012-07-10 07:08:18 -0700153 WARN_ON(1);
154 return NULL;
David S. Miller62fa8a82011-01-26 20:51:05 -0800155}
156
David S. Millerf894cbf2012-07-02 21:52:24 -0700157static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
158 struct sk_buff *skb,
159 const void *daddr);
Julian Anastasov63fca652017-02-06 23:14:15 +0200160static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr);
David S. Millerd3aaeb32011-07-18 00:40:17 -0700161
Linus Torvalds1da177e2005-04-16 15:20:36 -0700162static struct dst_ops ipv4_dst_ops = {
163 .family = AF_INET,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700164 .check = ipv4_dst_check,
David S. Miller0dbaee32010-12-13 12:52:14 -0800165 .default_advmss = ipv4_default_advmss,
Steffen Klassertebb762f2011-11-23 02:12:51 +0000166 .mtu = ipv4_mtu,
David S. Miller62fa8a82011-01-26 20:51:05 -0800167 .cow_metrics = ipv4_cow_metrics,
David S. Millercaacf052012-07-31 15:06:50 -0700168 .destroy = ipv4_dst_destroy,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700169 .negative_advice = ipv4_negative_advice,
170 .link_failure = ipv4_link_failure,
171 .update_pmtu = ip_rt_update_pmtu,
David S. Millere47a1852012-07-11 20:55:47 -0700172 .redirect = ip_do_redirect,
Eric W. Biedermanb92dacd2015-10-07 16:48:37 -0500173 .local_out = __ip_local_out,
David S. Millerd3aaeb32011-07-18 00:40:17 -0700174 .neigh_lookup = ipv4_neigh_lookup,
Julian Anastasov63fca652017-02-06 23:14:15 +0200175 .confirm_neigh = ipv4_confirm_neigh,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700176};
177
178#define ECN_OR_COST(class) TC_PRIO_##class
179
Philippe De Muyter4839c522007-07-09 15:32:57 -0700180const __u8 ip_tos2prio[16] = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700181 TC_PRIO_BESTEFFORT,
Dan Siemon4a2b9c32011-03-15 13:56:07 +0000182 ECN_OR_COST(BESTEFFORT),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700183 TC_PRIO_BESTEFFORT,
184 ECN_OR_COST(BESTEFFORT),
185 TC_PRIO_BULK,
186 ECN_OR_COST(BULK),
187 TC_PRIO_BULK,
188 ECN_OR_COST(BULK),
189 TC_PRIO_INTERACTIVE,
190 ECN_OR_COST(INTERACTIVE),
191 TC_PRIO_INTERACTIVE,
192 ECN_OR_COST(INTERACTIVE),
193 TC_PRIO_INTERACTIVE_BULK,
194 ECN_OR_COST(INTERACTIVE_BULK),
195 TC_PRIO_INTERACTIVE_BULK,
196 ECN_OR_COST(INTERACTIVE_BULK)
197};
Amir Vadaid4a96862012-04-04 21:33:28 +0000198EXPORT_SYMBOL(ip_tos2prio);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700199
Eric Dumazet2f970d82006-01-17 02:54:36 -0800200static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
Christoph Lameter3ed66e92014-04-07 15:39:40 -0700201#define RT_CACHE_STAT_INC(field) raw_cpu_inc(rt_cache_stat.field)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700202
Linus Torvalds1da177e2005-04-16 15:20:36 -0700203#ifdef CONFIG_PROC_FS
Linus Torvalds1da177e2005-04-16 15:20:36 -0700204static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
205{
Eric Dumazet29e75252008-01-31 17:05:09 -0800206 if (*pos)
David S. Miller89aef892012-07-17 11:00:09 -0700207 return NULL;
Eric Dumazet29e75252008-01-31 17:05:09 -0800208 return SEQ_START_TOKEN;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700209}
210
211static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos)
212{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700213 ++*pos;
David S. Miller89aef892012-07-17 11:00:09 -0700214 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700215}
216
217static void rt_cache_seq_stop(struct seq_file *seq, void *v)
218{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700219}
220
221static int rt_cache_seq_show(struct seq_file *seq, void *v)
222{
223 if (v == SEQ_START_TOKEN)
224 seq_printf(seq, "%-127s\n",
225 "Iface\tDestination\tGateway \tFlags\t\tRefCnt\tUse\t"
226 "Metric\tSource\t\tMTU\tWindow\tIRTT\tTOS\tHHRef\t"
227 "HHUptod\tSpecDst");
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900228 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700229}
230
Stephen Hemmingerf6908082007-03-12 14:34:29 -0700231static const struct seq_operations rt_cache_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700232 .start = rt_cache_seq_start,
233 .next = rt_cache_seq_next,
234 .stop = rt_cache_seq_stop,
235 .show = rt_cache_seq_show,
236};
237
238static int rt_cache_seq_open(struct inode *inode, struct file *file)
239{
David S. Miller89aef892012-07-17 11:00:09 -0700240 return seq_open(file, &rt_cache_seq_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700241}
242
Arjan van de Ven9a321442007-02-12 00:55:35 -0800243static const struct file_operations rt_cache_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700244 .open = rt_cache_seq_open,
245 .read = seq_read,
246 .llseek = seq_lseek,
David S. Miller89aef892012-07-17 11:00:09 -0700247 .release = seq_release,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700248};
249
250
251static void *rt_cpu_seq_start(struct seq_file *seq, loff_t *pos)
252{
253 int cpu;
254
255 if (*pos == 0)
256 return SEQ_START_TOKEN;
257
Rusty Russell0f23174a2008-12-29 12:23:42 +0000258 for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700259 if (!cpu_possible(cpu))
260 continue;
261 *pos = cpu+1;
Eric Dumazet2f970d82006-01-17 02:54:36 -0800262 return &per_cpu(rt_cache_stat, cpu);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700263 }
264 return NULL;
265}
266
267static void *rt_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
268{
269 int cpu;
270
Rusty Russell0f23174a2008-12-29 12:23:42 +0000271 for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700272 if (!cpu_possible(cpu))
273 continue;
274 *pos = cpu+1;
Eric Dumazet2f970d82006-01-17 02:54:36 -0800275 return &per_cpu(rt_cache_stat, cpu);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700276 }
277 return NULL;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900278
Linus Torvalds1da177e2005-04-16 15:20:36 -0700279}
280
281static void rt_cpu_seq_stop(struct seq_file *seq, void *v)
282{
283
284}
285
286static int rt_cpu_seq_show(struct seq_file *seq, void *v)
287{
288 struct rt_cache_stat *st = v;
289
290 if (v == SEQ_START_TOKEN) {
Olaf Rempel5bec0032005-04-28 12:16:08 -0700291 seq_printf(seq, "entries in_hit in_slow_tot in_slow_mc in_no_route in_brd in_martian_dst in_martian_src out_hit out_slow_tot out_slow_mc gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700292 return 0;
293 }
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900294
Linus Torvalds1da177e2005-04-16 15:20:36 -0700295 seq_printf(seq,"%08x %08x %08x %08x %08x %08x %08x %08x "
296 " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n",
Eric Dumazetfc66f952010-10-08 06:37:34 +0000297 dst_entries_get_slow(&ipv4_dst_ops),
Eric Dumazet0baf2b32013-10-16 02:49:04 -0700298 0, /* st->in_hit */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700299 st->in_slow_tot,
300 st->in_slow_mc,
301 st->in_no_route,
302 st->in_brd,
303 st->in_martian_dst,
304 st->in_martian_src,
305
Eric Dumazet0baf2b32013-10-16 02:49:04 -0700306 0, /* st->out_hit */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700307 st->out_slow_tot,
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900308 st->out_slow_mc,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700309
Eric Dumazet0baf2b32013-10-16 02:49:04 -0700310 0, /* st->gc_total */
311 0, /* st->gc_ignored */
312 0, /* st->gc_goal_miss */
313 0, /* st->gc_dst_overflow */
314 0, /* st->in_hlist_search */
315 0 /* st->out_hlist_search */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700316 );
317 return 0;
318}
319
Stephen Hemmingerf6908082007-03-12 14:34:29 -0700320static const struct seq_operations rt_cpu_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700321 .start = rt_cpu_seq_start,
322 .next = rt_cpu_seq_next,
323 .stop = rt_cpu_seq_stop,
324 .show = rt_cpu_seq_show,
325};
326
327
328static int rt_cpu_seq_open(struct inode *inode, struct file *file)
329{
330 return seq_open(file, &rt_cpu_seq_ops);
331}
332
Arjan van de Ven9a321442007-02-12 00:55:35 -0800333static const struct file_operations rt_cpu_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700334 .open = rt_cpu_seq_open,
335 .read = seq_read,
336 .llseek = seq_lseek,
337 .release = seq_release,
338};
339
Patrick McHardyc7066f72011-01-14 13:36:42 +0100340#ifdef CONFIG_IP_ROUTE_CLASSID
Alexey Dobriyana661c412009-11-25 15:40:35 -0800341static int rt_acct_proc_show(struct seq_file *m, void *v)
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800342{
Alexey Dobriyana661c412009-11-25 15:40:35 -0800343 struct ip_rt_acct *dst, *src;
344 unsigned int i, j;
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800345
Alexey Dobriyana661c412009-11-25 15:40:35 -0800346 dst = kcalloc(256, sizeof(struct ip_rt_acct), GFP_KERNEL);
347 if (!dst)
348 return -ENOMEM;
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800349
Alexey Dobriyana661c412009-11-25 15:40:35 -0800350 for_each_possible_cpu(i) {
351 src = (struct ip_rt_acct *)per_cpu_ptr(ip_rt_acct, i);
352 for (j = 0; j < 256; j++) {
353 dst[j].o_bytes += src[j].o_bytes;
354 dst[j].o_packets += src[j].o_packets;
355 dst[j].i_bytes += src[j].i_bytes;
356 dst[j].i_packets += src[j].i_packets;
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800357 }
358 }
Alexey Dobriyana661c412009-11-25 15:40:35 -0800359
360 seq_write(m, dst, 256 * sizeof(struct ip_rt_acct));
361 kfree(dst);
362 return 0;
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800363}
Alexey Dobriyana661c412009-11-25 15:40:35 -0800364
365static int rt_acct_proc_open(struct inode *inode, struct file *file)
366{
367 return single_open(file, rt_acct_proc_show, NULL);
368}
369
370static const struct file_operations rt_acct_proc_fops = {
Alexey Dobriyana661c412009-11-25 15:40:35 -0800371 .open = rt_acct_proc_open,
372 .read = seq_read,
373 .llseek = seq_lseek,
374 .release = single_release,
375};
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800376#endif
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800377
Denis V. Lunev73b38712008-02-28 20:51:18 -0800378static int __net_init ip_rt_do_proc_init(struct net *net)
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800379{
380 struct proc_dir_entry *pde;
381
Joe Perchesd6444062018-03-23 15:54:38 -0700382 pde = proc_create("rt_cache", 0444, net->proc_net,
Gao fengd4beaa62013-02-18 01:34:54 +0000383 &rt_cache_seq_fops);
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800384 if (!pde)
385 goto err1;
386
Joe Perchesd6444062018-03-23 15:54:38 -0700387 pde = proc_create("rt_cache", 0444,
Wang Chen77020722008-02-28 14:14:25 -0800388 net->proc_net_stat, &rt_cpu_seq_fops);
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800389 if (!pde)
390 goto err2;
391
Patrick McHardyc7066f72011-01-14 13:36:42 +0100392#ifdef CONFIG_IP_ROUTE_CLASSID
Alexey Dobriyana661c412009-11-25 15:40:35 -0800393 pde = proc_create("rt_acct", 0, net->proc_net, &rt_acct_proc_fops);
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800394 if (!pde)
395 goto err3;
396#endif
397 return 0;
398
Patrick McHardyc7066f72011-01-14 13:36:42 +0100399#ifdef CONFIG_IP_ROUTE_CLASSID
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800400err3:
401 remove_proc_entry("rt_cache", net->proc_net_stat);
402#endif
403err2:
404 remove_proc_entry("rt_cache", net->proc_net);
405err1:
406 return -ENOMEM;
407}
Denis V. Lunev73b38712008-02-28 20:51:18 -0800408
409static void __net_exit ip_rt_do_proc_exit(struct net *net)
410{
411 remove_proc_entry("rt_cache", net->proc_net_stat);
412 remove_proc_entry("rt_cache", net->proc_net);
Patrick McHardyc7066f72011-01-14 13:36:42 +0100413#ifdef CONFIG_IP_ROUTE_CLASSID
Denis V. Lunev73b38712008-02-28 20:51:18 -0800414 remove_proc_entry("rt_acct", net->proc_net);
Alexey Dobriyan0a931ac2010-01-17 03:32:50 +0000415#endif
Denis V. Lunev73b38712008-02-28 20:51:18 -0800416}
417
418static struct pernet_operations ip_rt_proc_ops __net_initdata = {
419 .init = ip_rt_do_proc_init,
420 .exit = ip_rt_do_proc_exit,
421};
422
423static int __init ip_rt_proc_init(void)
424{
425 return register_pernet_subsys(&ip_rt_proc_ops);
426}
427
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800428#else
Denis V. Lunev73b38712008-02-28 20:51:18 -0800429static inline int ip_rt_proc_init(void)
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800430{
431 return 0;
432}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700433#endif /* CONFIG_PROC_FS */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900434
Eric Dumazet4331deb2012-07-25 05:11:23 +0000435static inline bool rt_is_expired(const struct rtable *rth)
Denis V. Luneve84f84f2008-07-05 19:04:32 -0700436{
fan.duca4c3fc2013-07-30 08:33:53 +0800437 return rth->rt_genid != rt_genid_ipv4(dev_net(rth->dst.dev));
Denis V. Luneve84f84f2008-07-05 19:04:32 -0700438}
439
Nicolas Dichtel4ccfe6d2012-09-07 00:45:29 +0000440void rt_cache_flush(struct net *net)
Eric Dumazet29e75252008-01-31 17:05:09 -0800441{
fan.duca4c3fc2013-07-30 08:33:53 +0800442 rt_genid_bump_ipv4(net);
Eric Dumazet98376382010-03-08 03:20:00 +0000443}
444
David S. Millerf894cbf2012-07-02 21:52:24 -0700445static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
446 struct sk_buff *skb,
447 const void *daddr)
David Miller3769cff2011-07-11 22:44:24 +0000448{
David S. Millerd3aaeb32011-07-18 00:40:17 -0700449 struct net_device *dev = dst->dev;
450 const __be32 *pkey = daddr;
David S. Miller39232972012-01-26 15:22:32 -0500451 const struct rtable *rt;
David Miller3769cff2011-07-11 22:44:24 +0000452 struct neighbour *n;
453
David S. Miller39232972012-01-26 15:22:32 -0500454 rt = (const struct rtable *) dst;
David S. Millera263b302012-07-02 02:02:15 -0700455 if (rt->rt_gateway)
David S. Miller39232972012-01-26 15:22:32 -0500456 pkey = (const __be32 *) &rt->rt_gateway;
David S. Millerf894cbf2012-07-02 21:52:24 -0700457 else if (skb)
458 pkey = &ip_hdr(skb)->daddr;
David S. Millerd3aaeb32011-07-18 00:40:17 -0700459
David S. Miller80703d22012-02-15 17:48:35 -0500460 n = __ipv4_neigh_lookup(dev, *(__force u32 *)pkey);
David S. Millerd3aaeb32011-07-18 00:40:17 -0700461 if (n)
462 return n;
David Miller32092ec2011-07-25 00:01:41 +0000463 return neigh_create(&arp_tbl, pkey, dev);
David S. Millerd3aaeb32011-07-18 00:40:17 -0700464}
465
Julian Anastasov63fca652017-02-06 23:14:15 +0200466static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr)
467{
468 struct net_device *dev = dst->dev;
469 const __be32 *pkey = daddr;
470 const struct rtable *rt;
471
472 rt = (const struct rtable *)dst;
473 if (rt->rt_gateway)
474 pkey = (const __be32 *)&rt->rt_gateway;
475 else if (!daddr ||
476 (rt->rt_flags &
477 (RTCF_MULTICAST | RTCF_BROADCAST | RTCF_LOCAL)))
478 return;
479
480 __ipv4_confirm_neigh(dev, *(__force u32 *)pkey);
481}
482
Eric Dumazet04ca6972014-07-26 08:58:10 +0200483#define IP_IDENTS_SZ 2048u
Eric Dumazet04ca6972014-07-26 08:58:10 +0200484
Eric Dumazet355b590c2015-05-01 10:37:49 -0700485static atomic_t *ip_idents __read_mostly;
486static u32 *ip_tstamps __read_mostly;
Eric Dumazet04ca6972014-07-26 08:58:10 +0200487
488/* In order to protect privacy, we add a perturbation to identifiers
489 * if one generator is seldom used. This makes hard for an attacker
490 * to infer how many packets were sent between two points in time.
491 */
492u32 ip_idents_reserve(u32 hash, int segs)
493{
Eric Dumazet355b590c2015-05-01 10:37:49 -0700494 u32 *p_tstamp = ip_tstamps + hash % IP_IDENTS_SZ;
495 atomic_t *p_id = ip_idents + hash % IP_IDENTS_SZ;
Mark Rutland6aa7de02017-10-23 14:07:29 -0700496 u32 old = READ_ONCE(*p_tstamp);
Eric Dumazet04ca6972014-07-26 08:58:10 +0200497 u32 now = (u32)jiffies;
Eric Dumazetadb03112016-09-20 18:06:17 -0700498 u32 new, delta = 0;
Eric Dumazet04ca6972014-07-26 08:58:10 +0200499
Eric Dumazet355b590c2015-05-01 10:37:49 -0700500 if (old != now && cmpxchg(p_tstamp, old, now) == old)
Eric Dumazet04ca6972014-07-26 08:58:10 +0200501 delta = prandom_u32_max(now - old);
502
Eric Dumazetadb03112016-09-20 18:06:17 -0700503 /* Do not use atomic_add_return() as it makes UBSAN unhappy */
504 do {
505 old = (u32)atomic_read(p_id);
506 new = old + delta + segs;
507 } while (atomic_cmpxchg(p_id, old, new) != old);
508
509 return new - segs;
Eric Dumazet04ca6972014-07-26 08:58:10 +0200510}
511EXPORT_SYMBOL(ip_idents_reserve);
Eric Dumazet73f156a2014-06-02 05:26:03 -0700512
Hannes Frederic Sowab6a77192015-03-25 17:07:44 +0100513void __ip_select_ident(struct net *net, struct iphdr *iph, int segs)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700514{
Eric Dumazet73f156a2014-06-02 05:26:03 -0700515 static u32 ip_idents_hashrnd __read_mostly;
516 u32 hash, id;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700517
Eric Dumazet73f156a2014-06-02 05:26:03 -0700518 net_get_random_once(&ip_idents_hashrnd, sizeof(ip_idents_hashrnd));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700519
Eric Dumazet04ca6972014-07-26 08:58:10 +0200520 hash = jhash_3words((__force u32)iph->daddr,
521 (__force u32)iph->saddr,
Hannes Frederic Sowab6a77192015-03-25 17:07:44 +0100522 iph->protocol ^ net_hash_mix(net),
Eric Dumazet04ca6972014-07-26 08:58:10 +0200523 ip_idents_hashrnd);
Eric Dumazet73f156a2014-06-02 05:26:03 -0700524 id = ip_idents_reserve(hash, segs);
525 iph->id = htons(id);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700526}
Eric Dumazet4bc2f182010-07-09 21:22:10 +0000527EXPORT_SYMBOL(__ip_select_ident);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700528
Lorenzo Colittie2d118a2016-11-04 02:23:43 +0900529static void __build_flow_key(const struct net *net, struct flowi4 *fl4,
530 const struct sock *sk,
David S. Miller4895c772012-07-17 04:19:00 -0700531 const struct iphdr *iph,
532 int oif, u8 tos,
533 u8 prot, u32 mark, int flow_flags)
534{
535 if (sk) {
536 const struct inet_sock *inet = inet_sk(sk);
537
538 oif = sk->sk_bound_dev_if;
539 mark = sk->sk_mark;
540 tos = RT_CONN_FLAGS(sk);
541 prot = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol;
542 }
543 flowi4_init_output(fl4, oif, mark, tos,
544 RT_SCOPE_UNIVERSE, prot,
545 flow_flags,
Lorenzo Colittie2d118a2016-11-04 02:23:43 +0900546 iph->daddr, iph->saddr, 0, 0,
547 sock_net_uid(net, sk));
David S. Miller4895c772012-07-17 04:19:00 -0700548}
549
Eric Dumazet5abf7f72012-07-17 22:42:13 +0200550static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb,
551 const struct sock *sk)
David S. Miller4895c772012-07-17 04:19:00 -0700552{
Lorenzo Colittid109e612016-11-30 02:56:47 +0900553 const struct net *net = dev_net(skb->dev);
David S. Miller4895c772012-07-17 04:19:00 -0700554 const struct iphdr *iph = ip_hdr(skb);
555 int oif = skb->dev->ifindex;
556 u8 tos = RT_TOS(iph->tos);
557 u8 prot = iph->protocol;
558 u32 mark = skb->mark;
559
Lorenzo Colittid109e612016-11-30 02:56:47 +0900560 __build_flow_key(net, fl4, sk, iph, oif, tos, prot, mark, 0);
David S. Miller4895c772012-07-17 04:19:00 -0700561}
562
Eric Dumazet5abf7f72012-07-17 22:42:13 +0200563static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
David S. Miller4895c772012-07-17 04:19:00 -0700564{
565 const struct inet_sock *inet = inet_sk(sk);
Eric Dumazet5abf7f72012-07-17 22:42:13 +0200566 const struct ip_options_rcu *inet_opt;
David S. Miller4895c772012-07-17 04:19:00 -0700567 __be32 daddr = inet->inet_daddr;
568
569 rcu_read_lock();
570 inet_opt = rcu_dereference(inet->inet_opt);
571 if (inet_opt && inet_opt->opt.srr)
572 daddr = inet_opt->opt.faddr;
573 flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
574 RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
575 inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
576 inet_sk_flowi_flags(sk),
Lorenzo Colittie2d118a2016-11-04 02:23:43 +0900577 daddr, inet->inet_saddr, 0, 0, sk->sk_uid);
David S. Miller4895c772012-07-17 04:19:00 -0700578 rcu_read_unlock();
579}
580
Eric Dumazet5abf7f72012-07-17 22:42:13 +0200581static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk,
582 const struct sk_buff *skb)
David S. Miller4895c772012-07-17 04:19:00 -0700583{
584 if (skb)
585 build_skb_flow_key(fl4, skb, sk);
586 else
587 build_sk_flow_key(fl4, sk);
588}
589
David S. Millerc5038a82012-07-31 15:02:02 -0700590static DEFINE_SPINLOCK(fnhe_lock);
David S. Miller4895c772012-07-17 04:19:00 -0700591
Timo Teräs2ffae992013-06-27 10:27:05 +0300592static void fnhe_flush_routes(struct fib_nh_exception *fnhe)
593{
594 struct rtable *rt;
595
596 rt = rcu_dereference(fnhe->fnhe_rth_input);
597 if (rt) {
598 RCU_INIT_POINTER(fnhe->fnhe_rth_input, NULL);
Wei Wang95c47f92017-06-17 10:42:30 -0700599 dst_dev_put(&rt->dst);
Wei Wang08301062017-06-17 10:42:29 -0700600 dst_release(&rt->dst);
Timo Teräs2ffae992013-06-27 10:27:05 +0300601 }
602 rt = rcu_dereference(fnhe->fnhe_rth_output);
603 if (rt) {
604 RCU_INIT_POINTER(fnhe->fnhe_rth_output, NULL);
Wei Wang95c47f92017-06-17 10:42:30 -0700605 dst_dev_put(&rt->dst);
Wei Wang08301062017-06-17 10:42:29 -0700606 dst_release(&rt->dst);
Timo Teräs2ffae992013-06-27 10:27:05 +0300607 }
608}
609
Julian Anastasovaee06da2012-07-18 10:15:35 +0000610static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
David S. Miller4895c772012-07-17 04:19:00 -0700611{
612 struct fib_nh_exception *fnhe, *oldest;
613
614 oldest = rcu_dereference(hash->chain);
615 for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe;
616 fnhe = rcu_dereference(fnhe->fnhe_next)) {
617 if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp))
618 oldest = fnhe;
619 }
Timo Teräs2ffae992013-06-27 10:27:05 +0300620 fnhe_flush_routes(oldest);
David S. Miller4895c772012-07-17 04:19:00 -0700621 return oldest;
622}
623
David S. Millerd3a25c92012-07-17 13:23:08 -0700624static inline u32 fnhe_hashfun(__be32 daddr)
625{
Eric Dumazetd546c622014-09-04 08:21:31 -0700626 static u32 fnhe_hashrnd __read_mostly;
David S. Millerd3a25c92012-07-17 13:23:08 -0700627 u32 hval;
628
Eric Dumazetd546c622014-09-04 08:21:31 -0700629 net_get_random_once(&fnhe_hashrnd, sizeof(fnhe_hashrnd));
630 hval = jhash_1word((__force u32) daddr, fnhe_hashrnd);
631 return hash_32(hval, FNHE_HASH_SHIFT);
David S. Millerd3a25c92012-07-17 13:23:08 -0700632}
633
Timo Teräs387aa652013-05-27 20:46:31 +0000634static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe)
635{
636 rt->rt_pmtu = fnhe->fnhe_pmtu;
Sabrina Dubrocad52e5a72018-03-14 10:21:14 +0100637 rt->rt_mtu_locked = fnhe->fnhe_mtu_locked;
Timo Teräs387aa652013-05-27 20:46:31 +0000638 rt->dst.expires = fnhe->fnhe_expires;
639
640 if (fnhe->fnhe_gw) {
641 rt->rt_flags |= RTCF_REDIRECTED;
642 rt->rt_gateway = fnhe->fnhe_gw;
643 rt->rt_uses_gateway = 1;
644 }
645}
646
/* Record (or refresh) a per-nexthop exception for destination @daddr:
 * a redirect gateway (@gw) and/or a learned path MTU (@pmtu; @lock set
 * means the MTU may not be raised later), valid until @expires.
 * Serialized by fnhe_lock; safe in softirq context (_bh locking).
 */
static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
				  u32 pmtu, bool lock, unsigned long expires)
{
	struct fnhe_hash_bucket *hash;
	struct fib_nh_exception *fnhe;
	struct rtable *rt;
	u32 genid, hval;
	unsigned int i;
	int depth;

	genid = fnhe_genid(dev_net(nh->nh_dev));
	hval = fnhe_hashfun(daddr);

	spin_lock_bh(&fnhe_lock);

	/* Allocate the hash table lazily on first exception. */
	hash = rcu_dereference(nh->nh_exceptions);
	if (!hash) {
		hash = kzalloc(FNHE_HASH_SIZE * sizeof(*hash), GFP_ATOMIC);
		if (!hash)
			goto out_unlock;
		rcu_assign_pointer(nh->nh_exceptions, hash);
	}

	hash += hval;

	/* Walk the bucket: find an existing entry for @daddr and count
	 * the chain depth for the reclaim decision below.
	 */
	depth = 0;
	for (fnhe = rcu_dereference(hash->chain); fnhe;
	     fnhe = rcu_dereference(fnhe->fnhe_next)) {
		if (fnhe->fnhe_daddr == daddr)
			break;
		depth++;
	}

	if (fnhe) {
		/* Existing entry: update only the fields supplied. */
		if (fnhe->fnhe_genid != genid)
			fnhe->fnhe_genid = genid;
		if (gw)
			fnhe->fnhe_gw = gw;
		if (pmtu) {
			fnhe->fnhe_pmtu = pmtu;
			fnhe->fnhe_mtu_locked = lock;
		}
		/* Never store 0, which would read as "no expiry". */
		fnhe->fnhe_expires = max(1UL, expires);
		/* Update all cached dsts too */
		rt = rcu_dereference(fnhe->fnhe_rth_input);
		if (rt)
			fill_route_from_fnhe(rt, fnhe);
		rt = rcu_dereference(fnhe->fnhe_rth_output);
		if (rt)
			fill_route_from_fnhe(rt, fnhe);
	} else {
		/* New entry: recycle the oldest one when the chain is
		 * too deep, otherwise allocate and link it in.
		 */
		if (depth > FNHE_RECLAIM_DEPTH)
			fnhe = fnhe_oldest(hash);
		else {
			fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC);
			if (!fnhe)
				goto out_unlock;

			fnhe->fnhe_next = hash->chain;
			rcu_assign_pointer(hash->chain, fnhe);
		}
		fnhe->fnhe_genid = genid;
		fnhe->fnhe_daddr = daddr;
		fnhe->fnhe_gw = gw;
		fnhe->fnhe_pmtu = pmtu;
		fnhe->fnhe_mtu_locked = lock;
		fnhe->fnhe_expires = expires;

		/* Exception created; mark the cached routes for the nexthop
		 * stale, so anyone caching it rechecks if this exception
		 * applies to them.
		 */
		rt = rcu_dereference(nh->nh_rth_input);
		if (rt)
			rt->dst.obsolete = DST_OBSOLETE_KILL;

		for_each_possible_cpu(i) {
			struct rtable __rcu **prt;
			prt = per_cpu_ptr(nh->nh_pcpu_rth_output, i);
			rt = rcu_dereference(*prt);
			if (rt)
				rt->dst.obsolete = DST_OBSOLETE_KILL;
		}
	}

	fnhe->fnhe_stamp = jiffies;

out_unlock:
	spin_unlock_bh(&fnhe_lock);
}
737
/* Process an ICMP redirect affecting route @rt: validate the advertised
 * gateway, record it as a next-hop exception, and optionally kill the
 * cached route so it is looked up afresh.  Runs under RCU.
 */
static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4,
			     bool kill_route)
{
	__be32 new_gw = icmp_hdr(skb)->un.gateway;
	__be32 old_gw = ip_hdr(skb)->saddr;
	struct net_device *dev = skb->dev;
	struct in_device *in_dev;
	struct fib_result res;
	struct neighbour *n;
	struct net *net;

	/* Only the four defined redirect codes are honoured. */
	switch (icmp_hdr(skb)->code & 7) {
	case ICMP_REDIR_NET:
	case ICMP_REDIR_NETTOS:
	case ICMP_REDIR_HOST:
	case ICMP_REDIR_HOSTTOS:
		break;

	default:
		return;
	}

	/* The redirect must come from the gateway we currently use. */
	if (rt->rt_gateway != old_gw)
		return;

	in_dev = __in_dev_get_rcu(dev);
	if (!in_dev)
		return;

	net = dev_net(dev);
	/* Reject no-op redirects, disabled reception, and gateways that
	 * can never be valid unicast next hops.
	 */
	if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) ||
	    ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw) ||
	    ipv4_is_zeronet(new_gw))
		goto reject_redirect;

	if (!IN_DEV_SHARED_MEDIA(in_dev)) {
		/* Non-shared media: the new gateway must be on-link with
		 * the old one, and pass the secure-redirect check.
		 */
		if (!inet_addr_onlink(in_dev, new_gw, old_gw))
			goto reject_redirect;
		if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev))
			goto reject_redirect;
	} else {
		if (inet_addr_type(net, new_gw) != RTN_UNICAST)
			goto reject_redirect;
	}

	n = __ipv4_neigh_lookup(rt->dst.dev, new_gw);
	if (!n)
		n = neigh_create(&arp_tbl, &new_gw, rt->dst.dev);
	if (!IS_ERR(n)) {
		if (!(n->nud_state & NUD_VALID)) {
			/* Kick off neighbour resolution; the redirect is
			 * only applied once the neighbour is valid.
			 */
			neigh_event_send(n, NULL);
		} else {
			if (fib_lookup(net, fl4, &res, 0) == 0) {
				struct fib_nh *nh = &FIB_RES_NH(res);

				/* pmtu=0 / lock=false: only the gateway
				 * changes in the exception entry.
				 */
				update_or_create_fnhe(nh, fl4->daddr, new_gw,
						0, false,
						jiffies + ip_rt_gc_timeout);
			}
			if (kill_route)
				rt->dst.obsolete = DST_OBSOLETE_KILL;
			call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n);
		}
		neigh_release(n);
	}
	return;

reject_redirect:
#ifdef CONFIG_IP_ROUTE_VERBOSE
	if (IN_DEV_LOG_MARTIANS(in_dev)) {
		const struct iphdr *iph = (const struct iphdr *) skb->data;
		__be32 daddr = iph->daddr;
		__be32 saddr = iph->saddr;

		net_info_ratelimited("Redirect from %pI4 on %s about %pI4 ignored\n"
				     " Advised path = %pI4 -> %pI4\n",
				     &old_gw, dev->name, &new_gw,
				     &saddr, &daddr);
	}
#endif
	;
}
820
David S. Miller4895c772012-07-17 04:19:00 -0700821static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
822{
823 struct rtable *rt;
824 struct flowi4 fl4;
Michal Kubecekf96ef982013-05-28 08:26:49 +0200825 const struct iphdr *iph = (const struct iphdr *) skb->data;
Lorenzo Colitti7d995692016-12-23 00:33:57 +0900826 struct net *net = dev_net(skb->dev);
Michal Kubecekf96ef982013-05-28 08:26:49 +0200827 int oif = skb->dev->ifindex;
828 u8 tos = RT_TOS(iph->tos);
829 u8 prot = iph->protocol;
830 u32 mark = skb->mark;
David S. Miller4895c772012-07-17 04:19:00 -0700831
832 rt = (struct rtable *) dst;
833
Lorenzo Colitti7d995692016-12-23 00:33:57 +0900834 __build_flow_key(net, &fl4, sk, iph, oif, tos, prot, mark, 0);
David S. Millerceb33202012-07-17 11:31:28 -0700835 __ip_do_redirect(rt, skb, &fl4, true);
David S. Miller4895c772012-07-17 04:19:00 -0700836}
837
Linus Torvalds1da177e2005-04-16 15:20:36 -0700838static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
839{
Eric Dumazetee6b9672008-03-05 18:30:47 -0800840 struct rtable *rt = (struct rtable *)dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700841 struct dst_entry *ret = dst;
842
843 if (rt) {
Timo Teräsd11a4dc2010-03-18 23:20:20 +0000844 if (dst->obsolete > 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700845 ip_rt_put(rt);
846 ret = NULL;
David S. Miller59436342012-07-10 06:58:42 -0700847 } else if ((rt->rt_flags & RTCF_REDIRECTED) ||
848 rt->dst.expires) {
David S. Miller89aef892012-07-17 11:00:09 -0700849 ip_rt_put(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700850 ret = NULL;
851 }
852 }
853 return ret;
854}
855
856/*
857 * Algorithm:
858 * 1. The first ip_rt_redirect_number redirects are sent
859 * with exponential backoff, then we stop sending them at all,
860 * assuming that the host ignores our redirects.
861 * 2. If we did not see packets requiring redirects
862 * during ip_rt_redirect_silence, we assume that the host
863 * forgot redirected route and start to send redirects again.
864 *
865 * This algorithm is much cheaper and more intelligent than dumb load limiting
866 * in icmp.c.
867 *
868 * NOTE. Do not forget to inhibit load limiting for redirects (redundant)
869 * and "frag. need" (breaks PMTU discovery) in icmp.c.
870 */
871
872void ip_rt_send_redirect(struct sk_buff *skb)
873{
Eric Dumazet511c3f92009-06-02 05:14:27 +0000874 struct rtable *rt = skb_rtable(skb);
Eric Dumazet30038fc2009-08-28 23:52:01 -0700875 struct in_device *in_dev;
David S. Miller92d86822011-02-04 15:55:25 -0800876 struct inet_peer *peer;
David S. Miller1d861aa2012-07-10 03:58:16 -0700877 struct net *net;
Eric Dumazet30038fc2009-08-28 23:52:01 -0700878 int log_martians;
David Ahern192132b2015-08-27 16:07:03 -0700879 int vif;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700880
Eric Dumazet30038fc2009-08-28 23:52:01 -0700881 rcu_read_lock();
Changli Gaod8d1f302010-06-10 23:31:35 -0700882 in_dev = __in_dev_get_rcu(rt->dst.dev);
Eric Dumazet30038fc2009-08-28 23:52:01 -0700883 if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) {
884 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700885 return;
Eric Dumazet30038fc2009-08-28 23:52:01 -0700886 }
887 log_martians = IN_DEV_LOG_MARTIANS(in_dev);
David Ahern385add92015-09-29 20:07:13 -0700888 vif = l3mdev_master_ifindex_rcu(rt->dst.dev);
Eric Dumazet30038fc2009-08-28 23:52:01 -0700889 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700890
David S. Miller1d861aa2012-07-10 03:58:16 -0700891 net = dev_net(rt->dst.dev);
David Ahern192132b2015-08-27 16:07:03 -0700892 peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, vif, 1);
David S. Miller92d86822011-02-04 15:55:25 -0800893 if (!peer) {
Julian Anastasove81da0e2012-10-08 11:41:15 +0000894 icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST,
895 rt_nexthop(rt, ip_hdr(skb)->daddr));
David S. Miller92d86822011-02-04 15:55:25 -0800896 return;
897 }
898
Linus Torvalds1da177e2005-04-16 15:20:36 -0700899 /* No redirected packets during ip_rt_redirect_silence;
900 * reset the algorithm.
901 */
David S. Miller92d86822011-02-04 15:55:25 -0800902 if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence))
903 peer->rate_tokens = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700904
905 /* Too many ignored redirects; do not send anything
Changli Gaod8d1f302010-06-10 23:31:35 -0700906 * set dst.rate_last to the last seen redirected packet.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700907 */
David S. Miller92d86822011-02-04 15:55:25 -0800908 if (peer->rate_tokens >= ip_rt_redirect_number) {
909 peer->rate_last = jiffies;
David S. Miller1d861aa2012-07-10 03:58:16 -0700910 goto out_put_peer;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700911 }
912
913 /* Check for load limit; set rate_last to the latest sent
914 * redirect.
915 */
David S. Miller92d86822011-02-04 15:55:25 -0800916 if (peer->rate_tokens == 0 ||
Li Yewang14fb8a72006-12-18 00:26:35 -0800917 time_after(jiffies,
David S. Miller92d86822011-02-04 15:55:25 -0800918 (peer->rate_last +
919 (ip_rt_redirect_load << peer->rate_tokens)))) {
Julian Anastasove81da0e2012-10-08 11:41:15 +0000920 __be32 gw = rt_nexthop(rt, ip_hdr(skb)->daddr);
921
922 icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw);
David S. Miller92d86822011-02-04 15:55:25 -0800923 peer->rate_last = jiffies;
924 ++peer->rate_tokens;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700925#ifdef CONFIG_IP_ROUTE_VERBOSE
Eric Dumazet30038fc2009-08-28 23:52:01 -0700926 if (log_martians &&
Joe Perchese87cc472012-05-13 21:56:26 +0000927 peer->rate_tokens == ip_rt_redirect_number)
928 net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n",
David S. Miller92101b32012-07-23 16:29:00 -0700929 &ip_hdr(skb)->saddr, inet_iif(skb),
Julian Anastasove81da0e2012-10-08 11:41:15 +0000930 &ip_hdr(skb)->daddr, &gw);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700931#endif
932 }
David S. Miller1d861aa2012-07-10 03:58:16 -0700933out_put_peer:
934 inet_putpeer(peer);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700935}
936
937static int ip_error(struct sk_buff *skb)
938{
Eric Dumazet511c3f92009-06-02 05:14:27 +0000939 struct rtable *rt = skb_rtable(skb);
Stephen Suryaputrae2c0dc12018-02-28 12:20:44 -0500940 struct net_device *dev = skb->dev;
941 struct in_device *in_dev;
David S. Miller92d86822011-02-04 15:55:25 -0800942 struct inet_peer *peer;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700943 unsigned long now;
David S. Miller251da412012-06-26 16:27:09 -0700944 struct net *net;
David S. Miller92d86822011-02-04 15:55:25 -0800945 bool send;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700946 int code;
947
Stephen Suryaputrae2c0dc12018-02-28 12:20:44 -0500948 if (netif_is_l3_master(skb->dev)) {
949 dev = __dev_get_by_index(dev_net(skb->dev), IPCB(skb)->iif);
950 if (!dev)
951 goto out;
952 }
953
954 in_dev = __in_dev_get_rcu(dev);
955
Eric W. Biederman381c7592015-05-22 04:58:12 -0500956 /* IP on this device is disabled. */
957 if (!in_dev)
958 goto out;
959
David S. Miller251da412012-06-26 16:27:09 -0700960 net = dev_net(rt->dst.dev);
961 if (!IN_DEV_FORWARD(in_dev)) {
962 switch (rt->dst.error) {
963 case EHOSTUNREACH:
Eric Dumazetb45386e2016-04-27 16:44:35 -0700964 __IP_INC_STATS(net, IPSTATS_MIB_INADDRERRORS);
David S. Miller251da412012-06-26 16:27:09 -0700965 break;
966
967 case ENETUNREACH:
Eric Dumazetb45386e2016-04-27 16:44:35 -0700968 __IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES);
David S. Miller251da412012-06-26 16:27:09 -0700969 break;
970 }
971 goto out;
972 }
973
Changli Gaod8d1f302010-06-10 23:31:35 -0700974 switch (rt->dst.error) {
Joe Perches4500ebf2011-07-01 09:43:07 +0000975 case EINVAL:
976 default:
977 goto out;
978 case EHOSTUNREACH:
979 code = ICMP_HOST_UNREACH;
980 break;
981 case ENETUNREACH:
982 code = ICMP_NET_UNREACH;
Eric Dumazetb45386e2016-04-27 16:44:35 -0700983 __IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES);
Joe Perches4500ebf2011-07-01 09:43:07 +0000984 break;
985 case EACCES:
986 code = ICMP_PKT_FILTERED;
987 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700988 }
989
David Ahern192132b2015-08-27 16:07:03 -0700990 peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr,
David Ahern385add92015-09-29 20:07:13 -0700991 l3mdev_master_ifindex(skb->dev), 1);
David S. Miller92d86822011-02-04 15:55:25 -0800992
993 send = true;
994 if (peer) {
995 now = jiffies;
996 peer->rate_tokens += now - peer->rate_last;
997 if (peer->rate_tokens > ip_rt_error_burst)
998 peer->rate_tokens = ip_rt_error_burst;
999 peer->rate_last = now;
1000 if (peer->rate_tokens >= ip_rt_error_cost)
1001 peer->rate_tokens -= ip_rt_error_cost;
1002 else
1003 send = false;
David S. Miller1d861aa2012-07-10 03:58:16 -07001004 inet_putpeer(peer);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001005 }
David S. Miller92d86822011-02-04 15:55:25 -08001006 if (send)
1007 icmp_send(skb, ICMP_DEST_UNREACH, code, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001008
1009out: kfree_skb(skb);
1010 return 0;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001011}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001012
/* Record a learned path MTU for @rt's destination as a next-hop
 * exception.  An @mtu below ip_rt_min_pmtu is clamped up and the entry
 * is locked so later discovery cannot lower the floor further.
 */
static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
{
	struct dst_entry *dst = &rt->dst;
	struct fib_result res;
	bool lock = false;

	/* Nothing to do when the route's MTU is locked. */
	if (ip_mtu_locked(dst))
		return;

	/* Never raise the effective MTU from here. */
	if (ipv4_mtu(dst) < mtu)
		return;

	if (mtu < ip_rt_min_pmtu) {
		lock = true;
		mtu = ip_rt_min_pmtu;
	}

	/* Skip the FIB lookup if the same MTU is already recorded and
	 * is not within half an expiry interval of timing out.
	 */
	if (rt->rt_pmtu == mtu &&
	    time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2))
		return;

	rcu_read_lock();
	if (fib_lookup(dev_net(dst->dev), fl4, &res, 0) == 0) {
		struct fib_nh *nh = &FIB_RES_NH(res);

		update_or_create_fnhe(nh, fl4->daddr, 0, mtu, lock,
				      jiffies + ip_rt_mtu_expires);
	}
	rcu_read_unlock();
}
1043
David S. Miller4895c772012-07-17 04:19:00 -07001044static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1045 struct sk_buff *skb, u32 mtu)
1046{
1047 struct rtable *rt = (struct rtable *) dst;
1048 struct flowi4 fl4;
1049
1050 ip_rt_build_flow_key(&fl4, sk, skb);
Steffen Klassertd851c122012-10-07 22:47:25 +00001051 __ip_rt_update_pmtu(rt, &fl4, mtu);
David S. Miller4895c772012-07-17 04:19:00 -07001052}
1053
David S. Miller36393392012-06-14 22:21:46 -07001054void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
1055 int oif, u32 mark, u8 protocol, int flow_flags)
1056{
David S. Miller4895c772012-07-17 04:19:00 -07001057 const struct iphdr *iph = (const struct iphdr *) skb->data;
David S. Miller36393392012-06-14 22:21:46 -07001058 struct flowi4 fl4;
1059 struct rtable *rt;
1060
Lorenzo Colitti1b3c61d2014-05-13 10:17:34 -07001061 if (!mark)
1062 mark = IP4_REPLY_MARK(net, skb->mark);
1063
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09001064 __build_flow_key(net, &fl4, NULL, iph, oif,
David S. Miller4895c772012-07-17 04:19:00 -07001065 RT_TOS(iph->tos), protocol, mark, flow_flags);
David S. Miller36393392012-06-14 22:21:46 -07001066 rt = __ip_route_output_key(net, &fl4);
1067 if (!IS_ERR(rt)) {
David S. Miller4895c772012-07-17 04:19:00 -07001068 __ip_rt_update_pmtu(rt, &fl4, mtu);
David S. Miller36393392012-06-14 22:21:46 -07001069 ip_rt_put(rt);
1070 }
1071}
1072EXPORT_SYMBOL_GPL(ipv4_update_pmtu);
1073
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001074static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
David S. Miller36393392012-06-14 22:21:46 -07001075{
David S. Miller4895c772012-07-17 04:19:00 -07001076 const struct iphdr *iph = (const struct iphdr *) skb->data;
1077 struct flowi4 fl4;
1078 struct rtable *rt;
David S. Miller36393392012-06-14 22:21:46 -07001079
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09001080 __build_flow_key(sock_net(sk), &fl4, sk, iph, 0, 0, 0, 0, 0);
Lorenzo Colitti1b3c61d2014-05-13 10:17:34 -07001081
1082 if (!fl4.flowi4_mark)
1083 fl4.flowi4_mark = IP4_REPLY_MARK(sock_net(sk), skb->mark);
1084
David S. Miller4895c772012-07-17 04:19:00 -07001085 rt = __ip_route_output_key(sock_net(sk), &fl4);
1086 if (!IS_ERR(rt)) {
1087 __ip_rt_update_pmtu(rt, &fl4, mtu);
1088 ip_rt_put(rt);
1089 }
David S. Miller36393392012-06-14 22:21:46 -07001090}
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001091
1092void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
1093{
1094 const struct iphdr *iph = (const struct iphdr *) skb->data;
1095 struct flowi4 fl4;
1096 struct rtable *rt;
Eric Dumazet7f502362014-06-30 01:26:23 -07001097 struct dst_entry *odst = NULL;
Steffen Klassertb44108d2013-01-22 00:01:28 +00001098 bool new = false;
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09001099 struct net *net = sock_net(sk);
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001100
1101 bh_lock_sock(sk);
Hannes Frederic Sowa482fc602013-11-05 02:24:17 +01001102
1103 if (!ip_sk_accept_pmtu(sk))
1104 goto out;
1105
Eric Dumazet7f502362014-06-30 01:26:23 -07001106 odst = sk_dst_get(sk);
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001107
Eric Dumazet7f502362014-06-30 01:26:23 -07001108 if (sock_owned_by_user(sk) || !odst) {
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001109 __ipv4_sk_update_pmtu(skb, sk, mtu);
1110 goto out;
1111 }
1112
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09001113 __build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0);
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001114
Eric Dumazet7f502362014-06-30 01:26:23 -07001115 rt = (struct rtable *)odst;
Ian Morris51456b22015-04-03 09:17:26 +01001116 if (odst->obsolete && !odst->ops->check(odst, 0)) {
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001117 rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
1118 if (IS_ERR(rt))
1119 goto out;
Steffen Klassertb44108d2013-01-22 00:01:28 +00001120
1121 new = true;
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001122 }
1123
David Miller0f6c4802017-11-28 15:40:46 -05001124 __ip_rt_update_pmtu((struct rtable *) xfrm_dst_path(&rt->dst), &fl4, mtu);
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001125
Eric Dumazet7f502362014-06-30 01:26:23 -07001126 if (!dst_check(&rt->dst, 0)) {
Steffen Klassertb44108d2013-01-22 00:01:28 +00001127 if (new)
1128 dst_release(&rt->dst);
1129
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001130 rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
1131 if (IS_ERR(rt))
1132 goto out;
1133
Steffen Klassertb44108d2013-01-22 00:01:28 +00001134 new = true;
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001135 }
1136
Steffen Klassertb44108d2013-01-22 00:01:28 +00001137 if (new)
Eric Dumazet7f502362014-06-30 01:26:23 -07001138 sk_dst_set(sk, &rt->dst);
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001139
1140out:
1141 bh_unlock_sock(sk);
Eric Dumazet7f502362014-06-30 01:26:23 -07001142 dst_release(odst);
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001143}
David S. Miller36393392012-06-14 22:21:46 -07001144EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu);
David S. Millerf39925d2011-02-09 22:00:16 -08001145
David S. Millerb42597e2012-07-11 21:25:45 -07001146void ipv4_redirect(struct sk_buff *skb, struct net *net,
1147 int oif, u32 mark, u8 protocol, int flow_flags)
1148{
David S. Miller4895c772012-07-17 04:19:00 -07001149 const struct iphdr *iph = (const struct iphdr *) skb->data;
David S. Millerb42597e2012-07-11 21:25:45 -07001150 struct flowi4 fl4;
1151 struct rtable *rt;
1152
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09001153 __build_flow_key(net, &fl4, NULL, iph, oif,
David S. Miller4895c772012-07-17 04:19:00 -07001154 RT_TOS(iph->tos), protocol, mark, flow_flags);
David S. Millerb42597e2012-07-11 21:25:45 -07001155 rt = __ip_route_output_key(net, &fl4);
1156 if (!IS_ERR(rt)) {
David S. Millerceb33202012-07-17 11:31:28 -07001157 __ip_do_redirect(rt, skb, &fl4, false);
David S. Millerb42597e2012-07-11 21:25:45 -07001158 ip_rt_put(rt);
1159 }
1160}
1161EXPORT_SYMBOL_GPL(ipv4_redirect);
1162
1163void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk)
1164{
David S. Miller4895c772012-07-17 04:19:00 -07001165 const struct iphdr *iph = (const struct iphdr *) skb->data;
1166 struct flowi4 fl4;
1167 struct rtable *rt;
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09001168 struct net *net = sock_net(sk);
David S. Millerb42597e2012-07-11 21:25:45 -07001169
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09001170 __build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0);
1171 rt = __ip_route_output_key(net, &fl4);
David S. Miller4895c772012-07-17 04:19:00 -07001172 if (!IS_ERR(rt)) {
David S. Millerceb33202012-07-17 11:31:28 -07001173 __ip_do_redirect(rt, skb, &fl4, false);
David S. Miller4895c772012-07-17 04:19:00 -07001174 ip_rt_put(rt);
1175 }
David S. Millerb42597e2012-07-11 21:25:45 -07001176}
1177EXPORT_SYMBOL_GPL(ipv4_sk_redirect);
1178
David S. Millerefbc368d2011-12-01 13:38:59 -05001179static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
1180{
1181 struct rtable *rt = (struct rtable *) dst;
1182
David S. Millerceb33202012-07-17 11:31:28 -07001183 /* All IPV4 dsts are created with ->obsolete set to the value
1184 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1185 * into this function always.
1186 *
Timo Teräs387aa652013-05-27 20:46:31 +00001187 * When a PMTU/redirect information update invalidates a route,
1188 * this is indicated by setting obsolete to DST_OBSOLETE_KILL or
1189 * DST_OBSOLETE_DEAD by dst_free().
David S. Millerceb33202012-07-17 11:31:28 -07001190 */
Timo Teräs387aa652013-05-27 20:46:31 +00001191 if (dst->obsolete != DST_OBSOLETE_FORCE_CHK || rt_is_expired(rt))
David S. Millerefbc368d2011-12-01 13:38:59 -05001192 return NULL;
Timo Teräsd11a4dc2010-03-18 23:20:20 +00001193 return dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001194}
1195
Linus Torvalds1da177e2005-04-16 15:20:36 -07001196static void ipv4_link_failure(struct sk_buff *skb)
1197{
1198 struct rtable *rt;
1199
1200 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
1201
Eric Dumazet511c3f92009-06-02 05:14:27 +00001202 rt = skb_rtable(skb);
David S. Miller59436342012-07-10 06:58:42 -07001203 if (rt)
1204 dst_set_expires(&rt->dst, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001205}
1206
Eric W. Biedermanede20592015-10-07 16:48:47 -05001207static int ip_rt_bug(struct net *net, struct sock *sk, struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001208{
Joe Perches91df42b2012-05-15 14:11:54 +00001209 pr_debug("%s: %pI4 -> %pI4, %s\n",
1210 __func__, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr,
1211 skb->dev ? skb->dev->name : "?");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001212 kfree_skb(skb);
Dave Jonesc378a9c2011-05-21 07:16:42 +00001213 WARN_ON(1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001214 return 0;
1215}
1216
/*
   We do not cache the source address of the outgoing interface,
   because it is used only by IP RR, TS and SRR options,
   so that it is out of the fast path.

   BTW remember: "addr" is allowed to be not aligned
   in IP options!
 */
1225
/* Fetch the source address to store into IP RR/TS/SRR options for a
 * packet routed by @rt.  Slow path only; @addr may be unaligned.
 */
void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt)
{
	__be32 src;

	if (rt_is_output_route(rt))
		src = ip_hdr(skb)->saddr;
	else {
		struct fib_result res;
		struct flowi4 fl4;
		struct iphdr *iph;

		iph = ip_hdr(skb);

		/* Rebuild the flow from the received packet so the FIB
		 * can tell us the preferred source address.
		 */
		memset(&fl4, 0, sizeof(fl4));
		fl4.daddr = iph->daddr;
		fl4.saddr = iph->saddr;
		fl4.flowi4_tos = RT_TOS(iph->tos);
		fl4.flowi4_oif = rt->dst.dev->ifindex;
		fl4.flowi4_iif = skb->dev->ifindex;
		fl4.flowi4_mark = skb->mark;

		rcu_read_lock();
		if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res, 0) == 0)
			src = FIB_RES_PREFSRC(dev_net(rt->dst.dev), res);
		else
			/* No route back: pick a universe-scope address
			 * on the device toward the next hop.
			 */
			src = inet_select_addr(rt->dst.dev,
					       rt_nexthop(rt, iph->daddr),
					       RT_SCOPE_UNIVERSE);
		rcu_read_unlock();
	}
	memcpy(addr, &src, 4);
}
1258
#ifdef CONFIG_IP_ROUTE_CLASSID
/* Merge @tag into the route's tclassid: the high and low 16-bit halves
 * are filled independently, and only while still unset.
 */
static void set_class_tag(struct rtable *rt, u32 tag)
{
	if (!(rt->dst.tclassid & 0xFFFF0000))
		rt->dst.tclassid |= tag & 0xFFFF0000;
	if (!(rt->dst.tclassid & 0xFFFF))
		rt->dst.tclassid |= tag & 0xFFFF;
}
#endif
1268
David S. Miller0dbaee32010-12-13 12:52:14 -08001269static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
1270{
Gao Feng7ed14d92017-04-12 12:34:03 +08001271 unsigned int header_size = sizeof(struct tcphdr) + sizeof(struct iphdr);
Eric Dumazet164a5e72017-10-18 17:02:03 -07001272 unsigned int advmss = max_t(unsigned int, ipv4_mtu(dst) - header_size,
Gao Feng7ed14d92017-04-12 12:34:03 +08001273 ip_rt_min_advmss);
David S. Miller0dbaee32010-12-13 12:52:14 -08001274
Gao Feng7ed14d92017-04-12 12:34:03 +08001275 return min(advmss, IPV4_MAX_PMTU - header_size);
David S. Miller0dbaee32010-12-13 12:52:14 -08001276}
1277
/* Effective MTU for a route: the learned PMTU while still valid,
 * otherwise the RTAX_MTU metric or the device MTU, clamped for
 * locked-MTU gatewayed routes and reduced by lwtunnel headroom.
 */
static unsigned int ipv4_mtu(const struct dst_entry *dst)
{
	const struct rtable *rt = (const struct rtable *) dst;
	unsigned int mtu = rt->rt_pmtu;

	/* Ignore a learned PMTU once it has expired. */
	if (!mtu || time_after_eq(jiffies, rt->dst.expires))
		mtu = dst_metric_raw(dst, RTAX_MTU);

	if (mtu)
		return mtu;

	mtu = READ_ONCE(dst->dev->mtu);

	if (unlikely(ip_mtu_locked(dst))) {
		/* Historic 576-byte clamp for locked routes that use a
		 * gateway.
		 */
		if (rt->rt_uses_gateway && mtu > 576)
			mtu = 576;
	}

	mtu = min_t(unsigned int, mtu, IP_MAX_MTU);

	return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
}
1300
David S. Millerf2bb4be2012-07-17 12:20:47 -07001301static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
David S. Miller4895c772012-07-17 04:19:00 -07001302{
Eric Dumazetcaa41522014-09-03 22:21:56 -07001303 struct fnhe_hash_bucket *hash = rcu_dereference(nh->nh_exceptions);
David S. Miller4895c772012-07-17 04:19:00 -07001304 struct fib_nh_exception *fnhe;
1305 u32 hval;
1306
David S. Millerf2bb4be2012-07-17 12:20:47 -07001307 if (!hash)
1308 return NULL;
1309
David S. Millerd3a25c92012-07-17 13:23:08 -07001310 hval = fnhe_hashfun(daddr);
David S. Miller4895c772012-07-17 04:19:00 -07001311
1312 for (fnhe = rcu_dereference(hash[hval].chain); fnhe;
1313 fnhe = rcu_dereference(fnhe->fnhe_next)) {
David S. Millerf2bb4be2012-07-17 12:20:47 -07001314 if (fnhe->fnhe_daddr == daddr)
1315 return fnhe;
1316 }
1317 return NULL;
1318}
David S. Miller4895c772012-07-17 04:19:00 -07001319
/* Bind cached route @rt to next-hop exception @fnhe for @daddr: copy
 * the exception's learned state into the route and, when @do_cache,
 * park the route in the exception's input/output slot.  Returns true
 * if the route was cached.  Serialized by fnhe_lock.
 */
static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
			      __be32 daddr, const bool do_cache)
{
	bool ret = false;

	spin_lock_bh(&fnhe_lock);

	if (daddr == fnhe->fnhe_daddr) {
		struct rtable __rcu **porig;
		struct rtable *orig;
		int genid = fnhe_genid(dev_net(rt->dst.dev));

		if (rt_is_input_route(rt))
			porig = &fnhe->fnhe_rth_input;
		else
			porig = &fnhe->fnhe_rth_output;
		orig = rcu_dereference(*porig);

		/* A genid bump invalidates everything this exception
		 * had learned; reset it before reuse.
		 */
		if (fnhe->fnhe_genid != genid) {
			fnhe->fnhe_genid = genid;
			fnhe->fnhe_gw = 0;
			fnhe->fnhe_pmtu = 0;
			fnhe->fnhe_expires = 0;
			fnhe_flush_routes(fnhe);
			orig = NULL;
		}
		fill_route_from_fnhe(rt, fnhe);
		if (!rt->rt_gateway)
			rt->rt_gateway = daddr;

		if (do_cache) {
			dst_hold(&rt->dst);
			rcu_assign_pointer(*porig, rt);
			if (orig) {
				/* Drop the reference held by the route
				 * we just replaced.
				 */
				dst_dev_put(&orig->dst);
				dst_release(&orig->dst);
			}
			ret = true;
		}

		fnhe->fnhe_stamp = jiffies;
	}
	spin_unlock_bh(&fnhe_lock);

	return ret;
}
1366
/* Try to install @rt as the nexthop's cached route: the shared input
 * slot for input routes, or this CPU's per-cpu output slot otherwise.
 *
 * Returns true when @rt is now the cached route; false when a
 * concurrent writer installed a different route first, in which case
 * the caller must treat @rt as uncached.
 */
static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt)
{
	struct rtable *orig, *prev, **p;
	bool ret = true;

	if (rt_is_input_route(rt)) {
		p = (struct rtable **)&nh->nh_rth_input;
	} else {
		p = (struct rtable **)raw_cpu_ptr(nh->nh_pcpu_rth_output);
	}
	orig = *p;

	/* hold dst before doing cmpxchg() to avoid race condition
	 * on this dst
	 */
	dst_hold(&rt->dst);
	prev = cmpxchg(p, orig, rt);
	if (prev == orig) {
		/* We won the race: release the route we displaced. */
		if (orig) {
			dst_dev_put(&orig->dst);
			dst_release(&orig->dst);
		}
	} else {
		/* Lost the race: undo our hold and report failure. */
		dst_release(&rt->dst);
		ret = false;
	}

	return ret;
}
1396
/* Per-cpu list of "uncached" routes (routes not stored in a nexthop or
 * exception cache slot).  rt_flush_dev() walks every CPU's list to
 * retarget routes when their device goes away.
 */
struct uncached_list {
	spinlock_t lock;
	struct list_head head;
};

static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt_uncached_list);
David S. Millercaacf052012-07-31 15:06:50 -07001403
Xin Long510c3212018-02-14 19:06:02 +08001404void rt_add_uncached_list(struct rtable *rt)
David S. Millercaacf052012-07-31 15:06:50 -07001405{
Eric Dumazet5055c372015-01-14 15:17:06 -08001406 struct uncached_list *ul = raw_cpu_ptr(&rt_uncached_list);
1407
1408 rt->rt_uncached_list = ul;
1409
1410 spin_lock_bh(&ul->lock);
1411 list_add_tail(&rt->rt_uncached, &ul->head);
1412 spin_unlock_bh(&ul->lock);
David S. Millercaacf052012-07-31 15:06:50 -07001413}
1414
Xin Long510c3212018-02-14 19:06:02 +08001415void rt_del_uncached_list(struct rtable *rt)
David S. Millercaacf052012-07-31 15:06:50 -07001416{
Eric Dumazet78df76a2012-08-24 05:40:47 +00001417 if (!list_empty(&rt->rt_uncached)) {
Eric Dumazet5055c372015-01-14 15:17:06 -08001418 struct uncached_list *ul = rt->rt_uncached_list;
1419
1420 spin_lock_bh(&ul->lock);
David S. Millercaacf052012-07-31 15:06:50 -07001421 list_del(&rt->rt_uncached);
Eric Dumazet5055c372015-01-14 15:17:06 -08001422 spin_unlock_bh(&ul->lock);
David S. Millercaacf052012-07-31 15:06:50 -07001423 }
1424}
1425
Xin Long510c3212018-02-14 19:06:02 +08001426static void ipv4_dst_destroy(struct dst_entry *dst)
1427{
1428 struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst);
1429 struct rtable *rt = (struct rtable *)dst;
1430
1431 if (p != &dst_default_metrics && refcount_dec_and_test(&p->refcnt))
1432 kfree(p);
1433
1434 rt_del_uncached_list(rt);
1435}
1436
/* @dev is going away: walk every CPU's uncached-route list and retarget
 * routes still pointing at it to the namespace loopback device, keeping
 * the device reference counts balanced.
 */
void rt_flush_dev(struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct rtable *rt;
	int cpu;

	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu);

		spin_lock_bh(&ul->lock);
		list_for_each_entry(rt, &ul->head, rt_uncached) {
			if (rt->dst.dev != dev)
				continue;
			/* Swap in loopback, hold the new device, then
			 * put the reference the route held on @dev.
			 */
			rt->dst.dev = net->loopback_dev;
			dev_hold(rt->dst.dev);
			dev_put(dev);
		}
		spin_unlock_bh(&ul->lock);
	}
}
1457
Eric Dumazet4331deb2012-07-25 05:11:23 +00001458static bool rt_cache_valid(const struct rtable *rt)
David S. Millerd2d68ba92012-07-17 12:58:50 -07001459{
Eric Dumazet4331deb2012-07-25 05:11:23 +00001460 return rt &&
1461 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1462 !rt_is_expired(rt);
David S. Millerd2d68ba92012-07-17 12:58:50 -07001463}
1464
/* Finish initializing @rt from the fib lookup result @res.
 *
 * When a fib_info is available, copy the nexthop's gateway, metrics,
 * classid and lwtunnel state into the route, then try to cache it:
 * in the matching exception (@fnhe) if one exists, otherwise in the
 * nexthop cache when @do_cache is set.  Any route that does not end up
 * cached is added to the per-cpu uncached list so device teardown
 * (rt_flush_dev) can still find it.
 */
static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
			   const struct fib_result *res,
			   struct fib_nh_exception *fnhe,
			   struct fib_info *fi, u16 type, u32 itag,
			   const bool do_cache)
{
	bool cached = false;

	if (fi) {
		struct fib_nh *nh = &FIB_RES_NH(*res);

		if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) {
			rt->rt_gateway = nh->nh_gw;
			rt->rt_uses_gateway = 1;
		}
		dst_init_metrics(&rt->dst, fi->fib_metrics->metrics, true);
		/* Non-default metrics are shared with the fib_info and
		 * refcounted; ipv4_dst_destroy() drops the reference.
		 */
		if (fi->fib_metrics != &dst_default_metrics) {
			rt->dst._metrics |= DST_METRICS_REFCOUNTED;
			refcount_inc(&fi->fib_metrics->refcnt);
		}
#ifdef CONFIG_IP_ROUTE_CLASSID
		rt->dst.tclassid = nh->nh_tclassid;
#endif
		rt->dst.lwtstate = lwtstate_get(nh->nh_lwtstate);
		if (unlikely(fnhe))
			cached = rt_bind_exception(rt, fnhe, daddr, do_cache);
		else if (do_cache)
			cached = rt_cache_route(nh, rt);
		if (unlikely(!cached)) {
			/* Routes we intend to cache in nexthop exception or
			 * FIB nexthop have the DST_NOCACHE bit clear.
			 * However, if we are unsuccessful at storing this
			 * route into the cache we really need to set it.
			 */
			if (!rt->rt_gateway)
				rt->rt_gateway = daddr;
			rt_add_uncached_list(rt);
		}
	} else
		rt_add_uncached_list(rt);

#ifdef CONFIG_IP_ROUTE_CLASSID
#ifdef CONFIG_IP_MULTIPLE_TABLES
	set_class_tag(rt, res->tclassid);
#endif
	set_class_tag(rt, itag);
#endif
}
1513
/* Allocate and minimally initialize an IPv4 route (dst) bound to @dev.
 *
 * @flags:      RTCF_* route flags; RTCF_LOCAL selects local delivery input.
 * @type:       RTN_* route type.
 * @nopolicy:   set DST_NOPOLICY on the dst.
 * @noxfrm:     set DST_NOXFRM on the dst.
 * @will_cache: routes destined for the nexthop cache omit DST_HOST.
 *
 * Returns NULL on allocation failure.  Gateway/pmtu/iif start zeroed;
 * callers (e.g. rt_set_nexthop()) fill them in afterwards.
 */
struct rtable *rt_dst_alloc(struct net_device *dev,
			    unsigned int flags, u16 type,
			    bool nopolicy, bool noxfrm, bool will_cache)
{
	struct rtable *rt;

	rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
		       (will_cache ? 0 : DST_HOST) |
		       (nopolicy ? DST_NOPOLICY : 0) |
		       (noxfrm ? DST_NOXFRM : 0));

	if (rt) {
		rt->rt_genid = rt_genid_ipv4(dev_net(dev));
		rt->rt_flags = flags;
		rt->rt_type = type;
		rt->rt_is_input = 0;
		rt->rt_iif = 0;
		rt->rt_pmtu = 0;
		rt->rt_mtu_locked = 0;
		rt->rt_gateway = 0;
		rt->rt_uses_gateway = 0;
		INIT_LIST_HEAD(&rt->rt_uncached);

		rt->dst.output = ip_output;
		if (flags & RTCF_LOCAL)
			rt->dst.input = ip_local_deliver;
	}

	return rt;
}
EXPORT_SYMBOL(rt_dst_alloc);
David S. Miller0c4dcd52011-02-17 15:42:37 -08001545
Eric Dumazet96d36222010-06-02 19:21:31 +00001546/* called in rcu_read_lock() section */
Paolo Abenibc044e82017-09-28 15:51:37 +02001547int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1548 u8 tos, struct net_device *dev,
1549 struct in_device *in_dev, u32 *itag)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001550{
Eric Dumazetb5f7e752010-06-02 12:05:27 +00001551 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001552
1553 /* Primary sanity checks. */
Ian Morris51456b22015-04-03 09:17:26 +01001554 if (!in_dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001555 return -EINVAL;
1556
Jan Engelhardt1e637c72008-01-21 03:18:08 -08001557 if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
Thomas Grafd0daebc32012-06-12 00:44:01 +00001558 skb->protocol != htons(ETH_P_IP))
Paolo Abenibc044e82017-09-28 15:51:37 +02001559 return -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001560
Alexander Duyck75fea732015-09-28 11:10:38 -07001561 if (ipv4_is_loopback(saddr) && !IN_DEV_ROUTE_LOCALNET(in_dev))
Paolo Abenibc044e82017-09-28 15:51:37 +02001562 return -EINVAL;
Thomas Grafd0daebc32012-06-12 00:44:01 +00001563
Joe Perchesf97c1e02007-12-16 13:45:43 -08001564 if (ipv4_is_zeronet(saddr)) {
1565 if (!ipv4_is_local_multicast(daddr))
Paolo Abenibc044e82017-09-28 15:51:37 +02001566 return -EINVAL;
Eric Dumazetb5f7e752010-06-02 12:05:27 +00001567 } else {
David S. Miller9e56e382012-06-28 18:54:02 -07001568 err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
Paolo Abenibc044e82017-09-28 15:51:37 +02001569 in_dev, itag);
Eric Dumazetb5f7e752010-06-02 12:05:27 +00001570 if (err < 0)
Paolo Abenibc044e82017-09-28 15:51:37 +02001571 return err;
Eric Dumazetb5f7e752010-06-02 12:05:27 +00001572 }
Paolo Abenibc044e82017-09-28 15:51:37 +02001573 return 0;
1574}
1575
/* called in rcu_read_lock() section */
/* Input path for multicast destinations: validate the source, then
 * build a multicast route; @our additionally selects local delivery
 * (RTCF_LOCAL).  On success the dst is attached to @skb and 0 returned.
 */
static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
			     u8 tos, struct net_device *dev, int our)
{
	struct in_device *in_dev = __in_dev_get_rcu(dev);
	unsigned int flags = RTCF_MULTICAST;
	struct rtable *rth;
	u32 itag = 0;
	int err;

	err = ip_mc_validate_source(skb, daddr, saddr, tos, dev, in_dev, &itag);
	if (err)
		return err;

	if (our)
		flags |= RTCF_LOCAL;

	rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false);
	if (!rth)
		return -ENOBUFS;

#ifdef CONFIG_IP_ROUTE_CLASSID
	rth->dst.tclassid = itag;
#endif
	/* Multicast input routes must never be used for output. */
	rth->dst.output = ip_rt_bug;
	rth->rt_is_input= 1;

#ifdef CONFIG_IP_MROUTE
	/* Non-link-local groups go through the multicast router. */
	if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev))
		rth->dst.input = ip_mr_input;
#endif
	RT_CACHE_STAT_INC(in_slow_mc);

	skb_dst_set(skb, &rth->dst);
	return 0;
}
1613
1614
1615static void ip_handle_martian_source(struct net_device *dev,
1616 struct in_device *in_dev,
1617 struct sk_buff *skb,
Al Viro9e12bb22006-09-26 21:25:20 -07001618 __be32 daddr,
1619 __be32 saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001620{
1621 RT_CACHE_STAT_INC(in_martian_src);
1622#ifdef CONFIG_IP_ROUTE_VERBOSE
1623 if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) {
1624 /*
1625 * RFC1812 recommendation, if source is martian,
1626 * the only hint is MAC header.
1627 */
Joe Perches058bd4d2012-03-11 18:36:11 +00001628 pr_warn("martian source %pI4 from %pI4, on dev %s\n",
Harvey Harrison673d57e2008-10-31 00:53:57 -07001629 &daddr, &saddr, dev->name);
Arnaldo Carvalho de Melo98e399f2007-03-19 15:33:04 -07001630 if (dev->hard_header_len && skb_mac_header_was_set(skb)) {
Joe Perches058bd4d2012-03-11 18:36:11 +00001631 print_hex_dump(KERN_WARNING, "ll header: ",
1632 DUMP_PREFIX_OFFSET, 16, 1,
1633 skb_mac_header(skb),
1634 dev->hard_header_len, true);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001635 }
1636 }
1637#endif
1638}
1639
/* Remove and free the exception entry for @daddr (if present) from
 * @nh's exception hash.  Takes fnhe_lock; the entry is unlinked with
 * RCU-safe pointer updates, its cached routes flushed, and the memory
 * freed after a grace period via kfree_rcu().
 */
static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)
{
	struct fnhe_hash_bucket *hash;
	struct fib_nh_exception *fnhe, __rcu **fnhe_p;
	u32 hval = fnhe_hashfun(daddr);

	spin_lock_bh(&fnhe_lock);

	hash = rcu_dereference_protected(nh->nh_exceptions,
					 lockdep_is_held(&fnhe_lock));
	hash += hval;

	/* Walk the bucket chain, keeping a pointer to the link so the
	 * match can be spliced out in place.
	 */
	fnhe_p = &hash->chain;
	fnhe = rcu_dereference_protected(*fnhe_p, lockdep_is_held(&fnhe_lock));
	while (fnhe) {
		if (fnhe->fnhe_daddr == daddr) {
			rcu_assign_pointer(*fnhe_p, rcu_dereference_protected(
				fnhe->fnhe_next, lockdep_is_held(&fnhe_lock)));
			fnhe_flush_routes(fnhe);
			kfree_rcu(fnhe, rcu);
			break;
		}
		fnhe_p = &fnhe->fnhe_next;
		fnhe = rcu_dereference_protected(fnhe->fnhe_next,
						 lockdep_is_held(&fnhe_lock));
	}

	spin_unlock_bh(&fnhe_lock);
}
1669
/* called in rcu_read_lock() section */
/* Build (or reuse a cached) forwarding route for a packet received on
 * @in_dev whose FIB lookup resolved to @res.  On success the dst is
 * attached to @skb and 0 is returned; a negative errno otherwise.
 */
static int __mkroute_input(struct sk_buff *skb,
			   const struct fib_result *res,
			   struct in_device *in_dev,
			   __be32 daddr, __be32 saddr, u32 tos)
{
	struct fib_nh_exception *fnhe;
	struct rtable *rth;
	int err;
	struct in_device *out_dev;
	bool do_cache;
	u32 itag = 0;

	/* get a working reference to the output device */
	out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res));
	if (!out_dev) {
		net_crit_ratelimited("Bug in ip_route_input_slow(). Please report.\n");
		return -EINVAL;
	}

	err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res),
				  in_dev->dev, in_dev, &itag);
	if (err < 0) {
		ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
					 saddr);

		goto cleanup;
	}

	/* Only cache when we have a fib_info and source validation
	 * produced no itag.
	 */
	do_cache = res->fi && !itag;
	/* Packet would leave via the interface it arrived on and the
	 * media permits it: mark the skb so a redirect may be sent.
	 */
	if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) &&
	    skb->protocol == htons(ETH_P_IP) &&
	    (IN_DEV_SHARED_MEDIA(out_dev) ||
	     inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res))))
		IPCB(skb)->flags |= IPSKB_DOREDIRECT;

	if (skb->protocol != htons(ETH_P_IP)) {
		/* Not IP (i.e. ARP). Do not create route, if it is
		 * invalid for proxy arp. DNAT routes are always valid.
		 *
		 * Proxy arp feature have been extended to allow, ARP
		 * replies back to the same interface, to support
		 * Private VLAN switch technologies. See arp.c.
		 */
		if (out_dev == in_dev &&
		    IN_DEV_PROXY_ARP_PVLAN(in_dev) == 0) {
			err = -EINVAL;
			goto cleanup;
		}
	}

	fnhe = find_exception(&FIB_RES_NH(*res), daddr);
	if (do_cache) {
		if (fnhe) {
			rth = rcu_dereference(fnhe->fnhe_rth_input);
			/* An expired exception route is deleted together
			 * with its exception entry; fall back to the plain
			 * nexthop cache below.
			 */
			if (rth && rth->dst.expires &&
			    time_after(jiffies, rth->dst.expires)) {
				ip_del_fnhe(&FIB_RES_NH(*res), daddr);
				fnhe = NULL;
			} else {
				goto rt_cache;
			}
		}

		rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);

rt_cache:
		if (rt_cache_valid(rth)) {
			skb_dst_set_noref(skb, &rth->dst);
			goto out;
		}
	}

	/* No reusable cached route: allocate and initialize a new one. */
	rth = rt_dst_alloc(out_dev->dev, 0, res->type,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY),
			   IN_DEV_CONF_GET(out_dev, NOXFRM), do_cache);
	if (!rth) {
		err = -ENOBUFS;
		goto cleanup;
	}

	rth->rt_is_input = 1;
	RT_CACHE_STAT_INC(in_slow_tot);

	rth->dst.input = ip_forward;

	rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag,
		       do_cache);
	lwtunnel_set_redirect(&rth->dst);
	skb_dst_set(skb, &rth->dst);
out:
	err = 0;
 cleanup:
	return err;
}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001765
Peter Nørlund79a13152015-09-30 10:12:22 +02001766#ifdef CONFIG_IP_ROUTE_MULTIPATH
/* To make ICMP packets follow the right flow, the multipath hash is
 * calculated from the inner IP addresses.
 */
static void ip_multipath_l3_keys(const struct sk_buff *skb,
				 struct flow_keys *hash_keys)
{
	const struct iphdr *outer_iph = ip_hdr(skb);
	/* Default: hash on the outer header's addresses. */
	const struct iphdr *key_iph = outer_iph;
	const struct iphdr *inner_iph;
	const struct icmphdr *icmph;
	struct iphdr _inner_iph;
	struct icmphdr _icmph;

	if (likely(outer_iph->protocol != IPPROTO_ICMP))
		goto out;

	/* Only the first fragment carries the ICMP header. */
	if (unlikely((outer_iph->frag_off & htons(IP_OFFSET)) != 0))
		goto out;

	icmph = skb_header_pointer(skb, outer_iph->ihl * 4, sizeof(_icmph),
				   &_icmph);
	if (!icmph)
		goto out;

	/* Only these ICMP error types carry the offending inner packet. */
	if (icmph->type != ICMP_DEST_UNREACH &&
	    icmph->type != ICMP_REDIRECT &&
	    icmph->type != ICMP_TIME_EXCEEDED &&
	    icmph->type != ICMP_PARAMETERPROB)
		goto out;

	inner_iph = skb_header_pointer(skb,
				       outer_iph->ihl * 4 + sizeof(_icmph),
				       sizeof(_inner_iph), &_inner_iph);
	if (!inner_iph)
		goto out;

	/* Use the embedded (original) packet's addresses instead. */
	key_iph = inner_iph;
out:
	hash_keys->addrs.v4addrs.src = key_iph->saddr;
	hash_keys->addrs.v4addrs.dst = key_iph->daddr;
}
1808
/* if skb is set it will be used and fl4 can be NULL */
/* Compute the multipath selection hash for a flow, per the
 * fib_multipath_hash_policy sysctl: 0 = L3 (addresses only),
 * 1 = L4 (5-tuple).  Returns a 31-bit hash value.
 */
int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
		       const struct sk_buff *skb, struct flow_keys *flkeys)
{
	struct flow_keys hash_keys;
	u32 mhash;

	/* NOTE(review): no default case — assumes the sysctl only takes
	 * values 0/1; an out-of-range value would hash uninitialized
	 * hash_keys.  Verify against the sysctl clamp.
	 */
	switch (net->ipv4.sysctl_fib_multipath_hash_policy) {
	case 0:
		/* L3: addresses only (inner addresses for ICMP errors). */
		memset(&hash_keys, 0, sizeof(hash_keys));
		hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
		if (skb) {
			ip_multipath_l3_keys(skb, &hash_keys);
		} else {
			hash_keys.addrs.v4addrs.src = fl4->saddr;
			hash_keys.addrs.v4addrs.dst = fl4->daddr;
		}
		break;
	case 1:
		/* L4: addresses + ports + protocol. */
		/* skb is currently provided only when forwarding */
		if (skb) {
			unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
			struct flow_keys keys;

			/* short-circuit if we already have L4 hash present */
			if (skb->l4_hash)
				return skb_get_hash_raw(skb) >> 1;

			memset(&hash_keys, 0, sizeof(hash_keys));

			/* Dissect only if the caller did not already. */
			if (!flkeys) {
				skb_flow_dissect_flow_keys(skb, &keys, flag);
				flkeys = &keys;
			}

			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
			hash_keys.addrs.v4addrs.src = flkeys->addrs.v4addrs.src;
			hash_keys.addrs.v4addrs.dst = flkeys->addrs.v4addrs.dst;
			hash_keys.ports.src = flkeys->ports.src;
			hash_keys.ports.dst = flkeys->ports.dst;
			hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
		} else {
			memset(&hash_keys, 0, sizeof(hash_keys));
			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
			hash_keys.addrs.v4addrs.src = fl4->saddr;
			hash_keys.addrs.v4addrs.dst = fl4->daddr;
			hash_keys.ports.src = fl4->fl4_sport;
			hash_keys.ports.dst = fl4->fl4_dport;
			hash_keys.basic.ip_proto = fl4->flowi4_proto;
		}
		break;
	}
	mhash = flow_hash_from_keys(&hash_keys);

	/* Shifted right to match the skb_get_hash_raw() >> 1 shortcut above. */
	return mhash >> 1;
}
Peter Nørlund79a13152015-09-30 10:12:22 +02001865#endif /* CONFIG_IP_ROUTE_MULTIPATH */
1866
/* Select the nexthop (multipath-aware) for @res, then build the
 * input/forwarding route via __mkroute_input().
 */
static int ip_mkroute_input(struct sk_buff *skb,
			    struct fib_result *res,
			    struct in_device *in_dev,
			    __be32 daddr, __be32 saddr, u32 tos,
			    struct flow_keys *hkeys)
{
#ifdef CONFIG_IP_ROUTE_MULTIPATH
	/* Several nexthops: choose one by flow hash so packets of one
	 * flow stick to the same path.
	 */
	if (res->fi && res->fi->fib_nhs > 1) {
		int h = fib_multipath_hash(res->fi->fib_net, NULL, skb, hkeys);

		fib_select_multipath(res, h);
	}
#endif

	/* create a routing cache entry */
	return __mkroute_input(skb, res, in_dev, daddr, saddr, tos);
}
1884
Linus Torvalds1da177e2005-04-16 15:20:36 -07001885/*
1886 * NOTE. We drop all the packets that has local source
1887 * addresses, because every properly looped back packet
1888 * must have correct destination already attached by output routine.
1889 *
1890 * Such approach solves two big problems:
1891 * 1. Not simplex devices are handled properly.
1892 * 2. IP spoofing attempts are filtered with 100% of guarantee.
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001893 * called with rcu_read_lock()
Linus Torvalds1da177e2005-04-16 15:20:36 -07001894 */
1895
Al Viro9e12bb22006-09-26 21:25:20 -07001896static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
David Ahern5510cdf2017-05-25 10:42:34 -07001897 u8 tos, struct net_device *dev,
1898 struct fib_result *res)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001899{
Eric Dumazet96d36222010-06-02 19:21:31 +00001900 struct in_device *in_dev = __in_dev_get_rcu(dev);
Roopa Prabhue37b1e92018-02-28 22:42:41 -05001901 struct flow_keys *flkeys = NULL, _flkeys;
1902 struct net *net = dev_net(dev);
Thomas Graf1b7179d2015-07-21 10:43:59 +02001903 struct ip_tunnel_info *tun_info;
Roopa Prabhue37b1e92018-02-28 22:42:41 -05001904 int err = -EINVAL;
Eric Dumazet95c96172012-04-15 05:58:06 +00001905 unsigned int flags = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001906 u32 itag = 0;
Eric Dumazet95c96172012-04-15 05:58:06 +00001907 struct rtable *rth;
Roopa Prabhue37b1e92018-02-28 22:42:41 -05001908 struct flowi4 fl4;
David S. Millerd2d68ba92012-07-17 12:58:50 -07001909 bool do_cache;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001910
1911 /* IP on this device is disabled. */
1912
1913 if (!in_dev)
1914 goto out;
1915
1916 /* Check for the most weird martians, which can be not detected
1917 by fib_lookup.
1918 */
1919
Jiri Benc61adedf2015-08-20 13:56:25 +02001920 tun_info = skb_tunnel_info(skb);
Jiri Benc46fa0622015-08-28 20:48:19 +02001921 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
Thomas Graf1b7179d2015-07-21 10:43:59 +02001922 fl4.flowi4_tun_key.tun_id = tun_info->key.tun_id;
1923 else
1924 fl4.flowi4_tun_key.tun_id = 0;
Thomas Graff38a9eb2015-07-21 10:43:56 +02001925 skb_dst_drop(skb);
1926
Thomas Grafd0daebc32012-06-12 00:44:01 +00001927 if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001928 goto martian_source;
1929
David Ahern5510cdf2017-05-25 10:42:34 -07001930 res->fi = NULL;
1931 res->table = NULL;
Andy Walls27a954b2010-10-17 15:11:22 +00001932 if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001933 goto brd_input;
1934
1935 /* Accept zero addresses only to limited broadcast;
1936 * I even do not know to fix it or not. Waiting for complains :-)
1937 */
Joe Perchesf97c1e02007-12-16 13:45:43 -08001938 if (ipv4_is_zeronet(saddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001939 goto martian_source;
1940
Thomas Grafd0daebc32012-06-12 00:44:01 +00001941 if (ipv4_is_zeronet(daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001942 goto martian_destination;
1943
Eric Dumazet9eb43e72012-08-03 21:27:25 +00001944 /* Following code try to avoid calling IN_DEV_NET_ROUTE_LOCALNET(),
1945 * and call it once if daddr or/and saddr are loopback addresses
1946 */
1947 if (ipv4_is_loopback(daddr)) {
1948 if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
Thomas Grafd0daebc32012-06-12 00:44:01 +00001949 goto martian_destination;
Eric Dumazet9eb43e72012-08-03 21:27:25 +00001950 } else if (ipv4_is_loopback(saddr)) {
1951 if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
Thomas Grafd0daebc32012-06-12 00:44:01 +00001952 goto martian_source;
1953 }
1954
Linus Torvalds1da177e2005-04-16 15:20:36 -07001955 /*
1956 * Now we are ready to route packet.
1957 */
David S. Miller68a5e3d2011-03-11 20:07:33 -05001958 fl4.flowi4_oif = 0;
David Aherne0d56fd2016-09-10 12:09:57 -07001959 fl4.flowi4_iif = dev->ifindex;
David S. Miller68a5e3d2011-03-11 20:07:33 -05001960 fl4.flowi4_mark = skb->mark;
1961 fl4.flowi4_tos = tos;
1962 fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
David Ahernb84f7872015-09-29 19:07:07 -07001963 fl4.flowi4_flags = 0;
David S. Miller68a5e3d2011-03-11 20:07:33 -05001964 fl4.daddr = daddr;
1965 fl4.saddr = saddr;
Julian Anastasov8bcfd092017-02-26 15:50:52 +02001966 fl4.flowi4_uid = sock_net_uid(net, NULL);
Roopa Prabhue37b1e92018-02-28 22:42:41 -05001967
1968 if (fib4_rules_early_flow_dissect(net, skb, &fl4, &_flkeys))
1969 flkeys = &_flkeys;
1970
David Ahern5510cdf2017-05-25 10:42:34 -07001971 err = fib_lookup(net, &fl4, res, 0);
Duan Jiongcd0f0b92014-02-14 18:26:22 +08001972 if (err != 0) {
1973 if (!IN_DEV_FORWARD(in_dev))
1974 err = -EHOSTUNREACH;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001975 goto no_route;
Duan Jiongcd0f0b92014-02-14 18:26:22 +08001976 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001977
David Ahern5510cdf2017-05-25 10:42:34 -07001978 if (res->type == RTN_BROADCAST)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001979 goto brd_input;
1980
David Ahern5510cdf2017-05-25 10:42:34 -07001981 if (res->type == RTN_LOCAL) {
Michael Smith5c04c812011-04-07 04:51:50 +00001982 err = fib_validate_source(skb, saddr, daddr, tos,
Cong Wang0d5edc62014-04-15 16:25:35 -07001983 0, dev, in_dev, &itag);
Eric Dumazetb5f7e752010-06-02 12:05:27 +00001984 if (err < 0)
David Ahern0d753962015-09-28 11:10:44 -07001985 goto martian_source;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001986 goto local_input;
1987 }
1988
Duan Jiongcd0f0b92014-02-14 18:26:22 +08001989 if (!IN_DEV_FORWARD(in_dev)) {
1990 err = -EHOSTUNREACH;
David S. Miller251da412012-06-26 16:27:09 -07001991 goto no_route;
Duan Jiongcd0f0b92014-02-14 18:26:22 +08001992 }
David Ahern5510cdf2017-05-25 10:42:34 -07001993 if (res->type != RTN_UNICAST)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001994 goto martian_destination;
1995
Roopa Prabhue37b1e92018-02-28 22:42:41 -05001996 err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos, flkeys);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001997out: return err;
1998
1999brd_input:
2000 if (skb->protocol != htons(ETH_P_IP))
2001 goto e_inval;
2002
David S. Miller41347dc2012-06-28 04:05:27 -07002003 if (!ipv4_is_zeronet(saddr)) {
David S. Miller9e56e382012-06-28 18:54:02 -07002004 err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
2005 in_dev, &itag);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002006 if (err < 0)
David Ahern0d753962015-09-28 11:10:44 -07002007 goto martian_source;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002008 }
2009 flags |= RTCF_BROADCAST;
David Ahern5510cdf2017-05-25 10:42:34 -07002010 res->type = RTN_BROADCAST;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002011 RT_CACHE_STAT_INC(in_brd);
2012
2013local_input:
David S. Millerd2d68ba92012-07-17 12:58:50 -07002014 do_cache = false;
David Ahern5510cdf2017-05-25 10:42:34 -07002015 if (res->fi) {
David S. Millerfe3edf42012-07-23 13:22:20 -07002016 if (!itag) {
David Ahern5510cdf2017-05-25 10:42:34 -07002017 rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
David S. Millerd2d68ba92012-07-17 12:58:50 -07002018 if (rt_cache_valid(rth)) {
David S. Millerc6cffba2012-07-26 11:14:38 +00002019 skb_dst_set_noref(skb, &rth->dst);
2020 err = 0;
2021 goto out;
David S. Millerd2d68ba92012-07-17 12:58:50 -07002022 }
2023 do_cache = true;
2024 }
2025 }
2026
David Ahernf5a0aab2016-12-29 15:29:03 -08002027 rth = rt_dst_alloc(l3mdev_master_dev_rcu(dev) ? : net->loopback_dev,
David Ahern5510cdf2017-05-25 10:42:34 -07002028 flags | RTCF_LOCAL, res->type,
David S. Millerd2d68ba92012-07-17 12:58:50 -07002029 IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002030 if (!rth)
2031 goto e_nobufs;
2032
Changli Gaod8d1f302010-06-10 23:31:35 -07002033 rth->dst.output= ip_rt_bug;
David S. Millercf911662011-04-28 14:31:47 -07002034#ifdef CONFIG_IP_ROUTE_CLASSID
2035 rth->dst.tclassid = itag;
2036#endif
David S. Miller9917e1e82012-07-17 14:44:26 -07002037 rth->rt_is_input = 1;
Roopa Prabhu571e7222015-07-21 10:43:47 +02002038
Duan Jionga6254862014-02-17 15:23:43 +08002039 RT_CACHE_STAT_INC(in_slow_tot);
David Ahern5510cdf2017-05-25 10:42:34 -07002040 if (res->type == RTN_UNREACHABLE) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002041 rth->dst.input= ip_error;
2042 rth->dst.error= -err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002043 rth->rt_flags &= ~RTCF_LOCAL;
2044 }
Thomas Grafefd85702016-11-30 17:10:09 +01002045
Alexei Starovoitovdcdfdf52013-11-19 19:12:34 -08002046 if (do_cache) {
David Ahern5510cdf2017-05-25 10:42:34 -07002047 struct fib_nh *nh = &FIB_RES_NH(*res);
Thomas Grafefd85702016-11-30 17:10:09 +01002048
2049 rth->dst.lwtstate = lwtstate_get(nh->nh_lwtstate);
2050 if (lwtunnel_input_redirect(rth->dst.lwtstate)) {
2051 WARN_ON(rth->dst.input == lwtunnel_input);
2052 rth->dst.lwtstate->orig_input = rth->dst.input;
2053 rth->dst.input = lwtunnel_input;
2054 }
2055
Wei Wanga4c2fd72017-06-17 10:42:42 -07002056 if (unlikely(!rt_cache_route(nh, rth)))
Alexei Starovoitovdcdfdf52013-11-19 19:12:34 -08002057 rt_add_uncached_list(rth);
Alexei Starovoitovdcdfdf52013-11-19 19:12:34 -08002058 }
David S. Miller89aef892012-07-17 11:00:09 -07002059 skb_dst_set(skb, &rth->dst);
David S. Millerb23dd4f2011-03-02 14:31:35 -08002060 err = 0;
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00002061 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002062
2063no_route:
2064 RT_CACHE_STAT_INC(in_no_route);
David Ahern5510cdf2017-05-25 10:42:34 -07002065 res->type = RTN_UNREACHABLE;
2066 res->fi = NULL;
2067 res->table = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002068 goto local_input;
2069
2070 /*
2071 * Do not cache martian addresses: they should be logged (RFC1812)
2072 */
2073martian_destination:
2074 RT_CACHE_STAT_INC(in_martian_dst);
2075#ifdef CONFIG_IP_ROUTE_VERBOSE
Joe Perchese87cc472012-05-13 21:56:26 +00002076 if (IN_DEV_LOG_MARTIANS(in_dev))
2077 net_warn_ratelimited("martian destination %pI4 from %pI4, dev %s\n",
2078 &daddr, &saddr, dev->name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002079#endif
Dietmar Eggemann2c2910a2005-06-28 13:06:23 -07002080
Linus Torvalds1da177e2005-04-16 15:20:36 -07002081e_inval:
2082 err = -EINVAL;
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00002083 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002084
2085e_nobufs:
2086 err = -ENOBUFS;
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00002087 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002088
2089martian_source:
2090 ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00002091 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002092}
2093
/* Resolve the input route for @skb received on @dev and attach it to the
 * skb, without the caller holding a dst reference ("noref": the dst is
 * only guaranteed valid for the duration of the RCU-protected lookup).
 *
 * Returns 0 on success or a negative errno (e.g. -EINVAL for a packet
 * nobody will deliver) from ip_route_input_rcu().
 */
int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
			 u8 tos, struct net_device *dev)
{
	struct fib_result res;
	int err;

	/* Only the routing-relevant bits of the TOS byte participate. */
	tos &= IPTOS_RT_MASK;
	/* res and any dst set on the skb are only valid inside this RCU
	 * read-side critical section.
	 */
	rcu_read_lock();
	err = ip_route_input_rcu(skb, daddr, saddr, tos, dev, &res);
	rcu_read_unlock();

	return err;
}
EXPORT_SYMBOL(ip_route_input_noref);
2108
2109/* called with rcu_read_lock held */
2110int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2111 u8 tos, struct net_device *dev, struct fib_result *res)
2112{
Linus Torvalds1da177e2005-04-16 15:20:36 -07002113 /* Multicast recognition logic is moved from route cache to here.
2114 The problem was that too many Ethernet cards have broken/missing
2115 hardware multicast filters :-( As result the host on multicasting
2116 network acquires a lot of useless route cache entries, sort of
2117 SDR messages from all the world. Now we try to get rid of them.
2118 Really, provided software IP multicast filter is organized
2119 reasonably (at least, hashed), it does not result in a slowdown
2120 comparing with route cache reject entries.
2121 Note, that multicast routers are not affected, because
2122 route cache entry is created eventually.
2123 */
Joe Perchesf97c1e02007-12-16 13:45:43 -08002124 if (ipv4_is_multicast(daddr)) {
Eric Dumazet96d36222010-06-02 19:21:31 +00002125 struct in_device *in_dev = __in_dev_get_rcu(dev);
David Aherne58e4152016-10-31 15:54:00 -07002126 int our = 0;
David Ahern5510cdf2017-05-25 10:42:34 -07002127 int err = -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002128
David Aherne58e4152016-10-31 15:54:00 -07002129 if (in_dev)
2130 our = ip_check_mc_rcu(in_dev, daddr, saddr,
2131 ip_hdr(skb)->protocol);
2132
2133 /* check l3 master if no match yet */
2134 if ((!in_dev || !our) && netif_is_l3_slave(dev)) {
2135 struct in_device *l3_in_dev;
2136
2137 l3_in_dev = __in_dev_get_rcu(skb->dev);
2138 if (l3_in_dev)
2139 our = ip_check_mc_rcu(l3_in_dev, daddr, saddr,
2140 ip_hdr(skb)->protocol);
2141 }
2142
David Aherne58e4152016-10-31 15:54:00 -07002143 if (our
Linus Torvalds1da177e2005-04-16 15:20:36 -07002144#ifdef CONFIG_IP_MROUTE
David Aherne58e4152016-10-31 15:54:00 -07002145 ||
2146 (!ipv4_is_local_multicast(daddr) &&
2147 IN_DEV_MFORWARD(in_dev))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002148#endif
David Aherne58e4152016-10-31 15:54:00 -07002149 ) {
David Ahern5510cdf2017-05-25 10:42:34 -07002150 err = ip_route_input_mc(skb, daddr, saddr,
David Aherne58e4152016-10-31 15:54:00 -07002151 tos, dev, our);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002152 }
David Ahern5510cdf2017-05-25 10:42:34 -07002153 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002154 }
David Ahern5510cdf2017-05-25 10:42:34 -07002155
2156 return ip_route_input_slow(skb, daddr, saddr, tos, dev, res);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002157}
2158
/* Build the output rtable for a flow that has been resolved to @res and
 * will leave via @dev_out; called with rcu_read_lock() held.
 *
 * When caching is permitted and the nexthop already holds a usable dst
 * (a matching fib_nh_exception or the per-cpu cached entry), that dst
 * is returned instead of allocating a new one.
 * Returns the rtable or an ERR_PTR() on failure.
 */
static struct rtable *__mkroute_output(const struct fib_result *res,
				       const struct flowi4 *fl4, int orig_oif,
				       struct net_device *dev_out,
				       unsigned int flags)
{
	struct fib_info *fi = res->fi;
	struct fib_nh_exception *fnhe;
	struct in_device *in_dev;
	u16 type = res->type;
	struct rtable *rth;
	bool do_cache;

	in_dev = __in_dev_get_rcu(dev_out);
	if (!in_dev)
		return ERR_PTR(-EINVAL);

	/* A loopback source address may only leave via a loopback or an
	 * l3 master device, unless route_localnet is enabled.
	 */
	if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
		if (ipv4_is_loopback(fl4->saddr) &&
		    !(dev_out->flags & IFF_LOOPBACK) &&
		    !netif_is_l3_master(dev_out))
			return ERR_PTR(-EINVAL);

	/* Reclassify special destinations regardless of the FIB result. */
	if (ipv4_is_lbcast(fl4->daddr))
		type = RTN_BROADCAST;
	else if (ipv4_is_multicast(fl4->daddr))
		type = RTN_MULTICAST;
	else if (ipv4_is_zeronet(fl4->daddr))
		return ERR_PTR(-EINVAL);

	if (dev_out->flags & IFF_LOOPBACK)
		flags |= RTCF_LOCAL;

	do_cache = true;
	if (type == RTN_BROADCAST) {
		flags |= RTCF_BROADCAST | RTCF_LOCAL;
		fi = NULL;	/* broadcast routes carry no nexthop info */
	} else if (type == RTN_MULTICAST) {
		flags |= RTCF_MULTICAST | RTCF_LOCAL;
		if (!ip_check_mc_rcu(in_dev, fl4->daddr, fl4->saddr,
				     fl4->flowi4_proto))
			flags &= ~RTCF_LOCAL;
		else
			do_cache = false;
		/* If multicast route do not exist use
		 * default one, but do not gateway in this case.
		 * Yes, it is hack.
		 */
		if (fi && res->prefixlen < 4)
			fi = NULL;
	} else if ((type == RTN_LOCAL) && (orig_oif != 0) &&
		   (orig_oif != dev_out->ifindex)) {
		/* For local routes that require a particular output interface
		 * we do not want to cache the result.  Caching the result
		 * causes incorrect behaviour when there are multiple source
		 * addresses on the interface, the end result being that if the
		 * intended recipient is waiting on that interface for the
		 * packet he won't receive it because it will be delivered on
		 * the loopback interface and the IP_PKTINFO ipi_ifindex will
		 * be set to the loopback interface as well.
		 */
		fi = NULL;
	}

	fnhe = NULL;
	do_cache &= fi != NULL;
	if (do_cache) {
		struct rtable __rcu **prth;
		struct fib_nh *nh = &FIB_RES_NH(*res);

		/* Prefer a per-destination exception entry over the
		 * generic per-cpu cached dst of the nexthop.
		 */
		fnhe = find_exception(nh, fl4->daddr);
		if (fnhe) {
			prth = &fnhe->fnhe_rth_output;
			rth = rcu_dereference(*prth);
			if (rth && rth->dst.expires &&
			    time_after(jiffies, rth->dst.expires)) {
				/* Exception has expired: remove it and
				 * fall through to the per-cpu cache.
				 */
				ip_del_fnhe(nh, fl4->daddr);
				fnhe = NULL;
			} else {
				goto rt_cache;
			}
		}

		if (unlikely(fl4->flowi4_flags &
			     FLOWI_FLAG_KNOWN_NH &&
			     !(nh->nh_gw &&
			       nh->nh_scope == RT_SCOPE_LINK))) {
			/* Caller supplied its own nexthop; a shared cached
			 * entry would not match it, so build an uncached dst.
			 */
			do_cache = false;
			goto add;
		}
		prth = raw_cpu_ptr(nh->nh_pcpu_rth_output);
		rth = rcu_dereference(*prth);

rt_cache:
		/* rth was loaded from either the exception or the
		 * per-cpu slot above.
		 */
		if (rt_cache_valid(rth) && dst_hold_safe(&rth->dst))
			return rth;
	}

add:
	rth = rt_dst_alloc(dev_out, flags, type,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY),
			   IN_DEV_CONF_GET(in_dev, NOXFRM),
			   do_cache);
	if (!rth)
		return ERR_PTR(-ENOBUFS);

	rth->rt_iif = orig_oif;

	RT_CACHE_STAT_INC(out_slow_tot);

	if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
		if (flags & RTCF_LOCAL &&
		    !(dev_out->flags & IFF_LOOPBACK)) {
			rth->dst.output = ip_mc_output;
			RT_CACHE_STAT_INC(out_slow_mc);
		}
#ifdef CONFIG_IP_MROUTE
		if (type == RTN_MULTICAST) {
			if (IN_DEV_MFORWARD(in_dev) &&
			    !ipv4_is_local_multicast(fl4->daddr)) {
				rth->dst.input = ip_mr_input;
				rth->dst.output = ip_mc_output;
			}
		}
#endif
	}

	rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0, do_cache);
	lwtunnel_set_redirect(&rth->dst);

	return rth;
}
2291
/*
 * Major route resolver routine: resolve an output route for @fl4.
 * Seeds the flow with a loopback iif and the routing scope derived
 * from the TOS byte, then performs the lookup under rcu_read_lock().
 * Returns the rtable or an ERR_PTR() from the rcu helper.
 */

struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
					const struct sk_buff *skb)
{
	__u8 tos = RT_FL_TOS(fl4);
	struct fib_result res;
	struct rtable *rth;

	/* Start from a clean lookup result. */
	res.tclassid = 0;
	res.fi = NULL;
	res.table = NULL;

	fl4->flowi4_iif = LOOPBACK_IFINDEX;
	fl4->flowi4_tos = tos & IPTOS_RT_MASK;
	/* RTO_ONLINK in the TOS maps to link scope (directly connected). */
	fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
			     RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);

	rcu_read_lock();
	rth = ip_route_output_key_hash_rcu(net, fl4, &res, skb);
	rcu_read_unlock();

	return rth;
}
EXPORT_SYMBOL_GPL(ip_route_output_key_hash);
2319
/* Core of the output route resolver; called with rcu_read_lock held.
 *
 * Validates fl4->saddr, fills in missing saddr/daddr/flowi4_oif where
 * the flow allows it, performs the FIB lookup, and finally builds (or
 * fetches from cache) the rtable via __mkroute_output().
 * Returns the rtable or an ERR_PTR().
 */
struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4,
					    struct fib_result *res,
					    const struct sk_buff *skb)
{
	struct net_device *dev_out = NULL;
	int orig_oif = fl4->flowi4_oif;
	unsigned int flags = 0;
	struct rtable *rth;
	int err = -ENETUNREACH;

	if (fl4->saddr) {
		/* A multicast, broadcast or zero source address is never
		 * a valid local source.
		 */
		rth = ERR_PTR(-EINVAL);
		if (ipv4_is_multicast(fl4->saddr) ||
		    ipv4_is_lbcast(fl4->saddr) ||
		    ipv4_is_zeronet(fl4->saddr))
			goto out;

		/* I removed check for oif == dev_out->oif here.
		   It was wrong for two reasons:
		   1. ip_dev_find(net, saddr) can return wrong iface, if saddr
		      is assigned to multiple interfaces.
		   2. Moreover, we are allowed to send packets with saddr
		      of another iface. --ANK
		 */

		if (fl4->flowi4_oif == 0 &&
		    (ipv4_is_multicast(fl4->daddr) ||
		     ipv4_is_lbcast(fl4->daddr))) {
			/* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
			dev_out = __ip_dev_find(net, fl4->saddr, false);
			if (!dev_out)
				goto out;

			/* Special hack: user can direct multicasts
			   and limited broadcast via necessary interface
			   without fiddling with IP_MULTICAST_IF or IP_PKTINFO.
			   This hack is not just for fun, it allows
			   vic,vat and friends to work.
			   They bind socket to loopback, set ttl to zero
			   and expect that it will work.
			   From the viewpoint of routing cache they are broken,
			   because we are not allowed to build multicast path
			   with loopback source addr (look, routing cache
			   cannot know, that ttl is zero, so that packet
			   will not leave this host and route is valid).
			   Luckily, this hack is good workaround.
			 */

			fl4->flowi4_oif = dev_out->ifindex;
			goto make_route;
		}

		if (!(fl4->flowi4_flags & FLOWI_FLAG_ANYSRC)) {
			/* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
			if (!__ip_dev_find(net, fl4->saddr, false))
				goto out;
		}
	}


	if (fl4->flowi4_oif) {
		dev_out = dev_get_by_index_rcu(net, fl4->flowi4_oif);
		rth = ERR_PTR(-ENODEV);
		if (!dev_out)
			goto out;

		/* RACE: Check return value of inet_select_addr instead. */
		if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) {
			rth = ERR_PTR(-ENETUNREACH);
			goto out;
		}
		/* Link-local multicast, limited broadcast and IGMP are
		 * always sent directly on the selected device.
		 */
		if (ipv4_is_local_multicast(fl4->daddr) ||
		    ipv4_is_lbcast(fl4->daddr) ||
		    fl4->flowi4_proto == IPPROTO_IGMP) {
			if (!fl4->saddr)
				fl4->saddr = inet_select_addr(dev_out, 0,
							      RT_SCOPE_LINK);
			goto make_route;
		}
		if (!fl4->saddr) {
			if (ipv4_is_multicast(fl4->daddr))
				fl4->saddr = inet_select_addr(dev_out, 0,
							      fl4->flowi4_scope);
			else if (!fl4->daddr)
				fl4->saddr = inet_select_addr(dev_out, 0,
							      RT_SCOPE_HOST);
		}
	}

	if (!fl4->daddr) {
		/* No destination at all: route the packet to loopback. */
		fl4->daddr = fl4->saddr;
		if (!fl4->daddr)
			fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK);
		dev_out = net->loopback_dev;
		fl4->flowi4_oif = LOOPBACK_IFINDEX;
		res->type = RTN_LOCAL;
		flags |= RTCF_LOCAL;
		goto make_route;
	}

	err = fib_lookup(net, fl4, res, 0);
	if (err) {
		res->fi = NULL;
		res->table = NULL;
		if (fl4->flowi4_oif &&
		    (ipv4_is_multicast(fl4->daddr) ||
		     !netif_index_is_l3_master(net, fl4->flowi4_oif))) {
			/* Apparently, routing tables are wrong. Assume,
			   that the destination is on link.

			   WHY? DW.
			   Because we are allowed to send to iface
			   even if it has NO routes and NO assigned
			   addresses. When oif is specified, routing
			   tables are looked up with only one purpose:
			   to catch if destination is gatewayed, rather than
			   direct. Moreover, if MSG_DONTROUTE is set,
			   we send packet, ignoring both routing tables
			   and ifaddr state. --ANK


			   We could make it even if oif is unknown,
			   likely IPv6, but we do not.
			 */

			if (fl4->saddr == 0)
				fl4->saddr = inet_select_addr(dev_out, 0,
							      RT_SCOPE_LINK);
			res->type = RTN_UNICAST;
			goto make_route;
		}
		rth = ERR_PTR(err);
		goto out;
	}

	if (res->type == RTN_LOCAL) {
		if (!fl4->saddr) {
			if (res->fi->fib_prefsrc)
				fl4->saddr = res->fi->fib_prefsrc;
			else
				fl4->saddr = fl4->daddr;
		}

		/* L3 master device is the loopback for that domain */
		dev_out = l3mdev_master_dev_rcu(FIB_RES_DEV(*res)) ? :
			net->loopback_dev;

		/* make sure orig_oif points to fib result device even
		 * though packet rx/tx happens over loopback or l3mdev
		 */
		orig_oif = FIB_RES_OIF(*res);

		fl4->flowi4_oif = dev_out->ifindex;
		flags |= RTCF_LOCAL;
		goto make_route;
	}

	/* Choose the final path/nexthop for the lookup result. */
	fib_select_path(net, res, fl4, skb);

	dev_out = FIB_RES_DEV(*res);
	fl4->flowi4_oif = dev_out->ifindex;


make_route:
	rth = __mkroute_output(res, fl4, orig_oif, dev_out, flags);

out:
	return rth;
}
Arnaldo Carvalho de Melod8c97a92005-08-09 20:12:12 -07002489
/* ->check for blackhole routes: unconditionally returns NULL, i.e. the
 * cached entry is always reported as stale.
 */
static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie)
{
	return NULL;
}
2494
Steffen Klassertebb762f2011-11-23 02:12:51 +00002495static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst)
Roland Dreierec831ea2011-01-31 13:16:00 -08002496{
Steffen Klassert618f9bc2011-11-23 02:13:31 +00002497 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
2498
2499 return mtu ? : dst->dev->mtu;
Roland Dreierec831ea2011-01-31 13:16:00 -08002500}
2501
/* ->update_pmtu stub: a blackhole route deliberately ignores PMTU
 * updates; there is nothing to adjust.
 */
static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
					  struct sk_buff *skb, u32 mtu)
{
}
2506
/* ->redirect stub: redirects are ignored for blackhole routes. */
static void ipv4_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				       struct sk_buff *skb)
{
}
2511
/* ->cow_metrics stub: returns NULL — no writable copy of the metrics
 * is ever made for a blackhole route.
 */
static u32 *ipv4_rt_blackhole_cow_metrics(struct dst_entry *dst,
					  unsigned long old)
{
	return NULL;
}
2517
/* dst_ops for blackhole routes (see ipv4_blackhole_route()): all
 * state-changing callbacks are no-op stubs and ->check always returns
 * NULL, so such entries accept no updates and never revalidate.
 */
static struct dst_ops ipv4_dst_blackhole_ops = {
	.family			=	AF_INET,
	.check			=	ipv4_blackhole_dst_check,
	.mtu			=	ipv4_blackhole_mtu,
	.default_advmss		=	ipv4_default_advmss,
	.update_pmtu		=	ipv4_rt_blackhole_update_pmtu,
	.redirect		=	ipv4_rt_blackhole_redirect,
	.cow_metrics		=	ipv4_rt_blackhole_cow_metrics,
	.neigh_lookup		=	ipv4_neigh_lookup,
};
2528
/* Replace @dst_orig with a "blackhole" copy: a standalone rtable whose
 * input/output handlers discard every packet, while the routing
 * attributes of the original (iif, flags, type, gateway, pmtu, ...)
 * are preserved. Consumes the caller's reference on @dst_orig.
 *
 * Returns the new dst on success, ERR_PTR(-ENOMEM) if allocation fails.
 */
struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
	struct rtable *ort = (struct rtable *) dst_orig;
	struct rtable *rt;

	/* DST_OBSOLETE_DEAD: the new entry starts out already obsolete. */
	rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_DEAD, 0);
	if (rt) {
		struct dst_entry *new = &rt->dst;

		new->__use = 1;
		new->input = dst_discard;
		new->output = dst_discard_out;

		/* Anchor the dst to the loopback device of @net. */
		new->dev = net->loopback_dev;
		if (new->dev)
			dev_hold(new->dev);

		/* Copy the routing properties of the original entry. */
		rt->rt_is_input = ort->rt_is_input;
		rt->rt_iif = ort->rt_iif;
		rt->rt_pmtu = ort->rt_pmtu;
		rt->rt_mtu_locked = ort->rt_mtu_locked;

		rt->rt_genid = rt_genid_ipv4(net);
		rt->rt_flags = ort->rt_flags;
		rt->rt_type = ort->rt_type;
		rt->rt_gateway = ort->rt_gateway;
		rt->rt_uses_gateway = ort->rt_uses_gateway;

		INIT_LIST_HEAD(&rt->rt_uncached);
	}

	dst_release(dst_orig);

	return rt ? &rt->dst : ERR_PTR(-ENOMEM);
}
2564
/* Resolve an output route for @flp4 and, when the flow carries a
 * transport protocol, run the result through xfrm_lookup_route() so
 * transformation policy can substitute its own dst.
 *
 * Returns the rtable or an ERR_PTR() from either lookup stage.
 */
struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
				    const struct sock *sk)
{
	struct rtable *rt = __ip_route_output_key(net, flp4);

	if (IS_ERR(rt))
		return rt;

	if (flp4->flowi4_proto)
		rt = (struct rtable *)xfrm_lookup_route(net, &rt->dst,
							flowi4_to_flowi(flp4),
							sk, 0);

	return rt;
}
EXPORT_SYMBOL_GPL(ip_route_output_flow);
2581
/* called with rcu_read_lock held */
/*
 * Fill @skb with one RTM_NEWROUTE message describing the route attached
 * to it (dst/src/table as resolved by the caller).  Returns 0 on
 * success, -EMSGSIZE if the message does not fit.
 */
static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id,
			struct flowi4 *fl4, struct sk_buff *skb, u32 portid,
			u32 seq)
{
	struct rtable *rt = skb_rtable(skb);
	struct rtmsg *r;
	struct nlmsghdr *nlh;
	unsigned long expires = 0;
	u32 error;
	u32 metrics[RTAX_MAX];

	nlh = nlmsg_put(skb, portid, seq, RTM_NEWROUTE, sizeof(*r), 0);
	if (!nlh)
		return -EMSGSIZE;

	r = nlmsg_data(nlh);
	r->rtm_family = AF_INET;
	r->rtm_dst_len = 32;
	r->rtm_src_len = 0;
	r->rtm_tos = fl4->flowi4_tos;
	/* rtm_table is 8 bits wide; larger table ids are reported there as
	 * RT_TABLE_COMPAT and in full via the RTA_TABLE attribute.
	 */
	r->rtm_table = table_id < 256 ? table_id : RT_TABLE_COMPAT;
	if (nla_put_u32(skb, RTA_TABLE, table_id))
		goto nla_put_failure;
	r->rtm_type = rt->rt_type;
	r->rtm_scope = RT_SCOPE_UNIVERSE;
	r->rtm_protocol = RTPROT_UNSPEC;
	r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
	if (rt->rt_flags & RTCF_NOTIFY)
		r->rtm_flags |= RTM_F_NOTIFY;
	if (IPCB(skb)->flags & IPSKB_DOREDIRECT)
		r->rtm_flags |= RTCF_DOREDIRECT;

	if (nla_put_in_addr(skb, RTA_DST, dst))
		goto nla_put_failure;
	if (src) {
		r->rtm_src_len = 32;
		if (nla_put_in_addr(skb, RTA_SRC, src))
			goto nla_put_failure;
	}
	if (rt->dst.dev &&
	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
		goto nla_put_failure;
#ifdef CONFIG_IP_ROUTE_CLASSID
	if (rt->dst.tclassid &&
	    nla_put_u32(skb, RTA_FLOW, rt->dst.tclassid))
		goto nla_put_failure;
#endif
	/* For output routes, report the preferred source address when it
	 * differs from the address the caller asked about.
	 */
	if (!rt_is_input_route(rt) &&
	    fl4->saddr != src) {
		if (nla_put_in_addr(skb, RTA_PREFSRC, fl4->saddr))
			goto nla_put_failure;
	}
	if (rt->rt_uses_gateway &&
	    nla_put_in_addr(skb, RTA_GATEWAY, rt->rt_gateway))
		goto nla_put_failure;

	/* Convert the absolute expiry (jiffies) to a remaining interval;
	 * 0 means "does not expire" or "already expired".
	 */
	expires = rt->dst.expires;
	if (expires) {
		unsigned long now = jiffies;

		if (time_before(now, expires))
			expires -= now;
		else
			expires = 0;
	}

	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
	/* A learned PMTU (and its lock bit) is only reported while the
	 * route entry has not expired.
	 */
	if (rt->rt_pmtu && expires)
		metrics[RTAX_MTU - 1] = rt->rt_pmtu;
	if (rt->rt_mtu_locked && expires)
		metrics[RTAX_LOCK - 1] |= BIT(RTAX_MTU);
	if (rtnetlink_put_metrics(skb, metrics) < 0)
		goto nla_put_failure;

	if (fl4->flowi4_mark &&
	    nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark))
		goto nla_put_failure;

	if (!uid_eq(fl4->flowi4_uid, INVALID_UID) &&
	    nla_put_u32(skb, RTA_UID,
			from_kuid_munged(current_user_ns(), fl4->flowi4_uid)))
		goto nla_put_failure;

	error = rt->dst.error;

	if (rt_is_input_route(rt)) {
#ifdef CONFIG_IP_MROUTE
		/* Forwarded multicast may need resolving via ipmr; a return
		 * of 0 from ipmr_get_route() means the request was handled
		 * there and this message must not be finalised.
		 */
		if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) &&
		    IPV4_DEVCONF_ALL(net, MC_FORWARDING)) {
			int err = ipmr_get_route(net, skb,
						 fl4->saddr, fl4->daddr,
						 r, portid);

			if (err <= 0) {
				if (err == 0)
					return 0;
				goto nla_put_failure;
			}
		} else
#endif
			if (nla_put_u32(skb, RTA_IIF, skb->dev->ifindex))
				goto nla_put_failure;
	}

	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, error) < 0)
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	/* Roll the partially built message back out of the skb. */
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2697
/*
 * RTM_GETROUTE handler: perform a route lookup for the dst/src/iif/oif
 * given in the netlink request and unicast the result back to the
 * requester as an RTM_NEWROUTE message.
 */
static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
			     struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(in_skb->sk);
	struct rtmsg *rtm;
	struct nlattr *tb[RTA_MAX+1];
	struct fib_result res = {};
	struct rtable *rt = NULL;
	struct flowi4 fl4;
	__be32 dst = 0;
	__be32 src = 0;
	u32 iif;
	int err;
	int mark;
	struct sk_buff *skb;
	u32 table_id = RT_TABLE_MAIN;
	kuid_t uid;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy,
			  extack);
	if (err < 0)
		goto errout;

	rtm = nlmsg_data(nlh);

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb) {
		err = -ENOBUFS;
		goto errout;
	}

	/* Reserve room for dummy headers, this skb can pass
	   through good chunk of routing engine.
	 */
	skb_reset_mac_header(skb);
	skb_reset_network_header(skb);

	/* All request attributes are optional; absent ones default to 0. */
	src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0;
	dst = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0;
	iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
	mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0;
	if (tb[RTA_UID])
		uid = make_kuid(current_user_ns(), nla_get_u32(tb[RTA_UID]));
	else
		uid = (iif ? INVALID_UID : current_uid());

	/* Bugfix: need to give ip_route_input enough of an IP header to
	 * not gag.
	 */
	ip_hdr(skb)->protocol = IPPROTO_UDP;
	ip_hdr(skb)->saddr = src;
	ip_hdr(skb)->daddr = dst;

	skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));

	memset(&fl4, 0, sizeof(fl4));
	fl4.daddr = dst;
	fl4.saddr = src;
	fl4.flowi4_tos = rtm->rtm_tos;
	fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0;
	fl4.flowi4_mark = mark;
	fl4.flowi4_uid = uid;

	/* The lookup and the reply fill run under one RCU read section. */
	rcu_read_lock();

	if (iif) {
		/* Input-route query: simulate reception on device @iif. */
		struct net_device *dev;

		dev = dev_get_by_index_rcu(net, iif);
		if (!dev) {
			err = -ENODEV;
			goto errout_free;
		}

		skb->protocol = htons(ETH_P_IP);
		skb->dev = dev;
		skb->mark = mark;
		err = ip_route_input_rcu(skb, dst, src, rtm->rtm_tos,
					 dev, &res);

		rt = skb_rtable(skb);
		if (err == 0 && rt->dst.error)
			err = -rt->dst.error;
	} else {
		/* Output-route query. */
		fl4.flowi4_iif = LOOPBACK_IFINDEX;
		rt = ip_route_output_key_hash_rcu(net, &fl4, &res, skb);
		err = 0;
		if (IS_ERR(rt))
			err = PTR_ERR(rt);
		else
			skb_dst_set(skb, &rt->dst);
	}

	if (err)
		goto errout_free;

	if (rtm->rtm_flags & RTM_F_NOTIFY)
		rt->rt_flags |= RTCF_NOTIFY;

	if (rtm->rtm_flags & RTM_F_LOOKUP_TABLE)
		table_id = res.table ? res.table->tb_id : 0;

	if (rtm->rtm_flags & RTM_F_FIB_MATCH) {
		/* Caller asked for the matching FIB entry, not the dst. */
		if (!res.fi) {
			err = fib_props[res.type].error;
			if (!err)
				err = -EHOSTUNREACH;
			goto errout_free;
		}
		err = fib_dump_info(skb, NETLINK_CB(in_skb).portid,
				    nlh->nlmsg_seq, RTM_NEWROUTE, table_id,
				    rt->rt_type, res.prefix, res.prefixlen,
				    fl4.flowi4_tos, res.fi, 0);
	} else {
		err = rt_fill_info(net, dst, src, table_id, &fl4, skb,
				   NETLINK_CB(in_skb).portid, nlh->nlmsg_seq);
	}
	if (err < 0)
		goto errout_free;

	rcu_read_unlock();

	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
errout:
	return err;

errout_free:
	rcu_read_unlock();
	kfree_skb(skb);
	goto errout;
}
2829
/* Flush the route cache of @in_dev's netns on a multicast config event. */
void ip_rt_multicast_event(struct in_device *in_dev)
{
	rt_cache_flush(dev_net(in_dev->dev));
}
2834
2835#ifdef CONFIG_SYSCTL
/* Defaults for the tunables exposed via ipv4_route_table below. */
static int ip_rt_gc_interval __read_mostly = 60 * HZ;
static int ip_rt_gc_min_interval __read_mostly = HZ / 2;
static int ip_rt_gc_elasticity __read_mostly = 8;
/* Lower bound enforced on the "min_pmtu" sysctl (via extra1). */
static int ip_min_valid_pmtu __read_mostly = IPV4_MIN_MTU;
Gao feng082c7ca2013-02-19 00:43:12 +00002840
Joe Perchesfe2c6332013-06-11 23:04:25 -07002841static int ipv4_sysctl_rtcache_flush(struct ctl_table *__ctl, int write,
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07002842 void __user *buffer,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002843 size_t *lenp, loff_t *ppos)
2844{
Timo Teräs5aad1de2013-05-27 20:46:33 +00002845 struct net *net = (struct net *)__ctl->extra1;
2846
Linus Torvalds1da177e2005-04-16 15:20:36 -07002847 if (write) {
Timo Teräs5aad1de2013-05-27 20:46:33 +00002848 rt_cache_flush(net);
2849 fnhe_genid_bump(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002850 return 0;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002851 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002852
2853 return -EINVAL;
2854}
2855
/*
 * Global (not per-netns) tunables under net.ipv4.route.*.  Interval
 * knobs use the jiffies/ms-jiffies handlers; "min_pmtu" is clamped
 * from below by ip_min_valid_pmtu.
 */
static struct ctl_table ipv4_route_table[] = {
	{
		.procname = "gc_thresh",
		.data = &ipv4_dst_ops.gc_thresh,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
	{
		.procname = "max_size",
		.data = &ip_rt_max_size,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
	{
		/* Deprecated. Use gc_min_interval_ms */

		.procname = "gc_min_interval",
		.data = &ip_rt_gc_min_interval,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_jiffies,
	},
	{
		/* Same backing variable as above, millisecond granularity. */
		.procname = "gc_min_interval_ms",
		.data = &ip_rt_gc_min_interval,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_ms_jiffies,
	},
	{
		.procname = "gc_timeout",
		.data = &ip_rt_gc_timeout,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_jiffies,
	},
	{
		.procname = "gc_interval",
		.data = &ip_rt_gc_interval,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_jiffies,
	},
	{
		.procname = "redirect_load",
		.data = &ip_rt_redirect_load,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
	{
		.procname = "redirect_number",
		.data = &ip_rt_redirect_number,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
	{
		.procname = "redirect_silence",
		.data = &ip_rt_redirect_silence,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
	{
		.procname = "error_cost",
		.data = &ip_rt_error_cost,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
	{
		.procname = "error_burst",
		.data = &ip_rt_error_burst,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
	{
		.procname = "gc_elasticity",
		.data = &ip_rt_gc_elasticity,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
	{
		.procname = "mtu_expires",
		.data = &ip_rt_mtu_expires,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_jiffies,
	},
	{
		/* Bounded below by ip_min_valid_pmtu (IPV4_MIN_MTU). */
		.procname = "min_pmtu",
		.data = &ip_rt_min_pmtu,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = &ip_min_valid_pmtu,
	},
	{
		.procname = "min_adv_mss",
		.data = &ip_rt_min_advmss,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
	{ }
};
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002967
/*
 * Per-netns write-only "flush" trigger.  sysctl_route_net_init() fills
 * in ->extra1 with the owning netns (and duplicates this table for
 * non-init namespaces).
 */
static struct ctl_table ipv4_route_flush_table[] = {
	{
		.procname = "flush",
		.maxlen = sizeof(int),
		.mode = 0200,
		.proc_handler = ipv4_sysctl_rtcache_flush,
	},
	{ },
};
2977
/*
 * Per-netns sysctl setup: init_net registers the static flush table
 * directly; other namespaces get their own kmemdup'd copy (with the
 * "flush" entry hidden from non-init user namespaces).  extra1 carries
 * the owning netns to ipv4_sysctl_rtcache_flush().
 */
static __net_init int sysctl_route_net_init(struct net *net)
{
	struct ctl_table *tbl;

	tbl = ipv4_route_flush_table;
	if (!net_eq(net, &init_net)) {
		tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL);
		if (!tbl)
			goto err_dup;

		/* Don't export sysctls to unprivileged users */
		if (net->user_ns != &init_user_ns)
			tbl[0].procname = NULL;
	}
	tbl[0].extra1 = net;

	net->ipv4.route_hdr = register_net_sysctl(net, "net/ipv4/route", tbl);
	if (!net->ipv4.route_hdr)
		goto err_reg;
	return 0;

err_reg:
	/* Only free a table we duplicated, never the static one. */
	if (tbl != ipv4_route_flush_table)
		kfree(tbl);
err_dup:
	return -ENOMEM;
}
3005
3006static __net_exit void sysctl_route_net_exit(struct net *net)
3007{
3008 struct ctl_table *tbl;
3009
3010 tbl = net->ipv4.route_hdr->ctl_table_arg;
3011 unregister_net_sysctl_table(net->ipv4.route_hdr);
3012 BUG_ON(tbl == ipv4_route_flush_table);
3013 kfree(tbl);
3014}
3015
/* Registered from ip_rt_init() when CONFIG_SYSCTL is enabled. */
static __net_initdata struct pernet_operations sysctl_route_ops = {
	.init = sysctl_route_net_init,
	.exit = sysctl_route_net_exit,
};
Linus Torvalds1da177e2005-04-16 15:20:36 -07003020#endif
3021
/* Initialise the per-netns route/fnhe/dev-addr generation counters. */
static __net_init int rt_genid_init(struct net *net)
{
	atomic_set(&net->ipv4.rt_genid, 0);
	atomic_set(&net->fnhe_genid, 0);
	/* Random start so dev_addr_genid differs across namespaces/boots. */
	atomic_set(&net->ipv4.dev_addr_genid, get_random_int());
	return 0;
}
3029
/* No .exit: the generation counters need no teardown. */
static __net_initdata struct pernet_operations rt_genid_ops = {
	.init = rt_genid_init,
};
3033
David S. Millerc3426b42012-06-09 16:27:05 -07003034static int __net_init ipv4_inetpeer_init(struct net *net)
3035{
3036 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3037
3038 if (!bp)
3039 return -ENOMEM;
3040 inet_peer_base_init(bp);
3041 net->ipv4.peers = bp;
3042 return 0;
3043}
3044
3045static void __net_exit ipv4_inetpeer_exit(struct net *net)
3046{
3047 struct inet_peer_base *bp = net->ipv4.peers;
3048
3049 net->ipv4.peers = NULL;
David S. Miller56a6b242012-06-09 16:32:41 -07003050 inetpeer_invalidate_tree(bp);
David S. Millerc3426b42012-06-09 16:27:05 -07003051 kfree(bp);
3052}
3053
/* Per-netns lifecycle of the inet_peer base; registered in ip_rt_init(). */
static __net_initdata struct pernet_operations ipv4_inetpeer_ops = {
	.init = ipv4_inetpeer_init,
	.exit = ipv4_inetpeer_exit,
};
Denis V. Lunev9f5e97e2008-07-05 19:02:59 -07003058
#ifdef CONFIG_IP_ROUTE_CLASSID
/* Per-cpu route accounting; allocated in ip_rt_init(). */
struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
#endif /* CONFIG_IP_ROUTE_CLASSID */
Linus Torvalds1da177e2005-04-16 15:20:36 -07003062
/*
 * One-time boot initialisation of IPv4 routing: allocate the IP-ID
 * state, per-cpu uncached lists and dst slab caches, then bring up
 * devinet, the FIB, /proc files, xfrm and the RTM_GETROUTE netlink
 * handler.  Allocation failures at this stage are fatal (panic).
 */
int __init ip_rt_init(void)
{
	int cpu;

	ip_idents = kmalloc(IP_IDENTS_SZ * sizeof(*ip_idents), GFP_KERNEL);
	if (!ip_idents)
		panic("IP: failed to allocate ip_idents\n");

	/* Randomise the initial IP-ID values. */
	prandom_bytes(ip_idents, IP_IDENTS_SZ * sizeof(*ip_idents));

	ip_tstamps = kcalloc(IP_IDENTS_SZ, sizeof(*ip_tstamps), GFP_KERNEL);
	if (!ip_tstamps)
		panic("IP: failed to allocate ip_tstamps\n");

	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu);

		INIT_LIST_HEAD(&ul->head);
		spin_lock_init(&ul->lock);
	}
#ifdef CONFIG_IP_ROUTE_CLASSID
	ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct));
	if (!ip_rt_acct)
		panic("IP: failed to allocate ip_rt_acct\n");
#endif

	ipv4_dst_ops.kmem_cachep =
		kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0,
				  SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);

	/* The blackhole ops share the same rtable slab cache. */
	ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep;

	if (dst_entries_init(&ipv4_dst_ops) < 0)
		panic("IP: failed to allocate ipv4_dst_ops counter\n");

	if (dst_entries_init(&ipv4_dst_blackhole_ops) < 0)
		panic("IP: failed to allocate ipv4_dst_blackhole_ops counter\n");

	ipv4_dst_ops.gc_thresh = ~0;
	ip_rt_max_size = INT_MAX;

	devinet_init();
	ip_fib_init();

	/* Non-fatal: routing works without the /proc files. */
	if (ip_rt_proc_init())
		pr_err("Unable to create route proc files\n");
#ifdef CONFIG_XFRM
	xfrm_init();
	xfrm4_init();
#endif
	rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL,
		      RTNL_FLAG_DOIT_UNLOCKED);

#ifdef CONFIG_SYSCTL
	register_pernet_subsys(&sysctl_route_ops);
#endif
	register_pernet_subsys(&rt_genid_ops);
	register_pernet_subsys(&ipv4_inetpeer_ops);
	return 0;
}
3123
#ifdef CONFIG_SYSCTL
/*
 * We really need to sanitize the damn ipv4 init order, then all
 * this nonsense will go away.
 */
/* Registers the static net.ipv4.route sysctl table for init_net. */
void __init ip_static_sysctl_init(void)
{
	register_net_sysctl(&init_net, "net/ipv4/route", ipv4_route_table);
}
#endif