blob: 49cc1c1df1bac0c9046f8d34db02c4bef18079e4 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * ROUTE - implementation of the IP router.
7 *
Jesper Juhl02c30a82005-05-05 16:16:16 -07008 * Authors: Ross Biro
Linus Torvalds1da177e2005-04-16 15:20:36 -07009 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
10 * Alan Cox, <gw4pts@gw4pts.ampr.org>
11 * Linus Torvalds, <Linus.Torvalds@helsinki.fi>
12 * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13 *
14 * Fixes:
15 * Alan Cox : Verify area fixes.
16 * Alan Cox : cli() protects routing changes
17 * Rui Oliveira : ICMP routing table updates
18 * (rco@di.uminho.pt) Routing table insertion and update
19 * Linus Torvalds : Rewrote bits to be sensible
20 * Alan Cox : Added BSD route gw semantics
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090021 * Alan Cox : Super /proc >4K
Linus Torvalds1da177e2005-04-16 15:20:36 -070022 * Alan Cox : MTU in route table
23 * Alan Cox : MSS actually. Also added the window
24 * clamper.
25 * Sam Lantinga : Fixed route matching in rt_del()
26 * Alan Cox : Routing cache support.
27 * Alan Cox : Removed compatibility cruft.
28 * Alan Cox : RTF_REJECT support.
29 * Alan Cox : TCP irtt support.
30 * Jonathan Naylor : Added Metric support.
31 * Miquel van Smoorenburg : BSD API fixes.
32 * Miquel van Smoorenburg : Metrics.
33 * Alan Cox : Use __u32 properly
34 * Alan Cox : Aligned routing errors more closely with BSD
35 * our system is still very different.
36 * Alan Cox : Faster /proc handling
37 * Alexey Kuznetsov : Massive rework to support tree based routing,
38 * routing caches and better behaviour.
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090039 *
Linus Torvalds1da177e2005-04-16 15:20:36 -070040 * Olaf Erb : irtt wasn't being copied right.
41 * Bjorn Ekwall : Kerneld route support.
42 * Alan Cox : Multicast fixed (I hope)
43 * Pavel Krauz : Limited broadcast fixed
44 * Mike McLagan : Routing by source
45 * Alexey Kuznetsov : End of old history. Split to fib.c and
46 * route.c and rewritten from scratch.
47 * Andi Kleen : Load-limit warning messages.
48 * Vitaly E. Lavrov : Transparent proxy revived after year coma.
49 * Vitaly E. Lavrov : Race condition in ip_route_input_slow.
50 * Tobias Ringstrom : Uninitialized res.type in ip_route_output_slow.
51 * Vladimir V. Ivanov : IP rule info (flowid) is really useful.
52 * Marc Boucher : routing by fwmark
53 * Robert Olsson : Added rt_cache statistics
54 * Arnaldo C. Melo : Convert proc stuff to seq_file
Eric Dumazetbb1d23b2005-07-05 15:00:32 -070055 * Eric Dumazet : hashed spinlocks and rt_check_expire() fixes.
Ilia Sotnikovcef26852006-03-25 01:38:55 -080056 * Ilia Sotnikov : Ignore TOS on PMTUD and Redirect
57 * Ilia Sotnikov : Removed TOS from hash calculations
Linus Torvalds1da177e2005-04-16 15:20:36 -070058 *
59 * This program is free software; you can redistribute it and/or
60 * modify it under the terms of the GNU General Public License
61 * as published by the Free Software Foundation; either version
62 * 2 of the License, or (at your option) any later version.
63 */
64
Joe Perchesafd465032012-03-12 07:03:32 +000065#define pr_fmt(fmt) "IPv4: " fmt
66
Linus Torvalds1da177e2005-04-16 15:20:36 -070067#include <linux/module.h>
Linus Torvalds7c0f6ba2016-12-24 11:46:01 -080068#include <linux/uaccess.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070069#include <linux/bitops.h>
70#include <linux/types.h>
71#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070072#include <linux/mm.h>
73#include <linux/string.h>
74#include <linux/socket.h>
75#include <linux/sockios.h>
76#include <linux/errno.h>
77#include <linux/in.h>
78#include <linux/inet.h>
79#include <linux/netdevice.h>
80#include <linux/proc_fs.h>
81#include <linux/init.h>
82#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070083#include <linux/inetdevice.h>
84#include <linux/igmp.h>
85#include <linux/pkt_sched.h>
86#include <linux/mroute.h>
87#include <linux/netfilter_ipv4.h>
88#include <linux/random.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070089#include <linux/rcupdate.h>
90#include <linux/times.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090091#include <linux/slab.h>
Eric Dumazet73f156a2014-06-02 05:26:03 -070092#include <linux/jhash.h>
Herbert Xu352e5122007-11-13 21:34:06 -080093#include <net/dst.h>
Thomas Graf1b7179d2015-07-21 10:43:59 +020094#include <net/dst_metadata.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020095#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070096#include <net/protocol.h>
97#include <net/ip.h>
98#include <net/route.h>
99#include <net/inetpeer.h>
100#include <net/sock.h>
101#include <net/ip_fib.h>
102#include <net/arp.h>
103#include <net/tcp.h>
104#include <net/icmp.h>
105#include <net/xfrm.h>
Roopa Prabhu571e7222015-07-21 10:43:47 +0200106#include <net/lwtunnel.h>
Tom Tucker8d717402006-07-30 20:43:36 -0700107#include <net/netevent.h>
Thomas Graf63f34442007-03-22 11:55:17 -0700108#include <net/rtnetlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700109#ifdef CONFIG_SYSCTL
110#include <linux/sysctl.h>
Shan Wei7426a562012-04-18 18:05:46 +0000111#include <linux/kmemleak.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700112#endif
David S. Miller6e5714e2011-08-03 20:50:44 -0700113#include <net/secure_seq.h>
Thomas Graf1b7179d2015-07-21 10:43:59 +0200114#include <net/ip_tunnels.h>
David Ahern385add92015-09-29 20:07:13 -0700115#include <net/l3mdev.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700116
Roopa Prabhub6179812017-05-25 10:42:39 -0700117#include "fib_lookup.h"
118
/* Reduce a flowi4's flowi4_tos to the IPTOS_RT_MASK and RTO_ONLINK bits. */
#define RT_FL_TOS(oldflp4) \
	((IPTOS_RT_MASK | RTO_ONLINK) & (oldflp4)->flowi4_tos)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700121
Linus Torvalds1da177e2005-04-16 15:20:36 -0700122#define RT_GC_TIMEOUT (300*HZ)
123
Linus Torvalds1da177e2005-04-16 15:20:36 -0700124static int ip_rt_max_size;
Stephen Hemminger817bc4d2008-03-22 17:43:59 -0700125static int ip_rt_redirect_number __read_mostly = 9;
126static int ip_rt_redirect_load __read_mostly = HZ / 50;
127static int ip_rt_redirect_silence __read_mostly = ((HZ / 50) << (9 + 1));
128static int ip_rt_error_cost __read_mostly = HZ;
129static int ip_rt_error_burst __read_mostly = 5 * HZ;
Stephen Hemminger817bc4d2008-03-22 17:43:59 -0700130static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ;
131static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20;
132static int ip_rt_min_advmss __read_mostly = 256;
Eric Dumazet9f28a2f2011-12-21 15:47:16 -0500133
Xin Longdeed49d2016-02-18 21:21:19 +0800134static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700135/*
136 * Interface to generic destination cache.
137 */
138
139static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
David S. Miller0dbaee32010-12-13 12:52:14 -0800140static unsigned int ipv4_default_advmss(const struct dst_entry *dst);
Steffen Klassertebb762f2011-11-23 02:12:51 +0000141static unsigned int ipv4_mtu(const struct dst_entry *dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700142static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
143static void ipv4_link_failure(struct sk_buff *skb);
David S. Miller6700c272012-07-17 03:29:28 -0700144static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
145 struct sk_buff *skb, u32 mtu);
146static void ip_do_redirect(struct dst_entry *dst, struct sock *sk,
147 struct sk_buff *skb);
David S. Millercaacf052012-07-31 15:06:50 -0700148static void ipv4_dst_destroy(struct dst_entry *dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700149
David S. Miller62fa8a82011-01-26 20:51:05 -0800150static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
151{
David S. Miller31248732012-07-10 07:08:18 -0700152 WARN_ON(1);
153 return NULL;
David S. Miller62fa8a82011-01-26 20:51:05 -0800154}
155
/* Neighbour callbacks of ipv4_dst_ops; defined further down in this file. */
static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
					   struct sk_buff *skb,
					   const void *daddr);
static void ipv4_confirm_neigh(const struct dst_entry *dst,
			       const void *daddr);
David S. Millerd3aaeb32011-07-18 00:40:17 -0700160
Linus Torvalds1da177e2005-04-16 15:20:36 -0700161static struct dst_ops ipv4_dst_ops = {
162 .family = AF_INET,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700163 .check = ipv4_dst_check,
David S. Miller0dbaee32010-12-13 12:52:14 -0800164 .default_advmss = ipv4_default_advmss,
Steffen Klassertebb762f2011-11-23 02:12:51 +0000165 .mtu = ipv4_mtu,
David S. Miller62fa8a82011-01-26 20:51:05 -0800166 .cow_metrics = ipv4_cow_metrics,
David S. Millercaacf052012-07-31 15:06:50 -0700167 .destroy = ipv4_dst_destroy,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700168 .negative_advice = ipv4_negative_advice,
169 .link_failure = ipv4_link_failure,
170 .update_pmtu = ip_rt_update_pmtu,
David S. Millere47a1852012-07-11 20:55:47 -0700171 .redirect = ip_do_redirect,
Eric W. Biedermanb92dacd2015-10-07 16:48:37 -0500172 .local_out = __ip_local_out,
David S. Millerd3aaeb32011-07-18 00:40:17 -0700173 .neigh_lookup = ipv4_neigh_lookup,
Julian Anastasov63fca652017-02-06 23:14:15 +0200174 .confirm_neigh = ipv4_confirm_neigh,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700175};
176
177#define ECN_OR_COST(class) TC_PRIO_##class
178
Philippe De Muyter4839c522007-07-09 15:32:57 -0700179const __u8 ip_tos2prio[16] = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700180 TC_PRIO_BESTEFFORT,
Dan Siemon4a2b9c32011-03-15 13:56:07 +0000181 ECN_OR_COST(BESTEFFORT),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700182 TC_PRIO_BESTEFFORT,
183 ECN_OR_COST(BESTEFFORT),
184 TC_PRIO_BULK,
185 ECN_OR_COST(BULK),
186 TC_PRIO_BULK,
187 ECN_OR_COST(BULK),
188 TC_PRIO_INTERACTIVE,
189 ECN_OR_COST(INTERACTIVE),
190 TC_PRIO_INTERACTIVE,
191 ECN_OR_COST(INTERACTIVE),
192 TC_PRIO_INTERACTIVE_BULK,
193 ECN_OR_COST(INTERACTIVE_BULK),
194 TC_PRIO_INTERACTIVE_BULK,
195 ECN_OR_COST(INTERACTIVE_BULK)
196};
Amir Vadaid4a96862012-04-04 21:33:28 +0000197EXPORT_SYMBOL(ip_tos2prio);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700198
Eric Dumazet2f970d82006-01-17 02:54:36 -0800199static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
Christoph Lameter3ed66e92014-04-07 15:39:40 -0700200#define RT_CACHE_STAT_INC(field) raw_cpu_inc(rt_cache_stat.field)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700201
Linus Torvalds1da177e2005-04-16 15:20:36 -0700202#ifdef CONFIG_PROC_FS
Linus Torvalds1da177e2005-04-16 15:20:36 -0700203static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
204{
Eric Dumazet29e75252008-01-31 17:05:09 -0800205 if (*pos)
David S. Miller89aef892012-07-17 11:00:09 -0700206 return NULL;
Eric Dumazet29e75252008-01-31 17:05:09 -0800207 return SEQ_START_TOKEN;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700208}
209
210static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos)
211{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700212 ++*pos;
David S. Miller89aef892012-07-17 11:00:09 -0700213 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700214}
215
/* seq_file ->stop for the "rt_cache" file: nothing to do. */
static void rt_cache_seq_stop(struct seq_file *seq, void *v)
{
}
219
220static int rt_cache_seq_show(struct seq_file *seq, void *v)
221{
222 if (v == SEQ_START_TOKEN)
223 seq_printf(seq, "%-127s\n",
224 "Iface\tDestination\tGateway \tFlags\t\tRefCnt\tUse\t"
225 "Metric\tSource\t\tMTU\tWindow\tIRTT\tTOS\tHHRef\t"
226 "HHUptod\tSpecDst");
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900227 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700228}
229
Stephen Hemmingerf6908082007-03-12 14:34:29 -0700230static const struct seq_operations rt_cache_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700231 .start = rt_cache_seq_start,
232 .next = rt_cache_seq_next,
233 .stop = rt_cache_seq_stop,
234 .show = rt_cache_seq_show,
235};
236
237static int rt_cache_seq_open(struct inode *inode, struct file *file)
238{
David S. Miller89aef892012-07-17 11:00:09 -0700239 return seq_open(file, &rt_cache_seq_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700240}
241
Arjan van de Ven9a321442007-02-12 00:55:35 -0800242static const struct file_operations rt_cache_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700243 .open = rt_cache_seq_open,
244 .read = seq_read,
245 .llseek = seq_lseek,
David S. Miller89aef892012-07-17 11:00:09 -0700246 .release = seq_release,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700247};
248
249
250static void *rt_cpu_seq_start(struct seq_file *seq, loff_t *pos)
251{
252 int cpu;
253
254 if (*pos == 0)
255 return SEQ_START_TOKEN;
256
Rusty Russell0f23174a2008-12-29 12:23:42 +0000257 for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700258 if (!cpu_possible(cpu))
259 continue;
260 *pos = cpu+1;
Eric Dumazet2f970d82006-01-17 02:54:36 -0800261 return &per_cpu(rt_cache_stat, cpu);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700262 }
263 return NULL;
264}
265
266static void *rt_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
267{
268 int cpu;
269
Rusty Russell0f23174a2008-12-29 12:23:42 +0000270 for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700271 if (!cpu_possible(cpu))
272 continue;
273 *pos = cpu+1;
Eric Dumazet2f970d82006-01-17 02:54:36 -0800274 return &per_cpu(rt_cache_stat, cpu);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700275 }
276 return NULL;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900277
Linus Torvalds1da177e2005-04-16 15:20:36 -0700278}
279
/* seq_file ->stop for the per-CPU statistics file: nothing to release. */
static void rt_cpu_seq_stop(struct seq_file *seq, void *v)
{
}
284
285static int rt_cpu_seq_show(struct seq_file *seq, void *v)
286{
287 struct rt_cache_stat *st = v;
288
289 if (v == SEQ_START_TOKEN) {
Olaf Rempel5bec0032005-04-28 12:16:08 -0700290 seq_printf(seq, "entries in_hit in_slow_tot in_slow_mc in_no_route in_brd in_martian_dst in_martian_src out_hit out_slow_tot out_slow_mc gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700291 return 0;
292 }
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900293
Linus Torvalds1da177e2005-04-16 15:20:36 -0700294 seq_printf(seq,"%08x %08x %08x %08x %08x %08x %08x %08x "
295 " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n",
Eric Dumazetfc66f952010-10-08 06:37:34 +0000296 dst_entries_get_slow(&ipv4_dst_ops),
Eric Dumazet0baf2b32013-10-16 02:49:04 -0700297 0, /* st->in_hit */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700298 st->in_slow_tot,
299 st->in_slow_mc,
300 st->in_no_route,
301 st->in_brd,
302 st->in_martian_dst,
303 st->in_martian_src,
304
Eric Dumazet0baf2b32013-10-16 02:49:04 -0700305 0, /* st->out_hit */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700306 st->out_slow_tot,
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900307 st->out_slow_mc,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700308
Eric Dumazet0baf2b32013-10-16 02:49:04 -0700309 0, /* st->gc_total */
310 0, /* st->gc_ignored */
311 0, /* st->gc_goal_miss */
312 0, /* st->gc_dst_overflow */
313 0, /* st->in_hlist_search */
314 0 /* st->out_hlist_search */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700315 );
316 return 0;
317}
318
Stephen Hemmingerf6908082007-03-12 14:34:29 -0700319static const struct seq_operations rt_cpu_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700320 .start = rt_cpu_seq_start,
321 .next = rt_cpu_seq_next,
322 .stop = rt_cpu_seq_stop,
323 .show = rt_cpu_seq_show,
324};
325
326
327static int rt_cpu_seq_open(struct inode *inode, struct file *file)
328{
329 return seq_open(file, &rt_cpu_seq_ops);
330}
331
Arjan van de Ven9a321442007-02-12 00:55:35 -0800332static const struct file_operations rt_cpu_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700333 .open = rt_cpu_seq_open,
334 .read = seq_read,
335 .llseek = seq_lseek,
336 .release = seq_release,
337};
338
Patrick McHardyc7066f72011-01-14 13:36:42 +0100339#ifdef CONFIG_IP_ROUTE_CLASSID
Alexey Dobriyana661c412009-11-25 15:40:35 -0800340static int rt_acct_proc_show(struct seq_file *m, void *v)
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800341{
Alexey Dobriyana661c412009-11-25 15:40:35 -0800342 struct ip_rt_acct *dst, *src;
343 unsigned int i, j;
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800344
Alexey Dobriyana661c412009-11-25 15:40:35 -0800345 dst = kcalloc(256, sizeof(struct ip_rt_acct), GFP_KERNEL);
346 if (!dst)
347 return -ENOMEM;
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800348
Alexey Dobriyana661c412009-11-25 15:40:35 -0800349 for_each_possible_cpu(i) {
350 src = (struct ip_rt_acct *)per_cpu_ptr(ip_rt_acct, i);
351 for (j = 0; j < 256; j++) {
352 dst[j].o_bytes += src[j].o_bytes;
353 dst[j].o_packets += src[j].o_packets;
354 dst[j].i_bytes += src[j].i_bytes;
355 dst[j].i_packets += src[j].i_packets;
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800356 }
357 }
Alexey Dobriyana661c412009-11-25 15:40:35 -0800358
359 seq_write(m, dst, 256 * sizeof(struct ip_rt_acct));
360 kfree(dst);
361 return 0;
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800362}
Alexey Dobriyana661c412009-11-25 15:40:35 -0800363
364static int rt_acct_proc_open(struct inode *inode, struct file *file)
365{
366 return single_open(file, rt_acct_proc_show, NULL);
367}
368
369static const struct file_operations rt_acct_proc_fops = {
Alexey Dobriyana661c412009-11-25 15:40:35 -0800370 .open = rt_acct_proc_open,
371 .read = seq_read,
372 .llseek = seq_lseek,
373 .release = single_release,
374};
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800375#endif
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800376
Denis V. Lunev73b38712008-02-28 20:51:18 -0800377static int __net_init ip_rt_do_proc_init(struct net *net)
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800378{
379 struct proc_dir_entry *pde;
380
Gao fengd4beaa62013-02-18 01:34:54 +0000381 pde = proc_create("rt_cache", S_IRUGO, net->proc_net,
382 &rt_cache_seq_fops);
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800383 if (!pde)
384 goto err1;
385
Wang Chen77020722008-02-28 14:14:25 -0800386 pde = proc_create("rt_cache", S_IRUGO,
387 net->proc_net_stat, &rt_cpu_seq_fops);
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800388 if (!pde)
389 goto err2;
390
Patrick McHardyc7066f72011-01-14 13:36:42 +0100391#ifdef CONFIG_IP_ROUTE_CLASSID
Alexey Dobriyana661c412009-11-25 15:40:35 -0800392 pde = proc_create("rt_acct", 0, net->proc_net, &rt_acct_proc_fops);
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800393 if (!pde)
394 goto err3;
395#endif
396 return 0;
397
Patrick McHardyc7066f72011-01-14 13:36:42 +0100398#ifdef CONFIG_IP_ROUTE_CLASSID
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800399err3:
400 remove_proc_entry("rt_cache", net->proc_net_stat);
401#endif
402err2:
403 remove_proc_entry("rt_cache", net->proc_net);
404err1:
405 return -ENOMEM;
406}
Denis V. Lunev73b38712008-02-28 20:51:18 -0800407
408static void __net_exit ip_rt_do_proc_exit(struct net *net)
409{
410 remove_proc_entry("rt_cache", net->proc_net_stat);
411 remove_proc_entry("rt_cache", net->proc_net);
Patrick McHardyc7066f72011-01-14 13:36:42 +0100412#ifdef CONFIG_IP_ROUTE_CLASSID
Denis V. Lunev73b38712008-02-28 20:51:18 -0800413 remove_proc_entry("rt_acct", net->proc_net);
Alexey Dobriyan0a931ac2010-01-17 03:32:50 +0000414#endif
Denis V. Lunev73b38712008-02-28 20:51:18 -0800415}
416
417static struct pernet_operations ip_rt_proc_ops __net_initdata = {
418 .init = ip_rt_do_proc_init,
419 .exit = ip_rt_do_proc_exit,
420};
421
422static int __init ip_rt_proc_init(void)
423{
424 return register_pernet_subsys(&ip_rt_proc_ops);
425}
426
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800427#else
/* Without CONFIG_PROC_FS there is nothing to register: report success. */
static inline int ip_rt_proc_init(void)
{
	return 0;
}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700432#endif /* CONFIG_PROC_FS */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900433
Eric Dumazet4331deb2012-07-25 05:11:23 +0000434static inline bool rt_is_expired(const struct rtable *rth)
Denis V. Luneve84f84f2008-07-05 19:04:32 -0700435{
fan.duca4c3fc2013-07-30 08:33:53 +0800436 return rth->rt_genid != rt_genid_ipv4(dev_net(rth->dst.dev));
Denis V. Luneve84f84f2008-07-05 19:04:32 -0700437}
438
/*
 * Bump the IPv4 route generation id of @net.  Routes stamped with the
 * previous id are then reported as expired by rt_is_expired().
 */
void rt_cache_flush(struct net *net)
{
	rt_genid_bump_ipv4(net);
}
443
David S. Millerf894cbf2012-07-02 21:52:24 -0700444static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
445 struct sk_buff *skb,
446 const void *daddr)
David Miller3769cff2011-07-11 22:44:24 +0000447{
David S. Millerd3aaeb32011-07-18 00:40:17 -0700448 struct net_device *dev = dst->dev;
449 const __be32 *pkey = daddr;
David S. Miller39232972012-01-26 15:22:32 -0500450 const struct rtable *rt;
David Miller3769cff2011-07-11 22:44:24 +0000451 struct neighbour *n;
452
David S. Miller39232972012-01-26 15:22:32 -0500453 rt = (const struct rtable *) dst;
David S. Millera263b302012-07-02 02:02:15 -0700454 if (rt->rt_gateway)
David S. Miller39232972012-01-26 15:22:32 -0500455 pkey = (const __be32 *) &rt->rt_gateway;
David S. Millerf894cbf2012-07-02 21:52:24 -0700456 else if (skb)
457 pkey = &ip_hdr(skb)->daddr;
David S. Millerd3aaeb32011-07-18 00:40:17 -0700458
David S. Miller80703d22012-02-15 17:48:35 -0500459 n = __ipv4_neigh_lookup(dev, *(__force u32 *)pkey);
David S. Millerd3aaeb32011-07-18 00:40:17 -0700460 if (n)
461 return n;
David Miller32092ec2011-07-25 00:01:41 +0000462 return neigh_create(&arp_tbl, pkey, dev);
David S. Millerd3aaeb32011-07-18 00:40:17 -0700463}
464
Julian Anastasov63fca652017-02-06 23:14:15 +0200465static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr)
466{
467 struct net_device *dev = dst->dev;
468 const __be32 *pkey = daddr;
469 const struct rtable *rt;
470
471 rt = (const struct rtable *)dst;
472 if (rt->rt_gateway)
473 pkey = (const __be32 *)&rt->rt_gateway;
474 else if (!daddr ||
475 (rt->rt_flags &
476 (RTCF_MULTICAST | RTCF_BROADCAST | RTCF_LOCAL)))
477 return;
478
479 __ipv4_confirm_neigh(dev, *(__force u32 *)pkey);
480}
481
Eric Dumazet04ca6972014-07-26 08:58:10 +0200482#define IP_IDENTS_SZ 2048u
Eric Dumazet04ca6972014-07-26 08:58:10 +0200483
Eric Dumazet355b5902015-05-01 10:37:49 -0700484static atomic_t *ip_idents __read_mostly;
485static u32 *ip_tstamps __read_mostly;
Eric Dumazet04ca6972014-07-26 08:58:10 +0200486
487/* In order to protect privacy, we add a perturbation to identifiers
488 * if one generator is seldom used. This makes hard for an attacker
489 * to infer how many packets were sent between two points in time.
490 */
491u32 ip_idents_reserve(u32 hash, int segs)
492{
Eric Dumazet355b5902015-05-01 10:37:49 -0700493 u32 *p_tstamp = ip_tstamps + hash % IP_IDENTS_SZ;
494 atomic_t *p_id = ip_idents + hash % IP_IDENTS_SZ;
Mark Rutland6aa7de02017-10-23 14:07:29 -0700495 u32 old = READ_ONCE(*p_tstamp);
Eric Dumazet04ca6972014-07-26 08:58:10 +0200496 u32 now = (u32)jiffies;
Eric Dumazetadb03112016-09-20 18:06:17 -0700497 u32 new, delta = 0;
Eric Dumazet04ca6972014-07-26 08:58:10 +0200498
Eric Dumazet355b5902015-05-01 10:37:49 -0700499 if (old != now && cmpxchg(p_tstamp, old, now) == old)
Eric Dumazet04ca6972014-07-26 08:58:10 +0200500 delta = prandom_u32_max(now - old);
501
Eric Dumazetadb03112016-09-20 18:06:17 -0700502 /* Do not use atomic_add_return() as it makes UBSAN unhappy */
503 do {
504 old = (u32)atomic_read(p_id);
505 new = old + delta + segs;
506 } while (atomic_cmpxchg(p_id, old, new) != old);
507
508 return new - segs;
Eric Dumazet04ca6972014-07-26 08:58:10 +0200509}
510EXPORT_SYMBOL(ip_idents_reserve);
Eric Dumazet73f156a2014-06-02 05:26:03 -0700511
Hannes Frederic Sowab6a77192015-03-25 17:07:44 +0100512void __ip_select_ident(struct net *net, struct iphdr *iph, int segs)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700513{
Eric Dumazet73f156a2014-06-02 05:26:03 -0700514 static u32 ip_idents_hashrnd __read_mostly;
515 u32 hash, id;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700516
Eric Dumazet73f156a2014-06-02 05:26:03 -0700517 net_get_random_once(&ip_idents_hashrnd, sizeof(ip_idents_hashrnd));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700518
Eric Dumazet04ca6972014-07-26 08:58:10 +0200519 hash = jhash_3words((__force u32)iph->daddr,
520 (__force u32)iph->saddr,
Hannes Frederic Sowab6a77192015-03-25 17:07:44 +0100521 iph->protocol ^ net_hash_mix(net),
Eric Dumazet04ca6972014-07-26 08:58:10 +0200522 ip_idents_hashrnd);
Eric Dumazet73f156a2014-06-02 05:26:03 -0700523 id = ip_idents_reserve(hash, segs);
524 iph->id = htons(id);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700525}
Eric Dumazet4bc2f182010-07-09 21:22:10 +0000526EXPORT_SYMBOL(__ip_select_ident);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700527
Lorenzo Colittie2d118a2016-11-04 02:23:43 +0900528static void __build_flow_key(const struct net *net, struct flowi4 *fl4,
529 const struct sock *sk,
David S. Miller4895c772012-07-17 04:19:00 -0700530 const struct iphdr *iph,
531 int oif, u8 tos,
532 u8 prot, u32 mark, int flow_flags)
533{
534 if (sk) {
535 const struct inet_sock *inet = inet_sk(sk);
536
537 oif = sk->sk_bound_dev_if;
538 mark = sk->sk_mark;
539 tos = RT_CONN_FLAGS(sk);
540 prot = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol;
541 }
542 flowi4_init_output(fl4, oif, mark, tos,
543 RT_SCOPE_UNIVERSE, prot,
544 flow_flags,
Lorenzo Colittie2d118a2016-11-04 02:23:43 +0900545 iph->daddr, iph->saddr, 0, 0,
546 sock_net_uid(net, sk));
David S. Miller4895c772012-07-17 04:19:00 -0700547}
548
Eric Dumazet5abf7f72012-07-17 22:42:13 +0200549static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb,
550 const struct sock *sk)
David S. Miller4895c772012-07-17 04:19:00 -0700551{
Lorenzo Colittid109e612016-11-30 02:56:47 +0900552 const struct net *net = dev_net(skb->dev);
David S. Miller4895c772012-07-17 04:19:00 -0700553 const struct iphdr *iph = ip_hdr(skb);
554 int oif = skb->dev->ifindex;
555 u8 tos = RT_TOS(iph->tos);
556 u8 prot = iph->protocol;
557 u32 mark = skb->mark;
558
Lorenzo Colittid109e612016-11-30 02:56:47 +0900559 __build_flow_key(net, fl4, sk, iph, oif, tos, prot, mark, 0);
David S. Miller4895c772012-07-17 04:19:00 -0700560}
561
Eric Dumazet5abf7f72012-07-17 22:42:13 +0200562static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
David S. Miller4895c772012-07-17 04:19:00 -0700563{
564 const struct inet_sock *inet = inet_sk(sk);
Eric Dumazet5abf7f72012-07-17 22:42:13 +0200565 const struct ip_options_rcu *inet_opt;
David S. Miller4895c772012-07-17 04:19:00 -0700566 __be32 daddr = inet->inet_daddr;
567
568 rcu_read_lock();
569 inet_opt = rcu_dereference(inet->inet_opt);
570 if (inet_opt && inet_opt->opt.srr)
571 daddr = inet_opt->opt.faddr;
572 flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
573 RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
574 inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
575 inet_sk_flowi_flags(sk),
Lorenzo Colittie2d118a2016-11-04 02:23:43 +0900576 daddr, inet->inet_saddr, 0, 0, sk->sk_uid);
David S. Miller4895c772012-07-17 04:19:00 -0700577 rcu_read_unlock();
578}
579
/* Build @fl4 from @skb when there is one, otherwise from @sk alone. */
static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk,
				 const struct sk_buff *skb)
{
	if (!skb) {
		build_sk_flow_key(fl4, sk);
		return;
	}

	build_skb_flow_key(fl4, skb, sk);
}
588
/* Taken with BHs disabled by update_or_create_fnhe() around its exception-hash updates. */
static DEFINE_SPINLOCK(fnhe_lock);
David S. Miller4895c772012-07-17 04:19:00 -0700590
Timo Teräs2ffae992013-06-27 10:27:05 +0300591static void fnhe_flush_routes(struct fib_nh_exception *fnhe)
592{
593 struct rtable *rt;
594
595 rt = rcu_dereference(fnhe->fnhe_rth_input);
596 if (rt) {
597 RCU_INIT_POINTER(fnhe->fnhe_rth_input, NULL);
Wei Wang95c47f92017-06-17 10:42:30 -0700598 dst_dev_put(&rt->dst);
Wei Wang08301062017-06-17 10:42:29 -0700599 dst_release(&rt->dst);
Timo Teräs2ffae992013-06-27 10:27:05 +0300600 }
601 rt = rcu_dereference(fnhe->fnhe_rth_output);
602 if (rt) {
603 RCU_INIT_POINTER(fnhe->fnhe_rth_output, NULL);
Wei Wang95c47f92017-06-17 10:42:30 -0700604 dst_dev_put(&rt->dst);
Wei Wang08301062017-06-17 10:42:29 -0700605 dst_release(&rt->dst);
Timo Teräs2ffae992013-06-27 10:27:05 +0300606 }
607}
608
Julian Anastasovaee06da2012-07-18 10:15:35 +0000609static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
David S. Miller4895c772012-07-17 04:19:00 -0700610{
611 struct fib_nh_exception *fnhe, *oldest;
612
613 oldest = rcu_dereference(hash->chain);
614 for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe;
615 fnhe = rcu_dereference(fnhe->fnhe_next)) {
616 if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp))
617 oldest = fnhe;
618 }
Timo Teräs2ffae992013-06-27 10:27:05 +0300619 fnhe_flush_routes(oldest);
David S. Miller4895c772012-07-17 04:19:00 -0700620 return oldest;
621}
622
David S. Millerd3a25c92012-07-17 13:23:08 -0700623static inline u32 fnhe_hashfun(__be32 daddr)
624{
Eric Dumazetd546c622014-09-04 08:21:31 -0700625 static u32 fnhe_hashrnd __read_mostly;
David S. Millerd3a25c92012-07-17 13:23:08 -0700626 u32 hval;
627
Eric Dumazetd546c622014-09-04 08:21:31 -0700628 net_get_random_once(&fnhe_hashrnd, sizeof(fnhe_hashrnd));
629 hval = jhash_1word((__force u32) daddr, fnhe_hashrnd);
630 return hash_32(hval, FNHE_HASH_SHIFT);
David S. Millerd3a25c92012-07-17 13:23:08 -0700631}
632
Timo Teräs387aa652013-05-27 20:46:31 +0000633static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe)
634{
635 rt->rt_pmtu = fnhe->fnhe_pmtu;
636 rt->dst.expires = fnhe->fnhe_expires;
637
638 if (fnhe->fnhe_gw) {
639 rt->rt_flags |= RTCF_REDIRECTED;
640 rt->rt_gateway = fnhe->fnhe_gw;
641 rt->rt_uses_gateway = 1;
642 }
643}
644
Julian Anastasovaee06da2012-07-18 10:15:35 +0000645static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
646 u32 pmtu, unsigned long expires)
David S. Miller4895c772012-07-17 04:19:00 -0700647{
Julian Anastasovaee06da2012-07-18 10:15:35 +0000648 struct fnhe_hash_bucket *hash;
David S. Miller4895c772012-07-17 04:19:00 -0700649 struct fib_nh_exception *fnhe;
Timo Teräs387aa652013-05-27 20:46:31 +0000650 struct rtable *rt;
Xin Longcebe84c2017-11-17 14:27:18 +0800651 u32 genid, hval;
Timo Teräs387aa652013-05-27 20:46:31 +0000652 unsigned int i;
David S. Miller4895c772012-07-17 04:19:00 -0700653 int depth;
Xin Longcebe84c2017-11-17 14:27:18 +0800654
655 genid = fnhe_genid(dev_net(nh->nh_dev));
656 hval = fnhe_hashfun(daddr);
David S. Miller4895c772012-07-17 04:19:00 -0700657
David S. Millerc5038a82012-07-31 15:02:02 -0700658 spin_lock_bh(&fnhe_lock);
Julian Anastasovaee06da2012-07-18 10:15:35 +0000659
Eric Dumazetcaa41522014-09-03 22:21:56 -0700660 hash = rcu_dereference(nh->nh_exceptions);
David S. Miller4895c772012-07-17 04:19:00 -0700661 if (!hash) {
Julian Anastasovaee06da2012-07-18 10:15:35 +0000662 hash = kzalloc(FNHE_HASH_SIZE * sizeof(*hash), GFP_ATOMIC);
David S. Miller4895c772012-07-17 04:19:00 -0700663 if (!hash)
Julian Anastasovaee06da2012-07-18 10:15:35 +0000664 goto out_unlock;
Eric Dumazetcaa41522014-09-03 22:21:56 -0700665 rcu_assign_pointer(nh->nh_exceptions, hash);
David S. Miller4895c772012-07-17 04:19:00 -0700666 }
667
David S. Miller4895c772012-07-17 04:19:00 -0700668 hash += hval;
669
670 depth = 0;
671 for (fnhe = rcu_dereference(hash->chain); fnhe;
672 fnhe = rcu_dereference(fnhe->fnhe_next)) {
673 if (fnhe->fnhe_daddr == daddr)
Julian Anastasovaee06da2012-07-18 10:15:35 +0000674 break;
David S. Miller4895c772012-07-17 04:19:00 -0700675 depth++;
676 }
677
Julian Anastasovaee06da2012-07-18 10:15:35 +0000678 if (fnhe) {
Xin Longcebe84c2017-11-17 14:27:18 +0800679 if (fnhe->fnhe_genid != genid)
680 fnhe->fnhe_genid = genid;
Julian Anastasovaee06da2012-07-18 10:15:35 +0000681 if (gw)
682 fnhe->fnhe_gw = gw;
Xin Longe39d5242017-11-17 14:27:06 +0800683 if (pmtu)
Julian Anastasovaee06da2012-07-18 10:15:35 +0000684 fnhe->fnhe_pmtu = pmtu;
Xin Longe39d5242017-11-17 14:27:06 +0800685 fnhe->fnhe_expires = max(1UL, expires);
Timo Teräs387aa652013-05-27 20:46:31 +0000686 /* Update all cached dsts too */
Timo Teräs2ffae992013-06-27 10:27:05 +0300687 rt = rcu_dereference(fnhe->fnhe_rth_input);
688 if (rt)
689 fill_route_from_fnhe(rt, fnhe);
690 rt = rcu_dereference(fnhe->fnhe_rth_output);
Timo Teräs387aa652013-05-27 20:46:31 +0000691 if (rt)
692 fill_route_from_fnhe(rt, fnhe);
Julian Anastasovaee06da2012-07-18 10:15:35 +0000693 } else {
694 if (depth > FNHE_RECLAIM_DEPTH)
695 fnhe = fnhe_oldest(hash);
696 else {
697 fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC);
698 if (!fnhe)
699 goto out_unlock;
700
701 fnhe->fnhe_next = hash->chain;
702 rcu_assign_pointer(hash->chain, fnhe);
703 }
Xin Longcebe84c2017-11-17 14:27:18 +0800704 fnhe->fnhe_genid = genid;
Julian Anastasovaee06da2012-07-18 10:15:35 +0000705 fnhe->fnhe_daddr = daddr;
706 fnhe->fnhe_gw = gw;
707 fnhe->fnhe_pmtu = pmtu;
708 fnhe->fnhe_expires = expires;
Timo Teräs387aa652013-05-27 20:46:31 +0000709
710 /* Exception created; mark the cached routes for the nexthop
711 * stale, so anyone caching it rechecks if this exception
712 * applies to them.
713 */
Timo Teräs2ffae992013-06-27 10:27:05 +0300714 rt = rcu_dereference(nh->nh_rth_input);
715 if (rt)
716 rt->dst.obsolete = DST_OBSOLETE_KILL;
717
Timo Teräs387aa652013-05-27 20:46:31 +0000718 for_each_possible_cpu(i) {
719 struct rtable __rcu **prt;
720 prt = per_cpu_ptr(nh->nh_pcpu_rth_output, i);
721 rt = rcu_dereference(*prt);
722 if (rt)
723 rt->dst.obsolete = DST_OBSOLETE_KILL;
724 }
David S. Miller4895c772012-07-17 04:19:00 -0700725 }
David S. Miller4895c772012-07-17 04:19:00 -0700726
David S. Miller4895c772012-07-17 04:19:00 -0700727 fnhe->fnhe_stamp = jiffies;
Julian Anastasovaee06da2012-07-18 10:15:35 +0000728
729out_unlock:
David S. Millerc5038a82012-07-31 15:02:02 -0700730 spin_unlock_bh(&fnhe_lock);
David S. Miller4895c772012-07-17 04:19:00 -0700731}
732
David S. Millerceb33202012-07-17 11:31:28 -0700733static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4,
734 bool kill_route)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700735{
David S. Millere47a1852012-07-11 20:55:47 -0700736 __be32 new_gw = icmp_hdr(skb)->un.gateway;
David S. Miller94206122012-07-11 20:38:08 -0700737 __be32 old_gw = ip_hdr(skb)->saddr;
David S. Millere47a1852012-07-11 20:55:47 -0700738 struct net_device *dev = skb->dev;
David S. Millere47a1852012-07-11 20:55:47 -0700739 struct in_device *in_dev;
David S. Miller4895c772012-07-17 04:19:00 -0700740 struct fib_result res;
David S. Millere47a1852012-07-11 20:55:47 -0700741 struct neighbour *n;
Denis V. Lunev317805b2008-02-28 20:50:06 -0800742 struct net *net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700743
David S. Miller94206122012-07-11 20:38:08 -0700744 switch (icmp_hdr(skb)->code & 7) {
745 case ICMP_REDIR_NET:
746 case ICMP_REDIR_NETTOS:
747 case ICMP_REDIR_HOST:
748 case ICMP_REDIR_HOSTTOS:
749 break;
750
751 default:
752 return;
753 }
754
David S. Millere47a1852012-07-11 20:55:47 -0700755 if (rt->rt_gateway != old_gw)
756 return;
757
758 in_dev = __in_dev_get_rcu(dev);
759 if (!in_dev)
760 return;
761
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900762 net = dev_net(dev);
Joe Perches9d4fb272009-11-23 10:41:23 -0800763 if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) ||
764 ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw) ||
765 ipv4_is_zeronet(new_gw))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700766 goto reject_redirect;
767
768 if (!IN_DEV_SHARED_MEDIA(in_dev)) {
769 if (!inet_addr_onlink(in_dev, new_gw, old_gw))
770 goto reject_redirect;
771 if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev))
772 goto reject_redirect;
773 } else {
Denis V. Lunev317805b2008-02-28 20:50:06 -0800774 if (inet_addr_type(net, new_gw) != RTN_UNICAST)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700775 goto reject_redirect;
776 }
777
Stephen Suryaputra Lin969447f2016-11-10 11:16:15 -0500778 n = __ipv4_neigh_lookup(rt->dst.dev, new_gw);
779 if (!n)
780 n = neigh_create(&arp_tbl, &new_gw, rt->dst.dev);
WANG Cong2c1a4312014-09-24 17:07:53 -0700781 if (!IS_ERR(n)) {
David S. Millere47a1852012-07-11 20:55:47 -0700782 if (!(n->nud_state & NUD_VALID)) {
783 neigh_event_send(n, NULL);
784 } else {
Andy Gospodarek0eeb0752015-06-23 13:45:37 -0400785 if (fib_lookup(net, fl4, &res, 0) == 0) {
David S. Miller4895c772012-07-17 04:19:00 -0700786 struct fib_nh *nh = &FIB_RES_NH(res);
David S. Miller4895c772012-07-17 04:19:00 -0700787
Julian Anastasovaee06da2012-07-18 10:15:35 +0000788 update_or_create_fnhe(nh, fl4->daddr, new_gw,
Xin Longdeed49d2016-02-18 21:21:19 +0800789 0, jiffies + ip_rt_gc_timeout);
David S. Miller4895c772012-07-17 04:19:00 -0700790 }
David S. Millerceb33202012-07-17 11:31:28 -0700791 if (kill_route)
792 rt->dst.obsolete = DST_OBSOLETE_KILL;
David S. Millere47a1852012-07-11 20:55:47 -0700793 call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n);
794 }
795 neigh_release(n);
796 }
797 return;
798
799reject_redirect:
800#ifdef CONFIG_IP_ROUTE_VERBOSE
David S. Miller99ee0382012-07-12 07:40:05 -0700801 if (IN_DEV_LOG_MARTIANS(in_dev)) {
802 const struct iphdr *iph = (const struct iphdr *) skb->data;
803 __be32 daddr = iph->daddr;
804 __be32 saddr = iph->saddr;
805
David S. Millere47a1852012-07-11 20:55:47 -0700806 net_info_ratelimited("Redirect from %pI4 on %s about %pI4 ignored\n"
807 " Advised path = %pI4 -> %pI4\n",
808 &old_gw, dev->name, &new_gw,
809 &saddr, &daddr);
David S. Miller99ee0382012-07-12 07:40:05 -0700810 }
David S. Millere47a1852012-07-11 20:55:47 -0700811#endif
812 ;
813}
814
David S. Miller4895c772012-07-17 04:19:00 -0700815static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
816{
817 struct rtable *rt;
818 struct flowi4 fl4;
Michal Kubecekf96ef982013-05-28 08:26:49 +0200819 const struct iphdr *iph = (const struct iphdr *) skb->data;
Lorenzo Colitti7d995692016-12-23 00:33:57 +0900820 struct net *net = dev_net(skb->dev);
Michal Kubecekf96ef982013-05-28 08:26:49 +0200821 int oif = skb->dev->ifindex;
822 u8 tos = RT_TOS(iph->tos);
823 u8 prot = iph->protocol;
824 u32 mark = skb->mark;
David S. Miller4895c772012-07-17 04:19:00 -0700825
826 rt = (struct rtable *) dst;
827
Lorenzo Colitti7d995692016-12-23 00:33:57 +0900828 __build_flow_key(net, &fl4, sk, iph, oif, tos, prot, mark, 0);
David S. Millerceb33202012-07-17 11:31:28 -0700829 __ip_do_redirect(rt, skb, &fl4, true);
David S. Miller4895c772012-07-17 04:19:00 -0700830}
831
Linus Torvalds1da177e2005-04-16 15:20:36 -0700832static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
833{
Eric Dumazetee6b9672008-03-05 18:30:47 -0800834 struct rtable *rt = (struct rtable *)dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700835 struct dst_entry *ret = dst;
836
837 if (rt) {
Timo Teräsd11a4dc2010-03-18 23:20:20 +0000838 if (dst->obsolete > 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700839 ip_rt_put(rt);
840 ret = NULL;
David S. Miller59436342012-07-10 06:58:42 -0700841 } else if ((rt->rt_flags & RTCF_REDIRECTED) ||
842 rt->dst.expires) {
David S. Miller89aef892012-07-17 11:00:09 -0700843 ip_rt_put(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700844 ret = NULL;
845 }
846 }
847 return ret;
848}
849
850/*
851 * Algorithm:
852 * 1. The first ip_rt_redirect_number redirects are sent
853 * with exponential backoff, then we stop sending them at all,
854 * assuming that the host ignores our redirects.
855 * 2. If we did not see packets requiring redirects
856 * during ip_rt_redirect_silence, we assume that the host
857 * forgot redirected route and start to send redirects again.
858 *
859 * This algorithm is much cheaper and more intelligent than dumb load limiting
860 * in icmp.c.
861 *
862 * NOTE. Do not forget to inhibit load limiting for redirects (redundant)
863 * and "frag. need" (breaks PMTU discovery) in icmp.c.
864 */
865
866void ip_rt_send_redirect(struct sk_buff *skb)
867{
Eric Dumazet511c3f92009-06-02 05:14:27 +0000868 struct rtable *rt = skb_rtable(skb);
Eric Dumazet30038fc2009-08-28 23:52:01 -0700869 struct in_device *in_dev;
David S. Miller92d86822011-02-04 15:55:25 -0800870 struct inet_peer *peer;
David S. Miller1d861aa2012-07-10 03:58:16 -0700871 struct net *net;
Eric Dumazet30038fc2009-08-28 23:52:01 -0700872 int log_martians;
David Ahern192132b2015-08-27 16:07:03 -0700873 int vif;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700874
Eric Dumazet30038fc2009-08-28 23:52:01 -0700875 rcu_read_lock();
Changli Gaod8d1f302010-06-10 23:31:35 -0700876 in_dev = __in_dev_get_rcu(rt->dst.dev);
Eric Dumazet30038fc2009-08-28 23:52:01 -0700877 if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) {
878 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700879 return;
Eric Dumazet30038fc2009-08-28 23:52:01 -0700880 }
881 log_martians = IN_DEV_LOG_MARTIANS(in_dev);
David Ahern385add92015-09-29 20:07:13 -0700882 vif = l3mdev_master_ifindex_rcu(rt->dst.dev);
Eric Dumazet30038fc2009-08-28 23:52:01 -0700883 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700884
David S. Miller1d861aa2012-07-10 03:58:16 -0700885 net = dev_net(rt->dst.dev);
David Ahern192132b2015-08-27 16:07:03 -0700886 peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, vif, 1);
David S. Miller92d86822011-02-04 15:55:25 -0800887 if (!peer) {
Julian Anastasove81da0e2012-10-08 11:41:15 +0000888 icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST,
889 rt_nexthop(rt, ip_hdr(skb)->daddr));
David S. Miller92d86822011-02-04 15:55:25 -0800890 return;
891 }
892
Linus Torvalds1da177e2005-04-16 15:20:36 -0700893 /* No redirected packets during ip_rt_redirect_silence;
894 * reset the algorithm.
895 */
David S. Miller92d86822011-02-04 15:55:25 -0800896 if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence))
897 peer->rate_tokens = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700898
899 /* Too many ignored redirects; do not send anything
Changli Gaod8d1f302010-06-10 23:31:35 -0700900 * set dst.rate_last to the last seen redirected packet.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700901 */
David S. Miller92d86822011-02-04 15:55:25 -0800902 if (peer->rate_tokens >= ip_rt_redirect_number) {
903 peer->rate_last = jiffies;
David S. Miller1d861aa2012-07-10 03:58:16 -0700904 goto out_put_peer;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700905 }
906
907 /* Check for load limit; set rate_last to the latest sent
908 * redirect.
909 */
David S. Miller92d86822011-02-04 15:55:25 -0800910 if (peer->rate_tokens == 0 ||
Li Yewang14fb8a72006-12-18 00:26:35 -0800911 time_after(jiffies,
David S. Miller92d86822011-02-04 15:55:25 -0800912 (peer->rate_last +
913 (ip_rt_redirect_load << peer->rate_tokens)))) {
Julian Anastasove81da0e2012-10-08 11:41:15 +0000914 __be32 gw = rt_nexthop(rt, ip_hdr(skb)->daddr);
915
916 icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw);
David S. Miller92d86822011-02-04 15:55:25 -0800917 peer->rate_last = jiffies;
918 ++peer->rate_tokens;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700919#ifdef CONFIG_IP_ROUTE_VERBOSE
Eric Dumazet30038fc2009-08-28 23:52:01 -0700920 if (log_martians &&
Joe Perchese87cc472012-05-13 21:56:26 +0000921 peer->rate_tokens == ip_rt_redirect_number)
922 net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n",
David S. Miller92101b32012-07-23 16:29:00 -0700923 &ip_hdr(skb)->saddr, inet_iif(skb),
Julian Anastasove81da0e2012-10-08 11:41:15 +0000924 &ip_hdr(skb)->daddr, &gw);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700925#endif
926 }
David S. Miller1d861aa2012-07-10 03:58:16 -0700927out_put_peer:
928 inet_putpeer(peer);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700929}
930
931static int ip_error(struct sk_buff *skb)
932{
David S. Miller251da412012-06-26 16:27:09 -0700933 struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
Eric Dumazet511c3f92009-06-02 05:14:27 +0000934 struct rtable *rt = skb_rtable(skb);
David S. Miller92d86822011-02-04 15:55:25 -0800935 struct inet_peer *peer;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700936 unsigned long now;
David S. Miller251da412012-06-26 16:27:09 -0700937 struct net *net;
David S. Miller92d86822011-02-04 15:55:25 -0800938 bool send;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700939 int code;
940
Eric W. Biederman381c7592015-05-22 04:58:12 -0500941 /* IP on this device is disabled. */
942 if (!in_dev)
943 goto out;
944
David S. Miller251da412012-06-26 16:27:09 -0700945 net = dev_net(rt->dst.dev);
946 if (!IN_DEV_FORWARD(in_dev)) {
947 switch (rt->dst.error) {
948 case EHOSTUNREACH:
Eric Dumazetb45386e2016-04-27 16:44:35 -0700949 __IP_INC_STATS(net, IPSTATS_MIB_INADDRERRORS);
David S. Miller251da412012-06-26 16:27:09 -0700950 break;
951
952 case ENETUNREACH:
Eric Dumazetb45386e2016-04-27 16:44:35 -0700953 __IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES);
David S. Miller251da412012-06-26 16:27:09 -0700954 break;
955 }
956 goto out;
957 }
958
Changli Gaod8d1f302010-06-10 23:31:35 -0700959 switch (rt->dst.error) {
Joe Perches4500ebf2011-07-01 09:43:07 +0000960 case EINVAL:
961 default:
962 goto out;
963 case EHOSTUNREACH:
964 code = ICMP_HOST_UNREACH;
965 break;
966 case ENETUNREACH:
967 code = ICMP_NET_UNREACH;
Eric Dumazetb45386e2016-04-27 16:44:35 -0700968 __IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES);
Joe Perches4500ebf2011-07-01 09:43:07 +0000969 break;
970 case EACCES:
971 code = ICMP_PKT_FILTERED;
972 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700973 }
974
David Ahern192132b2015-08-27 16:07:03 -0700975 peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr,
David Ahern385add92015-09-29 20:07:13 -0700976 l3mdev_master_ifindex(skb->dev), 1);
David S. Miller92d86822011-02-04 15:55:25 -0800977
978 send = true;
979 if (peer) {
980 now = jiffies;
981 peer->rate_tokens += now - peer->rate_last;
982 if (peer->rate_tokens > ip_rt_error_burst)
983 peer->rate_tokens = ip_rt_error_burst;
984 peer->rate_last = now;
985 if (peer->rate_tokens >= ip_rt_error_cost)
986 peer->rate_tokens -= ip_rt_error_cost;
987 else
988 send = false;
David S. Miller1d861aa2012-07-10 03:58:16 -0700989 inet_putpeer(peer);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700990 }
David S. Miller92d86822011-02-04 15:55:25 -0800991 if (send)
992 icmp_send(skb, ICMP_DEST_UNREACH, code, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700993
994out: kfree_skb(skb);
995 return 0;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900996}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700997
Steffen Klassertd851c122012-10-07 22:47:25 +0000998static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700999{
Steffen Klassertd851c122012-10-07 22:47:25 +00001000 struct dst_entry *dst = &rt->dst;
David S. Miller4895c772012-07-17 04:19:00 -07001001 struct fib_result res;
David S. Miller2c8cec52011-02-09 20:42:07 -08001002
Steffen Klassertfa1e4922013-01-16 20:58:10 +00001003 if (dst_metric_locked(dst, RTAX_MTU))
1004 return;
1005
Herbert Xucb6ccf02015-04-28 11:43:15 +08001006 if (ipv4_mtu(dst) < mtu)
Li Wei3cdaa5b2015-01-29 16:09:03 +08001007 return;
1008
David S. Miller59436342012-07-10 06:58:42 -07001009 if (mtu < ip_rt_min_pmtu)
1010 mtu = ip_rt_min_pmtu;
Eric Dumazetfe6fe792011-06-08 06:07:07 +00001011
Timo Teräsf0162292013-05-27 20:46:32 +00001012 if (rt->rt_pmtu == mtu &&
1013 time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2))
1014 return;
1015
Eric Dumazetc5ae7d42012-08-28 12:33:07 +00001016 rcu_read_lock();
Andy Gospodarek0eeb0752015-06-23 13:45:37 -04001017 if (fib_lookup(dev_net(dst->dev), fl4, &res, 0) == 0) {
David S. Miller4895c772012-07-17 04:19:00 -07001018 struct fib_nh *nh = &FIB_RES_NH(res);
David S. Miller4895c772012-07-17 04:19:00 -07001019
Julian Anastasovaee06da2012-07-18 10:15:35 +00001020 update_or_create_fnhe(nh, fl4->daddr, 0, mtu,
1021 jiffies + ip_rt_mtu_expires);
David S. Miller4895c772012-07-17 04:19:00 -07001022 }
Eric Dumazetc5ae7d42012-08-28 12:33:07 +00001023 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001024}
1025
David S. Miller4895c772012-07-17 04:19:00 -07001026static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1027 struct sk_buff *skb, u32 mtu)
1028{
1029 struct rtable *rt = (struct rtable *) dst;
1030 struct flowi4 fl4;
1031
1032 ip_rt_build_flow_key(&fl4, sk, skb);
Steffen Klassertd851c122012-10-07 22:47:25 +00001033 __ip_rt_update_pmtu(rt, &fl4, mtu);
David S. Miller4895c772012-07-17 04:19:00 -07001034}
1035
David S. Miller36393392012-06-14 22:21:46 -07001036void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
1037 int oif, u32 mark, u8 protocol, int flow_flags)
1038{
David S. Miller4895c772012-07-17 04:19:00 -07001039 const struct iphdr *iph = (const struct iphdr *) skb->data;
David S. Miller36393392012-06-14 22:21:46 -07001040 struct flowi4 fl4;
1041 struct rtable *rt;
1042
Lorenzo Colitti1b3c61d2014-05-13 10:17:34 -07001043 if (!mark)
1044 mark = IP4_REPLY_MARK(net, skb->mark);
1045
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09001046 __build_flow_key(net, &fl4, NULL, iph, oif,
David S. Miller4895c772012-07-17 04:19:00 -07001047 RT_TOS(iph->tos), protocol, mark, flow_flags);
David S. Miller36393392012-06-14 22:21:46 -07001048 rt = __ip_route_output_key(net, &fl4);
1049 if (!IS_ERR(rt)) {
David S. Miller4895c772012-07-17 04:19:00 -07001050 __ip_rt_update_pmtu(rt, &fl4, mtu);
David S. Miller36393392012-06-14 22:21:46 -07001051 ip_rt_put(rt);
1052 }
1053}
1054EXPORT_SYMBOL_GPL(ipv4_update_pmtu);
1055
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001056static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
David S. Miller36393392012-06-14 22:21:46 -07001057{
David S. Miller4895c772012-07-17 04:19:00 -07001058 const struct iphdr *iph = (const struct iphdr *) skb->data;
1059 struct flowi4 fl4;
1060 struct rtable *rt;
David S. Miller36393392012-06-14 22:21:46 -07001061
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09001062 __build_flow_key(sock_net(sk), &fl4, sk, iph, 0, 0, 0, 0, 0);
Lorenzo Colitti1b3c61d2014-05-13 10:17:34 -07001063
1064 if (!fl4.flowi4_mark)
1065 fl4.flowi4_mark = IP4_REPLY_MARK(sock_net(sk), skb->mark);
1066
David S. Miller4895c772012-07-17 04:19:00 -07001067 rt = __ip_route_output_key(sock_net(sk), &fl4);
1068 if (!IS_ERR(rt)) {
1069 __ip_rt_update_pmtu(rt, &fl4, mtu);
1070 ip_rt_put(rt);
1071 }
David S. Miller36393392012-06-14 22:21:46 -07001072}
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001073
1074void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
1075{
1076 const struct iphdr *iph = (const struct iphdr *) skb->data;
1077 struct flowi4 fl4;
1078 struct rtable *rt;
Eric Dumazet7f502362014-06-30 01:26:23 -07001079 struct dst_entry *odst = NULL;
Steffen Klassertb44108d2013-01-22 00:01:28 +00001080 bool new = false;
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09001081 struct net *net = sock_net(sk);
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001082
1083 bh_lock_sock(sk);
Hannes Frederic Sowa482fc602013-11-05 02:24:17 +01001084
1085 if (!ip_sk_accept_pmtu(sk))
1086 goto out;
1087
Eric Dumazet7f502362014-06-30 01:26:23 -07001088 odst = sk_dst_get(sk);
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001089
Eric Dumazet7f502362014-06-30 01:26:23 -07001090 if (sock_owned_by_user(sk) || !odst) {
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001091 __ipv4_sk_update_pmtu(skb, sk, mtu);
1092 goto out;
1093 }
1094
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09001095 __build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0);
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001096
Eric Dumazet7f502362014-06-30 01:26:23 -07001097 rt = (struct rtable *)odst;
Ian Morris51456b22015-04-03 09:17:26 +01001098 if (odst->obsolete && !odst->ops->check(odst, 0)) {
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001099 rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
1100 if (IS_ERR(rt))
1101 goto out;
Steffen Klassertb44108d2013-01-22 00:01:28 +00001102
1103 new = true;
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001104 }
1105
David Miller0f6c4802017-11-28 15:40:46 -05001106 __ip_rt_update_pmtu((struct rtable *) xfrm_dst_path(&rt->dst), &fl4, mtu);
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001107
Eric Dumazet7f502362014-06-30 01:26:23 -07001108 if (!dst_check(&rt->dst, 0)) {
Steffen Klassertb44108d2013-01-22 00:01:28 +00001109 if (new)
1110 dst_release(&rt->dst);
1111
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001112 rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
1113 if (IS_ERR(rt))
1114 goto out;
1115
Steffen Klassertb44108d2013-01-22 00:01:28 +00001116 new = true;
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001117 }
1118
Steffen Klassertb44108d2013-01-22 00:01:28 +00001119 if (new)
Eric Dumazet7f502362014-06-30 01:26:23 -07001120 sk_dst_set(sk, &rt->dst);
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001121
1122out:
1123 bh_unlock_sock(sk);
Eric Dumazet7f502362014-06-30 01:26:23 -07001124 dst_release(odst);
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001125}
David S. Miller36393392012-06-14 22:21:46 -07001126EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu);
David S. Millerf39925d2011-02-09 22:00:16 -08001127
David S. Millerb42597e2012-07-11 21:25:45 -07001128void ipv4_redirect(struct sk_buff *skb, struct net *net,
1129 int oif, u32 mark, u8 protocol, int flow_flags)
1130{
David S. Miller4895c772012-07-17 04:19:00 -07001131 const struct iphdr *iph = (const struct iphdr *) skb->data;
David S. Millerb42597e2012-07-11 21:25:45 -07001132 struct flowi4 fl4;
1133 struct rtable *rt;
1134
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09001135 __build_flow_key(net, &fl4, NULL, iph, oif,
David S. Miller4895c772012-07-17 04:19:00 -07001136 RT_TOS(iph->tos), protocol, mark, flow_flags);
David S. Millerb42597e2012-07-11 21:25:45 -07001137 rt = __ip_route_output_key(net, &fl4);
1138 if (!IS_ERR(rt)) {
David S. Millerceb33202012-07-17 11:31:28 -07001139 __ip_do_redirect(rt, skb, &fl4, false);
David S. Millerb42597e2012-07-11 21:25:45 -07001140 ip_rt_put(rt);
1141 }
1142}
1143EXPORT_SYMBOL_GPL(ipv4_redirect);
1144
1145void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk)
1146{
David S. Miller4895c772012-07-17 04:19:00 -07001147 const struct iphdr *iph = (const struct iphdr *) skb->data;
1148 struct flowi4 fl4;
1149 struct rtable *rt;
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09001150 struct net *net = sock_net(sk);
David S. Millerb42597e2012-07-11 21:25:45 -07001151
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09001152 __build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0);
1153 rt = __ip_route_output_key(net, &fl4);
David S. Miller4895c772012-07-17 04:19:00 -07001154 if (!IS_ERR(rt)) {
David S. Millerceb33202012-07-17 11:31:28 -07001155 __ip_do_redirect(rt, skb, &fl4, false);
David S. Miller4895c772012-07-17 04:19:00 -07001156 ip_rt_put(rt);
1157 }
David S. Millerb42597e2012-07-11 21:25:45 -07001158}
1159EXPORT_SYMBOL_GPL(ipv4_sk_redirect);
1160
David S. Millerefbc368d2011-12-01 13:38:59 -05001161static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
1162{
1163 struct rtable *rt = (struct rtable *) dst;
1164
David S. Millerceb33202012-07-17 11:31:28 -07001165 /* All IPV4 dsts are created with ->obsolete set to the value
1166 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1167 * into this function always.
1168 *
Timo Teräs387aa652013-05-27 20:46:31 +00001169 * When a PMTU/redirect information update invalidates a route,
1170 * this is indicated by setting obsolete to DST_OBSOLETE_KILL or
1171 * DST_OBSOLETE_DEAD by dst_free().
David S. Millerceb33202012-07-17 11:31:28 -07001172 */
Timo Teräs387aa652013-05-27 20:46:31 +00001173 if (dst->obsolete != DST_OBSOLETE_FORCE_CHK || rt_is_expired(rt))
David S. Millerefbc368d2011-12-01 13:38:59 -05001174 return NULL;
Timo Teräsd11a4dc2010-03-18 23:20:20 +00001175 return dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001176}
1177
Linus Torvalds1da177e2005-04-16 15:20:36 -07001178static void ipv4_link_failure(struct sk_buff *skb)
1179{
1180 struct rtable *rt;
1181
1182 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
1183
Eric Dumazet511c3f92009-06-02 05:14:27 +00001184 rt = skb_rtable(skb);
David S. Miller59436342012-07-10 06:58:42 -07001185 if (rt)
1186 dst_set_expires(&rt->dst, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001187}
1188
Eric W. Biedermanede20592015-10-07 16:48:47 -05001189static int ip_rt_bug(struct net *net, struct sock *sk, struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001190{
Joe Perches91df42b2012-05-15 14:11:54 +00001191 pr_debug("%s: %pI4 -> %pI4, %s\n",
1192 __func__, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr,
1193 skb->dev ? skb->dev->name : "?");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001194 kfree_skb(skb);
Dave Jonesc378a9c2011-05-21 07:16:42 +00001195 WARN_ON(1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001196 return 0;
1197}
1198
1199/*
1200 We do not cache source address of outgoing interface,
1201 because it is used only by IP RR, TS and SRR options,
1202 so that it out of fast path.
1203
1204 BTW remember: "addr" is allowed to be not aligned
1205 in IP options!
1206 */
1207
David S. Miller8e363602011-05-13 17:29:41 -04001208void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001209{
Al Viroa61ced52006-09-26 21:27:54 -07001210 __be32 src;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001211
David S. Millerc7537962010-11-11 17:07:48 -08001212 if (rt_is_output_route(rt))
David S. Millerc5be24f2011-05-13 18:01:21 -04001213 src = ip_hdr(skb)->saddr;
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001214 else {
David S. Miller8e363602011-05-13 17:29:41 -04001215 struct fib_result res;
1216 struct flowi4 fl4;
1217 struct iphdr *iph;
1218
1219 iph = ip_hdr(skb);
1220
1221 memset(&fl4, 0, sizeof(fl4));
1222 fl4.daddr = iph->daddr;
1223 fl4.saddr = iph->saddr;
Julian Anastasovb0fe4a32011-07-23 02:00:41 +00001224 fl4.flowi4_tos = RT_TOS(iph->tos);
David S. Miller8e363602011-05-13 17:29:41 -04001225 fl4.flowi4_oif = rt->dst.dev->ifindex;
1226 fl4.flowi4_iif = skb->dev->ifindex;
1227 fl4.flowi4_mark = skb->mark;
David S. Miller5e2b61f2011-03-04 21:47:09 -08001228
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001229 rcu_read_lock();
Andy Gospodarek0eeb0752015-06-23 13:45:37 -04001230 if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res, 0) == 0)
David S. Miller436c3b62011-03-24 17:42:21 -07001231 src = FIB_RES_PREFSRC(dev_net(rt->dst.dev), res);
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001232 else
David S. Millerf8126f12012-07-13 05:03:45 -07001233 src = inet_select_addr(rt->dst.dev,
1234 rt_nexthop(rt, iph->daddr),
1235 RT_SCOPE_UNIVERSE);
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001236 rcu_read_unlock();
1237 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001238 memcpy(addr, &src, 4);
1239}
1240
#ifdef CONFIG_IP_ROUTE_CLASSID
/*
 * Merge @tag into the route's tclassid, one 16-bit half at a time: a half
 * that is already non-zero is left alone, an empty half takes the
 * corresponding half of @tag.
 */
static void set_class_tag(struct rtable *rt, u32 tag)
{
	u32 low_half = 0xFFFF;
	u32 high_half = 0xFFFF0000;

	if ((rt->dst.tclassid & low_half) == 0)
		rt->dst.tclassid |= tag & low_half;
	if ((rt->dst.tclassid & high_half) == 0)
		rt->dst.tclassid |= tag & high_half;
}
#endif
1250
David S. Miller0dbaee32010-12-13 12:52:14 -08001251static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
1252{
Gao Feng7ed14d92017-04-12 12:34:03 +08001253 unsigned int header_size = sizeof(struct tcphdr) + sizeof(struct iphdr);
Eric Dumazet164a5e72017-10-18 17:02:03 -07001254 unsigned int advmss = max_t(unsigned int, ipv4_mtu(dst) - header_size,
Gao Feng7ed14d92017-04-12 12:34:03 +08001255 ip_rt_min_advmss);
David S. Miller0dbaee32010-12-13 12:52:14 -08001256
Gao Feng7ed14d92017-04-12 12:34:03 +08001257 return min(advmss, IPV4_MAX_PMTU - header_size);
David S. Miller0dbaee32010-12-13 12:52:14 -08001258}
1259
Steffen Klassertebb762f2011-11-23 02:12:51 +00001260static unsigned int ipv4_mtu(const struct dst_entry *dst)
David S. Millerd33e4552010-12-14 13:01:14 -08001261{
Steffen Klassert261663b2011-11-23 02:14:50 +00001262 const struct rtable *rt = (const struct rtable *) dst;
David S. Miller59436342012-07-10 06:58:42 -07001263 unsigned int mtu = rt->rt_pmtu;
1264
Alexander Duyck98d75c32012-08-27 06:30:01 +00001265 if (!mtu || time_after_eq(jiffies, rt->dst.expires))
David S. Miller59436342012-07-10 06:58:42 -07001266 mtu = dst_metric_raw(dst, RTAX_MTU);
Steffen Klassert618f9bc2011-11-23 02:13:31 +00001267
Steffen Klassert38d523e2013-01-16 20:55:01 +00001268 if (mtu)
Steffen Klassert618f9bc2011-11-23 02:13:31 +00001269 return mtu;
1270
Eric Dumazetc780a042017-08-16 11:09:12 -07001271 mtu = READ_ONCE(dst->dev->mtu);
David S. Millerd33e4552010-12-14 13:01:14 -08001272
1273 if (unlikely(dst_metric_locked(dst, RTAX_MTU))) {
Julian Anastasov155e8332012-10-08 11:41:18 +00001274 if (rt->rt_uses_gateway && mtu > 576)
David S. Millerd33e4552010-12-14 13:01:14 -08001275 mtu = 576;
1276 }
1277
Roopa Prabhu14972cb2016-08-24 20:10:43 -07001278 mtu = min_t(unsigned int, mtu, IP_MAX_MTU);
1279
1280 return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
David S. Millerd33e4552010-12-14 13:01:14 -08001281}
1282
David S. Millerf2bb4be2012-07-17 12:20:47 -07001283static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
David S. Miller4895c772012-07-17 04:19:00 -07001284{
Eric Dumazetcaa41522014-09-03 22:21:56 -07001285 struct fnhe_hash_bucket *hash = rcu_dereference(nh->nh_exceptions);
David S. Miller4895c772012-07-17 04:19:00 -07001286 struct fib_nh_exception *fnhe;
1287 u32 hval;
1288
David S. Millerf2bb4be2012-07-17 12:20:47 -07001289 if (!hash)
1290 return NULL;
1291
David S. Millerd3a25c92012-07-17 13:23:08 -07001292 hval = fnhe_hashfun(daddr);
David S. Miller4895c772012-07-17 04:19:00 -07001293
1294 for (fnhe = rcu_dereference(hash[hval].chain); fnhe;
1295 fnhe = rcu_dereference(fnhe->fnhe_next)) {
David S. Millerf2bb4be2012-07-17 12:20:47 -07001296 if (fnhe->fnhe_daddr == daddr)
1297 return fnhe;
1298 }
1299 return NULL;
1300}
David S. Miller4895c772012-07-17 04:19:00 -07001301
David S. Millercaacf052012-07-31 15:06:50 -07001302static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
Wei Wanga4c2fd72017-06-17 10:42:42 -07001303 __be32 daddr, const bool do_cache)
David S. Millerf2bb4be2012-07-17 12:20:47 -07001304{
David S. Millercaacf052012-07-31 15:06:50 -07001305 bool ret = false;
1306
David S. Millerc5038a82012-07-31 15:02:02 -07001307 spin_lock_bh(&fnhe_lock);
Julian Anastasovaee06da2012-07-18 10:15:35 +00001308
David S. Millerc5038a82012-07-31 15:02:02 -07001309 if (daddr == fnhe->fnhe_daddr) {
Timo Teräs2ffae992013-06-27 10:27:05 +03001310 struct rtable __rcu **porig;
1311 struct rtable *orig;
Timo Teräs5aad1de2013-05-27 20:46:33 +00001312 int genid = fnhe_genid(dev_net(rt->dst.dev));
Timo Teräs2ffae992013-06-27 10:27:05 +03001313
1314 if (rt_is_input_route(rt))
1315 porig = &fnhe->fnhe_rth_input;
1316 else
1317 porig = &fnhe->fnhe_rth_output;
1318 orig = rcu_dereference(*porig);
Timo Teräs5aad1de2013-05-27 20:46:33 +00001319
1320 if (fnhe->fnhe_genid != genid) {
1321 fnhe->fnhe_genid = genid;
Steffen Klassert13d82bf2012-10-17 21:17:44 +00001322 fnhe->fnhe_gw = 0;
1323 fnhe->fnhe_pmtu = 0;
1324 fnhe->fnhe_expires = 0;
Timo Teräs2ffae992013-06-27 10:27:05 +03001325 fnhe_flush_routes(fnhe);
1326 orig = NULL;
Steffen Klassert13d82bf2012-10-17 21:17:44 +00001327 }
Timo Teräs387aa652013-05-27 20:46:31 +00001328 fill_route_from_fnhe(rt, fnhe);
1329 if (!rt->rt_gateway)
Julian Anastasov155e8332012-10-08 11:41:18 +00001330 rt->rt_gateway = daddr;
David S. Millerf2bb4be2012-07-17 12:20:47 -07001331
Wei Wanga4c2fd72017-06-17 10:42:42 -07001332 if (do_cache) {
Wei Wang08301062017-06-17 10:42:29 -07001333 dst_hold(&rt->dst);
Timo Teräs2ffae992013-06-27 10:27:05 +03001334 rcu_assign_pointer(*porig, rt);
Wei Wang08301062017-06-17 10:42:29 -07001335 if (orig) {
Wei Wang95c47f92017-06-17 10:42:30 -07001336 dst_dev_put(&orig->dst);
Wei Wang08301062017-06-17 10:42:29 -07001337 dst_release(&orig->dst);
Wei Wang08301062017-06-17 10:42:29 -07001338 }
Timo Teräs2ffae992013-06-27 10:27:05 +03001339 ret = true;
1340 }
David S. Millerc5038a82012-07-31 15:02:02 -07001341
1342 fnhe->fnhe_stamp = jiffies;
David S. Millerc5038a82012-07-31 15:02:02 -07001343 }
1344 spin_unlock_bh(&fnhe_lock);
David S. Millercaacf052012-07-31 15:06:50 -07001345
1346 return ret;
Eric Dumazet54764bb2012-07-31 01:08:23 +00001347}
1348
David S. Millercaacf052012-07-31 15:06:50 -07001349static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt)
David S. Millerf2bb4be2012-07-17 12:20:47 -07001350{
Eric Dumazetd26b3a72012-07-31 05:45:30 +00001351 struct rtable *orig, *prev, **p;
David S. Millercaacf052012-07-31 15:06:50 -07001352 bool ret = true;
David S. Millerf2bb4be2012-07-17 12:20:47 -07001353
Eric Dumazetd26b3a72012-07-31 05:45:30 +00001354 if (rt_is_input_route(rt)) {
Eric Dumazet54764bb2012-07-31 01:08:23 +00001355 p = (struct rtable **)&nh->nh_rth_input;
Eric Dumazetd26b3a72012-07-31 05:45:30 +00001356 } else {
Christoph Lameter903ceff2014-08-17 12:30:35 -05001357 p = (struct rtable **)raw_cpu_ptr(nh->nh_pcpu_rth_output);
Eric Dumazetd26b3a72012-07-31 05:45:30 +00001358 }
David S. Millerf2bb4be2012-07-17 12:20:47 -07001359 orig = *p;
1360
Wei Wang08301062017-06-17 10:42:29 -07001361 /* hold dst before doing cmpxchg() to avoid race condition
1362 * on this dst
1363 */
1364 dst_hold(&rt->dst);
David S. Millerf2bb4be2012-07-17 12:20:47 -07001365 prev = cmpxchg(p, orig, rt);
1366 if (prev == orig) {
Wei Wang08301062017-06-17 10:42:29 -07001367 if (orig) {
Wei Wang95c47f92017-06-17 10:42:30 -07001368 dst_dev_put(&orig->dst);
Wei Wang08301062017-06-17 10:42:29 -07001369 dst_release(&orig->dst);
Wei Wang08301062017-06-17 10:42:29 -07001370 }
1371 } else {
1372 dst_release(&rt->dst);
David S. Millercaacf052012-07-31 15:06:50 -07001373 ret = false;
Wei Wang08301062017-06-17 10:42:29 -07001374 }
David S. Millercaacf052012-07-31 15:06:50 -07001375
1376 return ret;
1377}
1378
Eric Dumazet5055c372015-01-14 15:17:06 -08001379struct uncached_list {
1380 spinlock_t lock;
1381 struct list_head head;
1382};
1383
1384static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt_uncached_list);
David S. Millercaacf052012-07-31 15:06:50 -07001385
1386static void rt_add_uncached_list(struct rtable *rt)
1387{
Eric Dumazet5055c372015-01-14 15:17:06 -08001388 struct uncached_list *ul = raw_cpu_ptr(&rt_uncached_list);
1389
1390 rt->rt_uncached_list = ul;
1391
1392 spin_lock_bh(&ul->lock);
1393 list_add_tail(&rt->rt_uncached, &ul->head);
1394 spin_unlock_bh(&ul->lock);
David S. Millercaacf052012-07-31 15:06:50 -07001395}
1396
1397static void ipv4_dst_destroy(struct dst_entry *dst)
1398{
Eric Dumazet3fb07da2017-05-25 14:27:35 -07001399 struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst);
David S. Millercaacf052012-07-31 15:06:50 -07001400 struct rtable *rt = (struct rtable *) dst;
1401
Eric Dumazet9620fef2017-08-18 12:08:07 -07001402 if (p != &dst_default_metrics && refcount_dec_and_test(&p->refcnt))
Eric Dumazet3fb07da2017-05-25 14:27:35 -07001403 kfree(p);
1404
Eric Dumazet78df76a2012-08-24 05:40:47 +00001405 if (!list_empty(&rt->rt_uncached)) {
Eric Dumazet5055c372015-01-14 15:17:06 -08001406 struct uncached_list *ul = rt->rt_uncached_list;
1407
1408 spin_lock_bh(&ul->lock);
David S. Millercaacf052012-07-31 15:06:50 -07001409 list_del(&rt->rt_uncached);
Eric Dumazet5055c372015-01-14 15:17:06 -08001410 spin_unlock_bh(&ul->lock);
David S. Millercaacf052012-07-31 15:06:50 -07001411 }
1412}
1413
1414void rt_flush_dev(struct net_device *dev)
1415{
Eric Dumazet5055c372015-01-14 15:17:06 -08001416 struct net *net = dev_net(dev);
1417 struct rtable *rt;
1418 int cpu;
David S. Millercaacf052012-07-31 15:06:50 -07001419
Eric Dumazet5055c372015-01-14 15:17:06 -08001420 for_each_possible_cpu(cpu) {
1421 struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu);
1422
1423 spin_lock_bh(&ul->lock);
1424 list_for_each_entry(rt, &ul->head, rt_uncached) {
David S. Millercaacf052012-07-31 15:06:50 -07001425 if (rt->dst.dev != dev)
1426 continue;
1427 rt->dst.dev = net->loopback_dev;
1428 dev_hold(rt->dst.dev);
1429 dev_put(dev);
1430 }
Eric Dumazet5055c372015-01-14 15:17:06 -08001431 spin_unlock_bh(&ul->lock);
David S. Miller4895c772012-07-17 04:19:00 -07001432 }
1433}
1434
Eric Dumazet4331deb2012-07-25 05:11:23 +00001435static bool rt_cache_valid(const struct rtable *rt)
David S. Millerd2d68ba2012-07-17 12:58:50 -07001436{
Eric Dumazet4331deb2012-07-25 05:11:23 +00001437 return rt &&
1438 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1439 !rt_is_expired(rt);
David S. Millerd2d68ba2012-07-17 12:58:50 -07001440}
1441
David S. Millerf2bb4be2012-07-17 12:20:47 -07001442static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
David S. Miller5e2b61f2011-03-04 21:47:09 -08001443 const struct fib_result *res,
David S. Millerf2bb4be2012-07-17 12:20:47 -07001444 struct fib_nh_exception *fnhe,
Wei Wanga4c2fd72017-06-17 10:42:42 -07001445 struct fib_info *fi, u16 type, u32 itag,
1446 const bool do_cache)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001447{
David S. Millercaacf052012-07-31 15:06:50 -07001448 bool cached = false;
1449
Linus Torvalds1da177e2005-04-16 15:20:36 -07001450 if (fi) {
David S. Miller4895c772012-07-17 04:19:00 -07001451 struct fib_nh *nh = &FIB_RES_NH(*res);
1452
Julian Anastasov155e8332012-10-08 11:41:18 +00001453 if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) {
David S. Miller4895c772012-07-17 04:19:00 -07001454 rt->rt_gateway = nh->nh_gw;
Julian Anastasov155e8332012-10-08 11:41:18 +00001455 rt->rt_uses_gateway = 1;
1456 }
Eric Dumazet3fb07da2017-05-25 14:27:35 -07001457 dst_init_metrics(&rt->dst, fi->fib_metrics->metrics, true);
1458 if (fi->fib_metrics != &dst_default_metrics) {
1459 rt->dst._metrics |= DST_METRICS_REFCOUNTED;
Eric Dumazet9620fef2017-08-18 12:08:07 -07001460 refcount_inc(&fi->fib_metrics->refcnt);
Eric Dumazet3fb07da2017-05-25 14:27:35 -07001461 }
Patrick McHardyc7066f72011-01-14 13:36:42 +01001462#ifdef CONFIG_IP_ROUTE_CLASSID
David S. Millerf2bb4be2012-07-17 12:20:47 -07001463 rt->dst.tclassid = nh->nh_tclassid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001464#endif
Jiri Benc61adedf2015-08-20 13:56:25 +02001465 rt->dst.lwtstate = lwtstate_get(nh->nh_lwtstate);
David S. Millerc5038a82012-07-31 15:02:02 -07001466 if (unlikely(fnhe))
Wei Wanga4c2fd72017-06-17 10:42:42 -07001467 cached = rt_bind_exception(rt, fnhe, daddr, do_cache);
1468 else if (do_cache)
David S. Millercaacf052012-07-31 15:06:50 -07001469 cached = rt_cache_route(nh, rt);
Julian Anastasov155e8332012-10-08 11:41:18 +00001470 if (unlikely(!cached)) {
1471 /* Routes we intend to cache in nexthop exception or
1472 * FIB nexthop have the DST_NOCACHE bit clear.
1473 * However, if we are unsuccessful at storing this
1474 * route into the cache we really need to set it.
1475 */
Julian Anastasov155e8332012-10-08 11:41:18 +00001476 if (!rt->rt_gateway)
1477 rt->rt_gateway = daddr;
1478 rt_add_uncached_list(rt);
1479 }
1480 } else
David S. Millercaacf052012-07-31 15:06:50 -07001481 rt_add_uncached_list(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001482
Patrick McHardyc7066f72011-01-14 13:36:42 +01001483#ifdef CONFIG_IP_ROUTE_CLASSID
Linus Torvalds1da177e2005-04-16 15:20:36 -07001484#ifdef CONFIG_IP_MULTIPLE_TABLES
David S. Miller85b91b02012-07-13 08:21:29 -07001485 set_class_tag(rt, res->tclassid);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001486#endif
1487 set_class_tag(rt, itag);
1488#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001489}
1490
David Ahern9ab179d2016-04-07 11:10:06 -07001491struct rtable *rt_dst_alloc(struct net_device *dev,
1492 unsigned int flags, u16 type,
1493 bool nopolicy, bool noxfrm, bool will_cache)
David S. Miller0c4dcd52011-02-17 15:42:37 -08001494{
David Ahernd08c4f32015-09-02 13:58:34 -07001495 struct rtable *rt;
1496
1497 rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
Wei Wanga4c2fd72017-06-17 10:42:42 -07001498 (will_cache ? 0 : DST_HOST) |
David Ahernd08c4f32015-09-02 13:58:34 -07001499 (nopolicy ? DST_NOPOLICY : 0) |
Wei Wangb2a9c0e2017-06-17 10:42:41 -07001500 (noxfrm ? DST_NOXFRM : 0));
David Ahernd08c4f32015-09-02 13:58:34 -07001501
1502 if (rt) {
1503 rt->rt_genid = rt_genid_ipv4(dev_net(dev));
1504 rt->rt_flags = flags;
1505 rt->rt_type = type;
1506 rt->rt_is_input = 0;
1507 rt->rt_iif = 0;
1508 rt->rt_pmtu = 0;
1509 rt->rt_gateway = 0;
1510 rt->rt_uses_gateway = 0;
David Ahernb7503e02015-09-02 13:58:35 -07001511 rt->rt_table_id = 0;
David Ahernd08c4f32015-09-02 13:58:34 -07001512 INIT_LIST_HEAD(&rt->rt_uncached);
1513
1514 rt->dst.output = ip_output;
1515 if (flags & RTCF_LOCAL)
1516 rt->dst.input = ip_local_deliver;
1517 }
1518
1519 return rt;
David S. Miller0c4dcd52011-02-17 15:42:37 -08001520}
David Ahern9ab179d2016-04-07 11:10:06 -07001521EXPORT_SYMBOL(rt_dst_alloc);
David S. Miller0c4dcd52011-02-17 15:42:37 -08001522
Eric Dumazet96d36222010-06-02 19:21:31 +00001523/* called in rcu_read_lock() section */
Paolo Abenibc044e82017-09-28 15:51:37 +02001524int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1525 u8 tos, struct net_device *dev,
1526 struct in_device *in_dev, u32 *itag)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001527{
Eric Dumazetb5f7e752010-06-02 12:05:27 +00001528 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001529
1530 /* Primary sanity checks. */
Ian Morris51456b22015-04-03 09:17:26 +01001531 if (!in_dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001532 return -EINVAL;
1533
Jan Engelhardt1e637c72008-01-21 03:18:08 -08001534 if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
Thomas Grafd0daebc32012-06-12 00:44:01 +00001535 skb->protocol != htons(ETH_P_IP))
Paolo Abenibc044e82017-09-28 15:51:37 +02001536 return -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001537
Alexander Duyck75fea732015-09-28 11:10:38 -07001538 if (ipv4_is_loopback(saddr) && !IN_DEV_ROUTE_LOCALNET(in_dev))
Paolo Abenibc044e82017-09-28 15:51:37 +02001539 return -EINVAL;
Thomas Grafd0daebc32012-06-12 00:44:01 +00001540
Joe Perchesf97c1e02007-12-16 13:45:43 -08001541 if (ipv4_is_zeronet(saddr)) {
1542 if (!ipv4_is_local_multicast(daddr))
Paolo Abenibc044e82017-09-28 15:51:37 +02001543 return -EINVAL;
Eric Dumazetb5f7e752010-06-02 12:05:27 +00001544 } else {
David S. Miller9e56e382012-06-28 18:54:02 -07001545 err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
Paolo Abenibc044e82017-09-28 15:51:37 +02001546 in_dev, itag);
Eric Dumazetb5f7e752010-06-02 12:05:27 +00001547 if (err < 0)
Paolo Abenibc044e82017-09-28 15:51:37 +02001548 return err;
Eric Dumazetb5f7e752010-06-02 12:05:27 +00001549 }
Paolo Abenibc044e82017-09-28 15:51:37 +02001550 return 0;
1551}
1552
1553/* called in rcu_read_lock() section */
1554static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1555 u8 tos, struct net_device *dev, int our)
1556{
1557 struct in_device *in_dev = __in_dev_get_rcu(dev);
1558 unsigned int flags = RTCF_MULTICAST;
1559 struct rtable *rth;
1560 u32 itag = 0;
1561 int err;
1562
1563 err = ip_mc_validate_source(skb, daddr, saddr, tos, dev, in_dev, &itag);
1564 if (err)
1565 return err;
1566
David Ahernd08c4f32015-09-02 13:58:34 -07001567 if (our)
1568 flags |= RTCF_LOCAL;
1569
1570 rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST,
David S. Millerf2bb4be2012-07-17 12:20:47 -07001571 IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001572 if (!rth)
Paolo Abenibc044e82017-09-28 15:51:37 +02001573 return -ENOBUFS;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001574
Patrick McHardyc7066f72011-01-14 13:36:42 +01001575#ifdef CONFIG_IP_ROUTE_CLASSID
Changli Gaod8d1f302010-06-10 23:31:35 -07001576 rth->dst.tclassid = itag;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001577#endif
David S. Millercf911662011-04-28 14:31:47 -07001578 rth->dst.output = ip_rt_bug;
David S. Miller9917e1e82012-07-17 14:44:26 -07001579 rth->rt_is_input= 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001580
1581#ifdef CONFIG_IP_MROUTE
Joe Perchesf97c1e02007-12-16 13:45:43 -08001582 if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev))
Changli Gaod8d1f302010-06-10 23:31:35 -07001583 rth->dst.input = ip_mr_input;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001584#endif
1585 RT_CACHE_STAT_INC(in_slow_mc);
1586
David S. Miller89aef892012-07-17 11:00:09 -07001587 skb_dst_set(skb, &rth->dst);
1588 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001589}
1590
1591
1592static void ip_handle_martian_source(struct net_device *dev,
1593 struct in_device *in_dev,
1594 struct sk_buff *skb,
Al Viro9e12bb22006-09-26 21:25:20 -07001595 __be32 daddr,
1596 __be32 saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001597{
1598 RT_CACHE_STAT_INC(in_martian_src);
1599#ifdef CONFIG_IP_ROUTE_VERBOSE
1600 if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) {
1601 /*
1602 * RFC1812 recommendation, if source is martian,
1603 * the only hint is MAC header.
1604 */
Joe Perches058bd4d2012-03-11 18:36:11 +00001605 pr_warn("martian source %pI4 from %pI4, on dev %s\n",
Harvey Harrison673d57e2008-10-31 00:53:57 -07001606 &daddr, &saddr, dev->name);
Arnaldo Carvalho de Melo98e399f2007-03-19 15:33:04 -07001607 if (dev->hard_header_len && skb_mac_header_was_set(skb)) {
Joe Perches058bd4d2012-03-11 18:36:11 +00001608 print_hex_dump(KERN_WARNING, "ll header: ",
1609 DUMP_PREFIX_OFFSET, 16, 1,
1610 skb_mac_header(skb),
1611 dev->hard_header_len, true);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001612 }
1613 }
1614#endif
1615}
1616
Xin Longdeed49d2016-02-18 21:21:19 +08001617static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)
1618{
1619 struct fnhe_hash_bucket *hash;
1620 struct fib_nh_exception *fnhe, __rcu **fnhe_p;
1621 u32 hval = fnhe_hashfun(daddr);
1622
1623 spin_lock_bh(&fnhe_lock);
1624
1625 hash = rcu_dereference_protected(nh->nh_exceptions,
1626 lockdep_is_held(&fnhe_lock));
1627 hash += hval;
1628
1629 fnhe_p = &hash->chain;
1630 fnhe = rcu_dereference_protected(*fnhe_p, lockdep_is_held(&fnhe_lock));
1631 while (fnhe) {
1632 if (fnhe->fnhe_daddr == daddr) {
1633 rcu_assign_pointer(*fnhe_p, rcu_dereference_protected(
1634 fnhe->fnhe_next, lockdep_is_held(&fnhe_lock)));
1635 fnhe_flush_routes(fnhe);
1636 kfree_rcu(fnhe, rcu);
1637 break;
1638 }
1639 fnhe_p = &fnhe->fnhe_next;
1640 fnhe = rcu_dereference_protected(fnhe->fnhe_next,
1641 lockdep_is_held(&fnhe_lock));
1642 }
1643
1644 spin_unlock_bh(&fnhe_lock);
1645}
1646
Thomas Grafefd85702016-11-30 17:10:09 +01001647static void set_lwt_redirect(struct rtable *rth)
1648{
1649 if (lwtunnel_output_redirect(rth->dst.lwtstate)) {
1650 rth->dst.lwtstate->orig_output = rth->dst.output;
1651 rth->dst.output = lwtunnel_output;
1652 }
1653
1654 if (lwtunnel_input_redirect(rth->dst.lwtstate)) {
1655 rth->dst.lwtstate->orig_input = rth->dst.input;
1656 rth->dst.input = lwtunnel_input;
1657 }
1658}
1659
Eric Dumazet47360222010-06-03 04:13:21 +00001660/* called in rcu_read_lock() section */
Stephen Hemminger5969f712008-04-10 01:52:09 -07001661static int __mkroute_input(struct sk_buff *skb,
David S. Miller982721f2011-02-16 21:44:24 -08001662 const struct fib_result *res,
Stephen Hemminger5969f712008-04-10 01:52:09 -07001663 struct in_device *in_dev,
David S. Millerc6cffba2012-07-26 11:14:38 +00001664 __be32 daddr, __be32 saddr, u32 tos)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001665{
Timo Teräs2ffae992013-06-27 10:27:05 +03001666 struct fib_nh_exception *fnhe;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001667 struct rtable *rth;
1668 int err;
1669 struct in_device *out_dev;
David S. Millerd2d68ba2012-07-17 12:58:50 -07001670 bool do_cache;
Li RongQingfbdc0ad2014-05-22 16:36:55 +08001671 u32 itag = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001672
1673 /* get a working reference to the output device */
Eric Dumazet47360222010-06-03 04:13:21 +00001674 out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res));
Ian Morris51456b22015-04-03 09:17:26 +01001675 if (!out_dev) {
Joe Perchese87cc472012-05-13 21:56:26 +00001676 net_crit_ratelimited("Bug in ip_route_input_slow(). Please report.\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001677 return -EINVAL;
1678 }
1679
Michael Smith5c04c812011-04-07 04:51:50 +00001680 err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res),
David S. Miller9e56e382012-06-28 18:54:02 -07001681 in_dev->dev, in_dev, &itag);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001682 if (err < 0) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001683 ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001684 saddr);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001685
Linus Torvalds1da177e2005-04-16 15:20:36 -07001686 goto cleanup;
1687 }
1688
Julian Anastasove81da0e2012-10-08 11:41:15 +00001689 do_cache = res->fi && !itag;
1690 if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) &&
Hannes Frederic Sowadf4d9252015-01-23 12:01:26 +01001691 skb->protocol == htons(ETH_P_IP) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001692 (IN_DEV_SHARED_MEDIA(out_dev) ||
Hannes Frederic Sowadf4d9252015-01-23 12:01:26 +01001693 inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res))))
1694 IPCB(skb)->flags |= IPSKB_DOREDIRECT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001695
1696 if (skb->protocol != htons(ETH_P_IP)) {
1697 /* Not IP (i.e. ARP). Do not create route, if it is
1698 * invalid for proxy arp. DNAT routes are always valid.
Jesper Dangaard Brouer65324142010-01-05 05:50:47 +00001699 *
1700 * Proxy arp feature have been extended to allow, ARP
1701 * replies back to the same interface, to support
1702 * Private VLAN switch technologies. See arp.c.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001703 */
Jesper Dangaard Brouer65324142010-01-05 05:50:47 +00001704 if (out_dev == in_dev &&
1705 IN_DEV_PROXY_ARP_PVLAN(in_dev) == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001706 err = -EINVAL;
1707 goto cleanup;
1708 }
1709 }
1710
Timo Teräs2ffae992013-06-27 10:27:05 +03001711 fnhe = find_exception(&FIB_RES_NH(*res), daddr);
Julian Anastasove81da0e2012-10-08 11:41:15 +00001712 if (do_cache) {
Xin Longdeed49d2016-02-18 21:21:19 +08001713 if (fnhe) {
Timo Teräs2ffae992013-06-27 10:27:05 +03001714 rth = rcu_dereference(fnhe->fnhe_rth_input);
Xin Longdeed49d2016-02-18 21:21:19 +08001715 if (rth && rth->dst.expires &&
1716 time_after(jiffies, rth->dst.expires)) {
1717 ip_del_fnhe(&FIB_RES_NH(*res), daddr);
1718 fnhe = NULL;
1719 } else {
1720 goto rt_cache;
1721 }
1722 }
Timo Teräs2ffae992013-06-27 10:27:05 +03001723
Xin Longdeed49d2016-02-18 21:21:19 +08001724 rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
1725
1726rt_cache:
Julian Anastasove81da0e2012-10-08 11:41:15 +00001727 if (rt_cache_valid(rth)) {
1728 skb_dst_set_noref(skb, &rth->dst);
1729 goto out;
David S. Millerd2d68ba2012-07-17 12:58:50 -07001730 }
1731 }
David S. Millerf2bb4be2012-07-17 12:20:47 -07001732
David Ahernd08c4f32015-09-02 13:58:34 -07001733 rth = rt_dst_alloc(out_dev->dev, 0, res->type,
David S. Miller5c1e6aa2011-04-28 14:13:38 -07001734 IN_DEV_CONF_GET(in_dev, NOPOLICY),
David S. Millerd2d68ba2012-07-17 12:58:50 -07001735 IN_DEV_CONF_GET(out_dev, NOXFRM), do_cache);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001736 if (!rth) {
1737 err = -ENOBUFS;
1738 goto cleanup;
1739 }
1740
David S. Miller9917e1e82012-07-17 14:44:26 -07001741 rth->rt_is_input = 1;
David Ahernb7503e02015-09-02 13:58:35 -07001742 if (res->table)
1743 rth->rt_table_id = res->table->tb_id;
Duan Jionga6254862014-02-17 15:23:43 +08001744 RT_CACHE_STAT_INC(in_slow_tot);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001745
Changli Gaod8d1f302010-06-10 23:31:35 -07001746 rth->dst.input = ip_forward;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001747
Wei Wanga4c2fd72017-06-17 10:42:42 -07001748 rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag,
1749 do_cache);
Thomas Grafefd85702016-11-30 17:10:09 +01001750 set_lwt_redirect(rth);
David S. Millerc6cffba2012-07-26 11:14:38 +00001751 skb_dst_set(skb, &rth->dst);
David S. Millerd2d68ba2012-07-17 12:58:50 -07001752out:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001753 err = 0;
1754 cleanup:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001755 return err;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001756}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001757
Peter Nørlund79a13152015-09-30 10:12:22 +02001758#ifdef CONFIG_IP_ROUTE_MULTIPATH
Peter Nørlund79a13152015-09-30 10:12:22 +02001759/* To make ICMP packets follow the right flow, the multipath hash is
Nikolay Aleksandrovbf4e0a32017-03-16 15:28:00 +02001760 * calculated from the inner IP addresses.
Peter Nørlund79a13152015-09-30 10:12:22 +02001761 */
Nikolay Aleksandrovbf4e0a32017-03-16 15:28:00 +02001762static void ip_multipath_l3_keys(const struct sk_buff *skb,
1763 struct flow_keys *hash_keys)
Peter Nørlund79a13152015-09-30 10:12:22 +02001764{
1765 const struct iphdr *outer_iph = ip_hdr(skb);
Nikolay Aleksandrovbf4e0a32017-03-16 15:28:00 +02001766 const struct iphdr *inner_iph;
Peter Nørlund79a13152015-09-30 10:12:22 +02001767 const struct icmphdr *icmph;
1768 struct iphdr _inner_iph;
Nikolay Aleksandrovbf4e0a32017-03-16 15:28:00 +02001769 struct icmphdr _icmph;
1770
1771 hash_keys->addrs.v4addrs.src = outer_iph->saddr;
1772 hash_keys->addrs.v4addrs.dst = outer_iph->daddr;
1773 if (likely(outer_iph->protocol != IPPROTO_ICMP))
1774 return;
Peter Nørlund79a13152015-09-30 10:12:22 +02001775
1776 if (unlikely((outer_iph->frag_off & htons(IP_OFFSET)) != 0))
Nikolay Aleksandrovbf4e0a32017-03-16 15:28:00 +02001777 return;
Peter Nørlund79a13152015-09-30 10:12:22 +02001778
1779 icmph = skb_header_pointer(skb, outer_iph->ihl * 4, sizeof(_icmph),
1780 &_icmph);
1781 if (!icmph)
Nikolay Aleksandrovbf4e0a32017-03-16 15:28:00 +02001782 return;
Peter Nørlund79a13152015-09-30 10:12:22 +02001783
1784 if (icmph->type != ICMP_DEST_UNREACH &&
1785 icmph->type != ICMP_REDIRECT &&
1786 icmph->type != ICMP_TIME_EXCEEDED &&
Nikolay Aleksandrovbf4e0a32017-03-16 15:28:00 +02001787 icmph->type != ICMP_PARAMETERPROB)
1788 return;
Peter Nørlund79a13152015-09-30 10:12:22 +02001789
1790 inner_iph = skb_header_pointer(skb,
1791 outer_iph->ihl * 4 + sizeof(_icmph),
1792 sizeof(_inner_iph), &_inner_iph);
1793 if (!inner_iph)
Nikolay Aleksandrovbf4e0a32017-03-16 15:28:00 +02001794 return;
1795 hash_keys->addrs.v4addrs.src = inner_iph->saddr;
1796 hash_keys->addrs.v4addrs.dst = inner_iph->daddr;
Peter Nørlund79a13152015-09-30 10:12:22 +02001797}
1798
Nikolay Aleksandrovbf4e0a32017-03-16 15:28:00 +02001799/* if skb is set it will be used and fl4 can be NULL */
1800int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
1801 const struct sk_buff *skb)
1802{
1803 struct net *net = fi->fib_net;
1804 struct flow_keys hash_keys;
1805 u32 mhash;
1806
1807 switch (net->ipv4.sysctl_fib_multipath_hash_policy) {
1808 case 0:
1809 memset(&hash_keys, 0, sizeof(hash_keys));
1810 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
1811 if (skb) {
1812 ip_multipath_l3_keys(skb, &hash_keys);
1813 } else {
1814 hash_keys.addrs.v4addrs.src = fl4->saddr;
1815 hash_keys.addrs.v4addrs.dst = fl4->daddr;
1816 }
1817 break;
1818 case 1:
1819 /* skb is currently provided only when forwarding */
1820 if (skb) {
1821 unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
1822 struct flow_keys keys;
1823
1824 /* short-circuit if we already have L4 hash present */
1825 if (skb->l4_hash)
1826 return skb_get_hash_raw(skb) >> 1;
1827 memset(&hash_keys, 0, sizeof(hash_keys));
1828 skb_flow_dissect_flow_keys(skb, &keys, flag);
1829 hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
1830 hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst;
1831 hash_keys.ports.src = keys.ports.src;
1832 hash_keys.ports.dst = keys.ports.dst;
1833 hash_keys.basic.ip_proto = keys.basic.ip_proto;
1834 } else {
1835 memset(&hash_keys, 0, sizeof(hash_keys));
1836 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
1837 hash_keys.addrs.v4addrs.src = fl4->saddr;
1838 hash_keys.addrs.v4addrs.dst = fl4->daddr;
1839 hash_keys.ports.src = fl4->fl4_sport;
1840 hash_keys.ports.dst = fl4->fl4_dport;
1841 hash_keys.basic.ip_proto = fl4->flowi4_proto;
1842 }
1843 break;
1844 }
1845 mhash = flow_hash_from_keys(&hash_keys);
1846
1847 return mhash >> 1;
1848}
1849EXPORT_SYMBOL_GPL(fib_multipath_hash);
Peter Nørlund79a13152015-09-30 10:12:22 +02001850#endif /* CONFIG_IP_ROUTE_MULTIPATH */
1851
Stephen Hemminger5969f712008-04-10 01:52:09 -07001852static int ip_mkroute_input(struct sk_buff *skb,
1853 struct fib_result *res,
Stephen Hemminger5969f712008-04-10 01:52:09 -07001854 struct in_device *in_dev,
1855 __be32 daddr, __be32 saddr, u32 tos)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001856{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001857#ifdef CONFIG_IP_ROUTE_MULTIPATH
Peter Nørlund0e884c72015-09-30 10:12:21 +02001858 if (res->fi && res->fi->fib_nhs > 1) {
Nikolay Aleksandrovbf4e0a32017-03-16 15:28:00 +02001859 int h = fib_multipath_hash(res->fi, NULL, skb);
Peter Nørlund0e884c72015-09-30 10:12:21 +02001860
Peter Nørlund0e884c72015-09-30 10:12:21 +02001861 fib_select_multipath(res, h);
1862 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001863#endif
1864
1865 /* create a routing cache entry */
David S. Millerc6cffba2012-07-26 11:14:38 +00001866 return __mkroute_input(skb, res, in_dev, daddr, saddr, tos);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001867}
1868
/*
 *	NOTE. We drop all packets that have a local source address, because
 *	every properly looped-back packet must already have the correct
 *	destination attached by the output routine.
 *
 *	This approach solves two big problems:
 *	1. Non-simplex devices are handled properly.
 *	2. IP spoofing attempts are filtered with a 100% guarantee.
 *	Called with rcu_read_lock().
 */
1879
Al Viro9e12bb22006-09-26 21:25:20 -07001880static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
David Ahern5510cdf2017-05-25 10:42:34 -07001881 u8 tos, struct net_device *dev,
1882 struct fib_result *res)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001883{
Eric Dumazet96d36222010-06-02 19:21:31 +00001884 struct in_device *in_dev = __in_dev_get_rcu(dev);
Thomas Graf1b7179d2015-07-21 10:43:59 +02001885 struct ip_tunnel_info *tun_info;
David S. Miller68a5e3d2011-03-11 20:07:33 -05001886 struct flowi4 fl4;
Eric Dumazet95c96172012-04-15 05:58:06 +00001887 unsigned int flags = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001888 u32 itag = 0;
Eric Dumazet95c96172012-04-15 05:58:06 +00001889 struct rtable *rth;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001890 int err = -EINVAL;
Daniel Baluta5e73ea12012-04-15 01:34:41 +00001891 struct net *net = dev_net(dev);
David S. Millerd2d68ba2012-07-17 12:58:50 -07001892 bool do_cache;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001893
1894 /* IP on this device is disabled. */
1895
1896 if (!in_dev)
1897 goto out;
1898
1899 /* Check for the most weird martians, which can be not detected
1900 by fib_lookup.
1901 */
1902
Jiri Benc61adedf2015-08-20 13:56:25 +02001903 tun_info = skb_tunnel_info(skb);
Jiri Benc46fa0622015-08-28 20:48:19 +02001904 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
Thomas Graf1b7179d2015-07-21 10:43:59 +02001905 fl4.flowi4_tun_key.tun_id = tun_info->key.tun_id;
1906 else
1907 fl4.flowi4_tun_key.tun_id = 0;
Thomas Graff38a9eb2015-07-21 10:43:56 +02001908 skb_dst_drop(skb);
1909
Thomas Grafd0daebc32012-06-12 00:44:01 +00001910 if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001911 goto martian_source;
1912
David Ahern5510cdf2017-05-25 10:42:34 -07001913 res->fi = NULL;
1914 res->table = NULL;
Andy Walls27a954b2010-10-17 15:11:22 +00001915 if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001916 goto brd_input;
1917
1918 /* Accept zero addresses only to limited broadcast;
1919 * I even do not know to fix it or not. Waiting for complains :-)
1920 */
Joe Perchesf97c1e02007-12-16 13:45:43 -08001921 if (ipv4_is_zeronet(saddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001922 goto martian_source;
1923
Thomas Grafd0daebc32012-06-12 00:44:01 +00001924 if (ipv4_is_zeronet(daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001925 goto martian_destination;
1926
Eric Dumazet9eb43e72012-08-03 21:27:25 +00001927 /* Following code try to avoid calling IN_DEV_NET_ROUTE_LOCALNET(),
1928 * and call it once if daddr or/and saddr are loopback addresses
1929 */
1930 if (ipv4_is_loopback(daddr)) {
1931 if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
Thomas Grafd0daebc32012-06-12 00:44:01 +00001932 goto martian_destination;
Eric Dumazet9eb43e72012-08-03 21:27:25 +00001933 } else if (ipv4_is_loopback(saddr)) {
1934 if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
Thomas Grafd0daebc32012-06-12 00:44:01 +00001935 goto martian_source;
1936 }
1937
Linus Torvalds1da177e2005-04-16 15:20:36 -07001938 /*
1939 * Now we are ready to route packet.
1940 */
David S. Miller68a5e3d2011-03-11 20:07:33 -05001941 fl4.flowi4_oif = 0;
David Aherne0d56fd2016-09-10 12:09:57 -07001942 fl4.flowi4_iif = dev->ifindex;
David S. Miller68a5e3d2011-03-11 20:07:33 -05001943 fl4.flowi4_mark = skb->mark;
1944 fl4.flowi4_tos = tos;
1945 fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
David Ahernb84f7872015-09-29 19:07:07 -07001946 fl4.flowi4_flags = 0;
David S. Miller68a5e3d2011-03-11 20:07:33 -05001947 fl4.daddr = daddr;
1948 fl4.saddr = saddr;
Julian Anastasov8bcfd092017-02-26 15:50:52 +02001949 fl4.flowi4_uid = sock_net_uid(net, NULL);
David Ahern5510cdf2017-05-25 10:42:34 -07001950 err = fib_lookup(net, &fl4, res, 0);
Duan Jiongcd0f0b92014-02-14 18:26:22 +08001951 if (err != 0) {
1952 if (!IN_DEV_FORWARD(in_dev))
1953 err = -EHOSTUNREACH;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001954 goto no_route;
Duan Jiongcd0f0b92014-02-14 18:26:22 +08001955 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001956
David Ahern5510cdf2017-05-25 10:42:34 -07001957 if (res->type == RTN_BROADCAST)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001958 goto brd_input;
1959
David Ahern5510cdf2017-05-25 10:42:34 -07001960 if (res->type == RTN_LOCAL) {
Michael Smith5c04c812011-04-07 04:51:50 +00001961 err = fib_validate_source(skb, saddr, daddr, tos,
Cong Wang0d5edc62014-04-15 16:25:35 -07001962 0, dev, in_dev, &itag);
Eric Dumazetb5f7e752010-06-02 12:05:27 +00001963 if (err < 0)
David Ahern0d753962015-09-28 11:10:44 -07001964 goto martian_source;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001965 goto local_input;
1966 }
1967
Duan Jiongcd0f0b92014-02-14 18:26:22 +08001968 if (!IN_DEV_FORWARD(in_dev)) {
1969 err = -EHOSTUNREACH;
David S. Miller251da412012-06-26 16:27:09 -07001970 goto no_route;
Duan Jiongcd0f0b92014-02-14 18:26:22 +08001971 }
David Ahern5510cdf2017-05-25 10:42:34 -07001972 if (res->type != RTN_UNICAST)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001973 goto martian_destination;
1974
David Ahern5510cdf2017-05-25 10:42:34 -07001975 err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001976out: return err;
1977
1978brd_input:
1979 if (skb->protocol != htons(ETH_P_IP))
1980 goto e_inval;
1981
David S. Miller41347dc2012-06-28 04:05:27 -07001982 if (!ipv4_is_zeronet(saddr)) {
David S. Miller9e56e382012-06-28 18:54:02 -07001983 err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
1984 in_dev, &itag);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001985 if (err < 0)
David Ahern0d753962015-09-28 11:10:44 -07001986 goto martian_source;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001987 }
1988 flags |= RTCF_BROADCAST;
David Ahern5510cdf2017-05-25 10:42:34 -07001989 res->type = RTN_BROADCAST;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001990 RT_CACHE_STAT_INC(in_brd);
1991
1992local_input:
David S. Millerd2d68ba2012-07-17 12:58:50 -07001993 do_cache = false;
David Ahern5510cdf2017-05-25 10:42:34 -07001994 if (res->fi) {
David S. Millerfe3edf42012-07-23 13:22:20 -07001995 if (!itag) {
David Ahern5510cdf2017-05-25 10:42:34 -07001996 rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
David S. Millerd2d68ba2012-07-17 12:58:50 -07001997 if (rt_cache_valid(rth)) {
David S. Millerc6cffba2012-07-26 11:14:38 +00001998 skb_dst_set_noref(skb, &rth->dst);
1999 err = 0;
2000 goto out;
David S. Millerd2d68ba2012-07-17 12:58:50 -07002001 }
2002 do_cache = true;
2003 }
2004 }
2005
David Ahernf5a0aab2016-12-29 15:29:03 -08002006 rth = rt_dst_alloc(l3mdev_master_dev_rcu(dev) ? : net->loopback_dev,
David Ahern5510cdf2017-05-25 10:42:34 -07002007 flags | RTCF_LOCAL, res->type,
David S. Millerd2d68ba2012-07-17 12:58:50 -07002008 IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002009 if (!rth)
2010 goto e_nobufs;
2011
Changli Gaod8d1f302010-06-10 23:31:35 -07002012 rth->dst.output= ip_rt_bug;
David S. Millercf911662011-04-28 14:31:47 -07002013#ifdef CONFIG_IP_ROUTE_CLASSID
2014 rth->dst.tclassid = itag;
2015#endif
David S. Miller9917e1e82012-07-17 14:44:26 -07002016 rth->rt_is_input = 1;
David Ahern5510cdf2017-05-25 10:42:34 -07002017 if (res->table)
2018 rth->rt_table_id = res->table->tb_id;
Roopa Prabhu571e7222015-07-21 10:43:47 +02002019
Duan Jionga6254862014-02-17 15:23:43 +08002020 RT_CACHE_STAT_INC(in_slow_tot);
David Ahern5510cdf2017-05-25 10:42:34 -07002021 if (res->type == RTN_UNREACHABLE) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002022 rth->dst.input= ip_error;
2023 rth->dst.error= -err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002024 rth->rt_flags &= ~RTCF_LOCAL;
2025 }
Thomas Grafefd85702016-11-30 17:10:09 +01002026
Alexei Starovoitovdcdfdf52013-11-19 19:12:34 -08002027 if (do_cache) {
David Ahern5510cdf2017-05-25 10:42:34 -07002028 struct fib_nh *nh = &FIB_RES_NH(*res);
Thomas Grafefd85702016-11-30 17:10:09 +01002029
2030 rth->dst.lwtstate = lwtstate_get(nh->nh_lwtstate);
2031 if (lwtunnel_input_redirect(rth->dst.lwtstate)) {
2032 WARN_ON(rth->dst.input == lwtunnel_input);
2033 rth->dst.lwtstate->orig_input = rth->dst.input;
2034 rth->dst.input = lwtunnel_input;
2035 }
2036
Wei Wanga4c2fd72017-06-17 10:42:42 -07002037 if (unlikely(!rt_cache_route(nh, rth)))
Alexei Starovoitovdcdfdf52013-11-19 19:12:34 -08002038 rt_add_uncached_list(rth);
Alexei Starovoitovdcdfdf52013-11-19 19:12:34 -08002039 }
David S. Miller89aef892012-07-17 11:00:09 -07002040 skb_dst_set(skb, &rth->dst);
David S. Millerb23dd4f2011-03-02 14:31:35 -08002041 err = 0;
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00002042 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002043
2044no_route:
2045 RT_CACHE_STAT_INC(in_no_route);
David Ahern5510cdf2017-05-25 10:42:34 -07002046 res->type = RTN_UNREACHABLE;
2047 res->fi = NULL;
2048 res->table = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002049 goto local_input;
2050
2051 /*
2052 * Do not cache martian addresses: they should be logged (RFC1812)
2053 */
2054martian_destination:
2055 RT_CACHE_STAT_INC(in_martian_dst);
2056#ifdef CONFIG_IP_ROUTE_VERBOSE
Joe Perchese87cc472012-05-13 21:56:26 +00002057 if (IN_DEV_LOG_MARTIANS(in_dev))
2058 net_warn_ratelimited("martian destination %pI4 from %pI4, dev %s\n",
2059 &daddr, &saddr, dev->name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002060#endif
Dietmar Eggemann2c2910a2005-06-28 13:06:23 -07002061
Linus Torvalds1da177e2005-04-16 15:20:36 -07002062e_inval:
2063 err = -EINVAL;
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00002064 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002065
2066e_nobufs:
2067 err = -ENOBUFS;
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00002068 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002069
2070martian_source:
2071 ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00002072 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002073}
2074
David S. Millerc6cffba2012-07-26 11:14:38 +00002075int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2076 u8 tos, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002077{
David Ahern5510cdf2017-05-25 10:42:34 -07002078 struct fib_result res;
2079 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002080
Julian Anastasov6e280992017-02-26 17:14:35 +02002081 tos &= IPTOS_RT_MASK;
Eric Dumazet96d36222010-06-02 19:21:31 +00002082 rcu_read_lock();
David Ahern5510cdf2017-05-25 10:42:34 -07002083 err = ip_route_input_rcu(skb, daddr, saddr, tos, dev, &res);
2084 rcu_read_unlock();
Eric Dumazet96d36222010-06-02 19:21:31 +00002085
David Ahern5510cdf2017-05-25 10:42:34 -07002086 return err;
2087}
2088EXPORT_SYMBOL(ip_route_input_noref);
2089
2090/* called with rcu_read_lock held */
2091int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2092 u8 tos, struct net_device *dev, struct fib_result *res)
2093{
Linus Torvalds1da177e2005-04-16 15:20:36 -07002094 /* Multicast recognition logic is moved from route cache to here.
2095 The problem was that too many Ethernet cards have broken/missing
2096 hardware multicast filters :-( As result the host on multicasting
2097 network acquires a lot of useless route cache entries, sort of
2098 SDR messages from all the world. Now we try to get rid of them.
2099 Really, provided software IP multicast filter is organized
2100 reasonably (at least, hashed), it does not result in a slowdown
2101 comparing with route cache reject entries.
2102 Note, that multicast routers are not affected, because
2103 route cache entry is created eventually.
2104 */
Joe Perchesf97c1e02007-12-16 13:45:43 -08002105 if (ipv4_is_multicast(daddr)) {
Eric Dumazet96d36222010-06-02 19:21:31 +00002106 struct in_device *in_dev = __in_dev_get_rcu(dev);
David Aherne58e4152016-10-31 15:54:00 -07002107 int our = 0;
David Ahern5510cdf2017-05-25 10:42:34 -07002108 int err = -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002109
David Aherne58e4152016-10-31 15:54:00 -07002110 if (in_dev)
2111 our = ip_check_mc_rcu(in_dev, daddr, saddr,
2112 ip_hdr(skb)->protocol);
2113
2114 /* check l3 master if no match yet */
2115 if ((!in_dev || !our) && netif_is_l3_slave(dev)) {
2116 struct in_device *l3_in_dev;
2117
2118 l3_in_dev = __in_dev_get_rcu(skb->dev);
2119 if (l3_in_dev)
2120 our = ip_check_mc_rcu(l3_in_dev, daddr, saddr,
2121 ip_hdr(skb)->protocol);
2122 }
2123
David Aherne58e4152016-10-31 15:54:00 -07002124 if (our
Linus Torvalds1da177e2005-04-16 15:20:36 -07002125#ifdef CONFIG_IP_MROUTE
David Aherne58e4152016-10-31 15:54:00 -07002126 ||
2127 (!ipv4_is_local_multicast(daddr) &&
2128 IN_DEV_MFORWARD(in_dev))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002129#endif
David Aherne58e4152016-10-31 15:54:00 -07002130 ) {
David Ahern5510cdf2017-05-25 10:42:34 -07002131 err = ip_route_input_mc(skb, daddr, saddr,
David Aherne58e4152016-10-31 15:54:00 -07002132 tos, dev, our);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002133 }
David Ahern5510cdf2017-05-25 10:42:34 -07002134 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002135 }
David Ahern5510cdf2017-05-25 10:42:34 -07002136
2137 return ip_route_input_slow(skb, daddr, saddr, tos, dev, res);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002138}
2139
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00002140/* called with rcu_read_lock() */
David S. Miller982721f2011-02-16 21:44:24 -08002141static struct rtable *__mkroute_output(const struct fib_result *res,
David Miller1a00fee2012-07-01 02:02:56 +00002142 const struct flowi4 *fl4, int orig_oif,
Julian Anastasovf61759e2011-12-02 11:39:42 +00002143 struct net_device *dev_out,
David S. Miller5ada5522011-02-17 15:29:00 -08002144 unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002145{
David S. Miller982721f2011-02-16 21:44:24 -08002146 struct fib_info *fi = res->fi;
David S. Millerf2bb4be2012-07-17 12:20:47 -07002147 struct fib_nh_exception *fnhe;
David S. Miller5ada5522011-02-17 15:29:00 -08002148 struct in_device *in_dev;
David S. Miller982721f2011-02-16 21:44:24 -08002149 u16 type = res->type;
David S. Miller5ada5522011-02-17 15:29:00 -08002150 struct rtable *rth;
Julian Anastasovc92b9652012-10-08 11:41:19 +00002151 bool do_cache;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002152
Thomas Grafd0daebc32012-06-12 00:44:01 +00002153 in_dev = __in_dev_get_rcu(dev_out);
2154 if (!in_dev)
David S. Miller5ada5522011-02-17 15:29:00 -08002155 return ERR_PTR(-EINVAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002156
Thomas Grafd0daebc32012-06-12 00:44:01 +00002157 if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
David Ahern5f02ce242016-09-10 12:09:54 -07002158 if (ipv4_is_loopback(fl4->saddr) &&
2159 !(dev_out->flags & IFF_LOOPBACK) &&
2160 !netif_is_l3_master(dev_out))
Thomas Grafd0daebc32012-06-12 00:44:01 +00002161 return ERR_PTR(-EINVAL);
2162
David S. Miller68a5e3d2011-03-11 20:07:33 -05002163 if (ipv4_is_lbcast(fl4->daddr))
David S. Miller982721f2011-02-16 21:44:24 -08002164 type = RTN_BROADCAST;
David S. Miller68a5e3d2011-03-11 20:07:33 -05002165 else if (ipv4_is_multicast(fl4->daddr))
David S. Miller982721f2011-02-16 21:44:24 -08002166 type = RTN_MULTICAST;
David S. Miller68a5e3d2011-03-11 20:07:33 -05002167 else if (ipv4_is_zeronet(fl4->daddr))
David S. Miller5ada5522011-02-17 15:29:00 -08002168 return ERR_PTR(-EINVAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002169
2170 if (dev_out->flags & IFF_LOOPBACK)
2171 flags |= RTCF_LOCAL;
2172
Julian Anastasov63617422012-11-22 23:04:14 +02002173 do_cache = true;
David S. Miller982721f2011-02-16 21:44:24 -08002174 if (type == RTN_BROADCAST) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002175 flags |= RTCF_BROADCAST | RTCF_LOCAL;
David S. Miller982721f2011-02-16 21:44:24 -08002176 fi = NULL;
2177 } else if (type == RTN_MULTICAST) {
Eric Dumazetdd28d1a2010-09-29 11:53:50 +00002178 flags |= RTCF_MULTICAST | RTCF_LOCAL;
David S. Miller813b3b52011-04-28 14:48:42 -07002179 if (!ip_check_mc_rcu(in_dev, fl4->daddr, fl4->saddr,
2180 fl4->flowi4_proto))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002181 flags &= ~RTCF_LOCAL;
Julian Anastasov63617422012-11-22 23:04:14 +02002182 else
2183 do_cache = false;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002184 /* If multicast route do not exist use
Eric Dumazetdd28d1a2010-09-29 11:53:50 +00002185 * default one, but do not gateway in this case.
2186 * Yes, it is hack.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002187 */
David S. Miller982721f2011-02-16 21:44:24 -08002188 if (fi && res->prefixlen < 4)
2189 fi = NULL;
Chris Friesend6d5e992016-04-08 15:21:30 -06002190 } else if ((type == RTN_LOCAL) && (orig_oif != 0) &&
2191 (orig_oif != dev_out->ifindex)) {
2192 /* For local routes that require a particular output interface
2193 * we do not want to cache the result. Caching the result
2194 * causes incorrect behaviour when there are multiple source
2195 * addresses on the interface, the end result being that if the
2196 * intended recipient is waiting on that interface for the
2197 * packet he won't receive it because it will be delivered on
2198 * the loopback interface and the IP_PKTINFO ipi_ifindex will
2199 * be set to the loopback interface as well.
2200 */
2201 fi = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002202 }
2203
David S. Millerf2bb4be2012-07-17 12:20:47 -07002204 fnhe = NULL;
Julian Anastasov63617422012-11-22 23:04:14 +02002205 do_cache &= fi != NULL;
2206 if (do_cache) {
David S. Millerc5038a82012-07-31 15:02:02 -07002207 struct rtable __rcu **prth;
Julian Anastasovc92b9652012-10-08 11:41:19 +00002208 struct fib_nh *nh = &FIB_RES_NH(*res);
Eric Dumazetd26b3a72012-07-31 05:45:30 +00002209
Julian Anastasovc92b9652012-10-08 11:41:19 +00002210 fnhe = find_exception(nh, fl4->daddr);
Xin Longdeed49d2016-02-18 21:21:19 +08002211 if (fnhe) {
Timo Teräs2ffae992013-06-27 10:27:05 +03002212 prth = &fnhe->fnhe_rth_output;
Xin Longdeed49d2016-02-18 21:21:19 +08002213 rth = rcu_dereference(*prth);
2214 if (rth && rth->dst.expires &&
2215 time_after(jiffies, rth->dst.expires)) {
2216 ip_del_fnhe(nh, fl4->daddr);
2217 fnhe = NULL;
2218 } else {
2219 goto rt_cache;
Julian Anastasovc92b9652012-10-08 11:41:19 +00002220 }
Julian Anastasovc92b9652012-10-08 11:41:19 +00002221 }
Xin Longdeed49d2016-02-18 21:21:19 +08002222
2223 if (unlikely(fl4->flowi4_flags &
2224 FLOWI_FLAG_KNOWN_NH &&
2225 !(nh->nh_gw &&
2226 nh->nh_scope == RT_SCOPE_LINK))) {
2227 do_cache = false;
2228 goto add;
2229 }
2230 prth = raw_cpu_ptr(nh->nh_pcpu_rth_output);
David S. Millerc5038a82012-07-31 15:02:02 -07002231 rth = rcu_dereference(*prth);
Xin Longdeed49d2016-02-18 21:21:19 +08002232
2233rt_cache:
Wei Wang9df16ef2017-06-17 10:42:31 -07002234 if (rt_cache_valid(rth) && dst_hold_safe(&rth->dst))
David S. Millerc5038a82012-07-31 15:02:02 -07002235 return rth;
David S. Millerf2bb4be2012-07-17 12:20:47 -07002236 }
Julian Anastasovc92b9652012-10-08 11:41:19 +00002237
2238add:
David Ahernd08c4f32015-09-02 13:58:34 -07002239 rth = rt_dst_alloc(dev_out, flags, type,
David S. Miller5c1e6aa2011-04-28 14:13:38 -07002240 IN_DEV_CONF_GET(in_dev, NOPOLICY),
David S. Millerf2bb4be2012-07-17 12:20:47 -07002241 IN_DEV_CONF_GET(in_dev, NOXFRM),
Julian Anastasovc92b9652012-10-08 11:41:19 +00002242 do_cache);
Dimitris Michailidis8391d072010-10-07 14:48:38 +00002243 if (!rth)
David S. Miller5ada5522011-02-17 15:29:00 -08002244 return ERR_PTR(-ENOBUFS);
Dimitris Michailidis8391d072010-10-07 14:48:38 +00002245
David Ahern9438c872017-08-11 17:02:02 -07002246 rth->rt_iif = orig_oif;
David Ahernb7503e02015-09-02 13:58:35 -07002247 if (res->table)
2248 rth->rt_table_id = res->table->tb_id;
2249
Linus Torvalds1da177e2005-04-16 15:20:36 -07002250 RT_CACHE_STAT_INC(out_slow_tot);
2251
Linus Torvalds1da177e2005-04-16 15:20:36 -07002252 if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002253 if (flags & RTCF_LOCAL &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07002254 !(dev_out->flags & IFF_LOOPBACK)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002255 rth->dst.output = ip_mc_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002256 RT_CACHE_STAT_INC(out_slow_mc);
2257 }
2258#ifdef CONFIG_IP_MROUTE
David S. Miller982721f2011-02-16 21:44:24 -08002259 if (type == RTN_MULTICAST) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002260 if (IN_DEV_MFORWARD(in_dev) &&
David S. Miller813b3b52011-04-28 14:48:42 -07002261 !ipv4_is_local_multicast(fl4->daddr)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002262 rth->dst.input = ip_mr_input;
2263 rth->dst.output = ip_mc_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002264 }
2265 }
2266#endif
2267 }
2268
Wei Wanga4c2fd72017-06-17 10:42:42 -07002269 rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0, do_cache);
Thomas Grafefd85702016-11-30 17:10:09 +01002270 set_lwt_redirect(rth);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002271
David S. Miller5ada5522011-02-17 15:29:00 -08002272 return rth;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002273}
2274
Linus Torvalds1da177e2005-04-16 15:20:36 -07002275/*
2276 * Major route resolver routine.
2277 */
2278
David Ahern3abd1ade2017-05-25 10:42:33 -07002279struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
2280 const struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002281{
Julian Anastasovf61759e2011-12-02 11:39:42 +00002282 __u8 tos = RT_FL_TOS(fl4);
David S. Miller813b3b52011-04-28 14:48:42 -07002283 struct fib_result res;
David S. Miller5ada5522011-02-17 15:29:00 -08002284 struct rtable *rth;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002285
David S. Miller85b91b02012-07-13 08:21:29 -07002286 res.tclassid = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002287 res.fi = NULL;
David S. Miller8b96d222012-06-11 02:01:56 -07002288 res.table = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002289
Pavel Emelyanov1fb94892012-08-08 21:53:36 +00002290 fl4->flowi4_iif = LOOPBACK_IFINDEX;
David S. Miller813b3b52011-04-28 14:48:42 -07002291 fl4->flowi4_tos = tos & IPTOS_RT_MASK;
2292 fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
2293 RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
David S. Miller44713b62011-03-04 21:24:47 -08002294
David S. Miller010c2702011-02-17 15:37:09 -08002295 rcu_read_lock();
David Ahern3abd1ade2017-05-25 10:42:33 -07002296 rth = ip_route_output_key_hash_rcu(net, fl4, &res, skb);
2297 rcu_read_unlock();
2298
2299 return rth;
2300}
2301EXPORT_SYMBOL_GPL(ip_route_output_key_hash);
2302
2303struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4,
2304 struct fib_result *res,
2305 const struct sk_buff *skb)
2306{
2307 struct net_device *dev_out = NULL;
2308 int orig_oif = fl4->flowi4_oif;
2309 unsigned int flags = 0;
2310 struct rtable *rth;
2311 int err = -ENETUNREACH;
2312
David S. Miller813b3b52011-04-28 14:48:42 -07002313 if (fl4->saddr) {
David S. Millerb23dd4f2011-03-02 14:31:35 -08002314 rth = ERR_PTR(-EINVAL);
David S. Miller813b3b52011-04-28 14:48:42 -07002315 if (ipv4_is_multicast(fl4->saddr) ||
2316 ipv4_is_lbcast(fl4->saddr) ||
2317 ipv4_is_zeronet(fl4->saddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002318 goto out;
2319
Linus Torvalds1da177e2005-04-16 15:20:36 -07002320 /* I removed check for oif == dev_out->oif here.
2321 It was wrong for two reasons:
Denis V. Lunev1ab35272008-01-22 22:04:30 -08002322 1. ip_dev_find(net, saddr) can return wrong iface, if saddr
2323 is assigned to multiple interfaces.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002324 2. Moreover, we are allowed to send packets with saddr
2325 of another iface. --ANK
2326 */
2327
David S. Miller813b3b52011-04-28 14:48:42 -07002328 if (fl4->flowi4_oif == 0 &&
2329 (ipv4_is_multicast(fl4->daddr) ||
2330 ipv4_is_lbcast(fl4->daddr))) {
Julian Anastasova210d012008-10-01 07:28:28 -07002331 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
David S. Miller813b3b52011-04-28 14:48:42 -07002332 dev_out = __ip_dev_find(net, fl4->saddr, false);
Ian Morris51456b22015-04-03 09:17:26 +01002333 if (!dev_out)
Julian Anastasova210d012008-10-01 07:28:28 -07002334 goto out;
2335
Linus Torvalds1da177e2005-04-16 15:20:36 -07002336 /* Special hack: user can direct multicasts
2337 and limited broadcast via necessary interface
2338 without fiddling with IP_MULTICAST_IF or IP_PKTINFO.
2339 This hack is not just for fun, it allows
2340 vic,vat and friends to work.
2341 They bind socket to loopback, set ttl to zero
2342 and expect that it will work.
2343 From the viewpoint of routing cache they are broken,
2344 because we are not allowed to build multicast path
2345 with loopback source addr (look, routing cache
2346 cannot know, that ttl is zero, so that packet
2347 will not leave this host and route is valid).
2348 Luckily, this hack is good workaround.
2349 */
2350
David S. Miller813b3b52011-04-28 14:48:42 -07002351 fl4->flowi4_oif = dev_out->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002352 goto make_route;
2353 }
Julian Anastasova210d012008-10-01 07:28:28 -07002354
David S. Miller813b3b52011-04-28 14:48:42 -07002355 if (!(fl4->flowi4_flags & FLOWI_FLAG_ANYSRC)) {
Julian Anastasova210d012008-10-01 07:28:28 -07002356 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
David S. Miller813b3b52011-04-28 14:48:42 -07002357 if (!__ip_dev_find(net, fl4->saddr, false))
Julian Anastasova210d012008-10-01 07:28:28 -07002358 goto out;
Julian Anastasova210d012008-10-01 07:28:28 -07002359 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002360 }
2361
2362
David S. Miller813b3b52011-04-28 14:48:42 -07002363 if (fl4->flowi4_oif) {
2364 dev_out = dev_get_by_index_rcu(net, fl4->flowi4_oif);
David S. Millerb23dd4f2011-03-02 14:31:35 -08002365 rth = ERR_PTR(-ENODEV);
Ian Morris51456b22015-04-03 09:17:26 +01002366 if (!dev_out)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002367 goto out;
Herbert Xue5ed6392005-10-03 14:35:55 -07002368
2369 /* RACE: Check return value of inet_select_addr instead. */
Eric Dumazetfc75fc82010-12-22 04:39:39 +00002370 if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) {
David S. Millerb23dd4f2011-03-02 14:31:35 -08002371 rth = ERR_PTR(-ENETUNREACH);
Eric Dumazetfc75fc82010-12-22 04:39:39 +00002372 goto out;
2373 }
David S. Miller813b3b52011-04-28 14:48:42 -07002374 if (ipv4_is_local_multicast(fl4->daddr) ||
Andrew Lunn6a211652015-05-01 16:39:54 +02002375 ipv4_is_lbcast(fl4->daddr) ||
2376 fl4->flowi4_proto == IPPROTO_IGMP) {
David S. Miller813b3b52011-04-28 14:48:42 -07002377 if (!fl4->saddr)
2378 fl4->saddr = inet_select_addr(dev_out, 0,
2379 RT_SCOPE_LINK);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002380 goto make_route;
2381 }
Jiri Benc0a7e2262013-10-04 17:04:48 +02002382 if (!fl4->saddr) {
David S. Miller813b3b52011-04-28 14:48:42 -07002383 if (ipv4_is_multicast(fl4->daddr))
2384 fl4->saddr = inet_select_addr(dev_out, 0,
2385 fl4->flowi4_scope);
2386 else if (!fl4->daddr)
2387 fl4->saddr = inet_select_addr(dev_out, 0,
2388 RT_SCOPE_HOST);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002389 }
2390 }
2391
David S. Miller813b3b52011-04-28 14:48:42 -07002392 if (!fl4->daddr) {
2393 fl4->daddr = fl4->saddr;
2394 if (!fl4->daddr)
2395 fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK);
Denis V. Lunevb40afd02008-01-22 22:06:19 -08002396 dev_out = net->loopback_dev;
Pavel Emelyanov1fb94892012-08-08 21:53:36 +00002397 fl4->flowi4_oif = LOOPBACK_IFINDEX;
David Ahern3abd1ade2017-05-25 10:42:33 -07002398 res->type = RTN_LOCAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002399 flags |= RTCF_LOCAL;
2400 goto make_route;
2401 }
2402
David Ahern3abd1ade2017-05-25 10:42:33 -07002403 err = fib_lookup(net, fl4, res, 0);
Nikola ForrĂ³0315e382015-09-17 16:01:32 +02002404 if (err) {
David Ahern3abd1ade2017-05-25 10:42:33 -07002405 res->fi = NULL;
2406 res->table = NULL;
David Ahern6104e112016-10-12 13:20:11 -07002407 if (fl4->flowi4_oif &&
David Aherne58e4152016-10-31 15:54:00 -07002408 (ipv4_is_multicast(fl4->daddr) ||
2409 !netif_index_is_l3_master(net, fl4->flowi4_oif))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002410 /* Apparently, routing tables are wrong. Assume,
2411 that the destination is on link.
2412
2413 WHY? DW.
2414 Because we are allowed to send to iface
2415 even if it has NO routes and NO assigned
2416 addresses. When oif is specified, routing
2417 tables are looked up with only one purpose:
2418 to catch if destination is gatewayed, rather than
2419 direct. Moreover, if MSG_DONTROUTE is set,
2420 we send packet, ignoring both routing tables
2421 and ifaddr state. --ANK
2422
2423
2424 We could make it even if oif is unknown,
2425 likely IPv6, but we do not.
2426 */
2427
David S. Miller813b3b52011-04-28 14:48:42 -07002428 if (fl4->saddr == 0)
2429 fl4->saddr = inet_select_addr(dev_out, 0,
2430 RT_SCOPE_LINK);
David Ahern3abd1ade2017-05-25 10:42:33 -07002431 res->type = RTN_UNICAST;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002432 goto make_route;
2433 }
Nikola ForrĂ³0315e382015-09-17 16:01:32 +02002434 rth = ERR_PTR(err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002435 goto out;
2436 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002437
David Ahern3abd1ade2017-05-25 10:42:33 -07002438 if (res->type == RTN_LOCAL) {
David S. Miller813b3b52011-04-28 14:48:42 -07002439 if (!fl4->saddr) {
David Ahern3abd1ade2017-05-25 10:42:33 -07002440 if (res->fi->fib_prefsrc)
2441 fl4->saddr = res->fi->fib_prefsrc;
Joel Sing9fc3bbb2011-01-03 20:24:20 +00002442 else
David S. Miller813b3b52011-04-28 14:48:42 -07002443 fl4->saddr = fl4->daddr;
Joel Sing9fc3bbb2011-01-03 20:24:20 +00002444 }
David Ahern5f02ce242016-09-10 12:09:54 -07002445
2446 /* L3 master device is the loopback for that domain */
David Ahern3abd1ade2017-05-25 10:42:33 -07002447 dev_out = l3mdev_master_dev_rcu(FIB_RES_DEV(*res)) ? :
Robert Shearmanb7c84872017-04-21 21:34:59 +01002448 net->loopback_dev;
David Ahern839da4d2017-08-10 13:49:10 -07002449
2450 /* make sure orig_oif points to fib result device even
2451 * though packet rx/tx happens over loopback or l3mdev
2452 */
2453 orig_oif = FIB_RES_OIF(*res);
2454
David S. Miller813b3b52011-04-28 14:48:42 -07002455 fl4->flowi4_oif = dev_out->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002456 flags |= RTCF_LOCAL;
2457 goto make_route;
2458 }
2459
David Ahern3abd1ade2017-05-25 10:42:33 -07002460 fib_select_path(net, res, fl4, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002461
David Ahern3abd1ade2017-05-25 10:42:33 -07002462 dev_out = FIB_RES_DEV(*res);
David S. Miller813b3b52011-04-28 14:48:42 -07002463 fl4->flowi4_oif = dev_out->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002464
2465
2466make_route:
David Ahern3abd1ade2017-05-25 10:42:33 -07002467 rth = __mkroute_output(res, fl4, orig_oif, dev_out, flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002468
David S. Miller010c2702011-02-17 15:37:09 -08002469out:
David S. Millerb23dd4f2011-03-02 14:31:35 -08002470 return rth;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002471}
Arnaldo Carvalho de Melod8c97a92005-08-09 20:12:12 -07002472
Jianzhao Wangae2688d2010-09-08 14:35:43 -07002473static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie)
2474{
2475 return NULL;
2476}
2477
Steffen Klassertebb762f2011-11-23 02:12:51 +00002478static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst)
Roland Dreierec831ea2011-01-31 13:16:00 -08002479{
Steffen Klassert618f9bc2011-11-23 02:13:31 +00002480 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
2481
2482 return mtu ? : dst->dev->mtu;
Roland Dreierec831ea2011-01-31 13:16:00 -08002483}
2484
David S. Miller6700c272012-07-17 03:29:28 -07002485static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
2486 struct sk_buff *skb, u32 mtu)
David S. Miller14e50e52007-05-24 18:17:54 -07002487{
2488}
2489
/* .redirect hook of the blackhole dst_ops: intentionally does nothing. */
static void ipv4_rt_blackhole_redirect(struct dst_entry *dst,
				       struct sock *sk,
				       struct sk_buff *skb)
{
}
2494
Held Bernhard0972ddb2011-04-24 22:07:32 +00002495static u32 *ipv4_rt_blackhole_cow_metrics(struct dst_entry *dst,
2496 unsigned long old)
2497{
2498 return NULL;
2499}
2500
David S. Miller14e50e52007-05-24 18:17:54 -07002501static struct dst_ops ipv4_dst_blackhole_ops = {
2502 .family = AF_INET,
Jianzhao Wangae2688d2010-09-08 14:35:43 -07002503 .check = ipv4_blackhole_dst_check,
Steffen Klassertebb762f2011-11-23 02:12:51 +00002504 .mtu = ipv4_blackhole_mtu,
Eric Dumazet214f45c2011-02-18 11:39:01 -08002505 .default_advmss = ipv4_default_advmss,
David S. Miller14e50e52007-05-24 18:17:54 -07002506 .update_pmtu = ipv4_rt_blackhole_update_pmtu,
David S. Millerb587ee32012-07-12 00:39:24 -07002507 .redirect = ipv4_rt_blackhole_redirect,
Held Bernhard0972ddb2011-04-24 22:07:32 +00002508 .cow_metrics = ipv4_rt_blackhole_cow_metrics,
David S. Millerd3aaeb32011-07-18 00:40:17 -07002509 .neigh_lookup = ipv4_neigh_lookup,
David S. Miller14e50e52007-05-24 18:17:54 -07002510};
2511
David S. Miller2774c132011-03-01 14:59:04 -08002512struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig)
David S. Miller14e50e52007-05-24 18:17:54 -07002513{
David S. Miller2774c132011-03-01 14:59:04 -08002514 struct rtable *ort = (struct rtable *) dst_orig;
David S. Millerf5b0a872012-07-19 12:31:33 -07002515 struct rtable *rt;
David S. Miller14e50e52007-05-24 18:17:54 -07002516
Steffen Klassert6c0e7282017-10-09 08:43:55 +02002517 rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_DEAD, 0);
David S. Miller14e50e52007-05-24 18:17:54 -07002518 if (rt) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002519 struct dst_entry *new = &rt->dst;
David S. Miller14e50e52007-05-24 18:17:54 -07002520
David S. Miller14e50e52007-05-24 18:17:54 -07002521 new->__use = 1;
Herbert Xu352e5122007-11-13 21:34:06 -08002522 new->input = dst_discard;
Eric W. Biedermanede20592015-10-07 16:48:47 -05002523 new->output = dst_discard_out;
David S. Miller14e50e52007-05-24 18:17:54 -07002524
Wei Wang1dbe32522017-06-17 10:42:26 -07002525 new->dev = net->loopback_dev;
David S. Miller14e50e52007-05-24 18:17:54 -07002526 if (new->dev)
2527 dev_hold(new->dev);
2528
David S. Miller9917e1e82012-07-17 14:44:26 -07002529 rt->rt_is_input = ort->rt_is_input;
David S. Miller5e2b61f2011-03-04 21:47:09 -08002530 rt->rt_iif = ort->rt_iif;
David S. Miller59436342012-07-10 06:58:42 -07002531 rt->rt_pmtu = ort->rt_pmtu;
David S. Miller14e50e52007-05-24 18:17:54 -07002532
fan.duca4c3fc2013-07-30 08:33:53 +08002533 rt->rt_genid = rt_genid_ipv4(net);
David S. Miller14e50e52007-05-24 18:17:54 -07002534 rt->rt_flags = ort->rt_flags;
2535 rt->rt_type = ort->rt_type;
David S. Miller14e50e52007-05-24 18:17:54 -07002536 rt->rt_gateway = ort->rt_gateway;
Julian Anastasov155e8332012-10-08 11:41:18 +00002537 rt->rt_uses_gateway = ort->rt_uses_gateway;
David S. Miller14e50e52007-05-24 18:17:54 -07002538
David S. Millercaacf052012-07-31 15:06:50 -07002539 INIT_LIST_HEAD(&rt->rt_uncached);
David S. Miller14e50e52007-05-24 18:17:54 -07002540 }
2541
David S. Miller2774c132011-03-01 14:59:04 -08002542 dst_release(dst_orig);
2543
2544 return rt ? &rt->dst : ERR_PTR(-ENOMEM);
David S. Miller14e50e52007-05-24 18:17:54 -07002545}
2546
David S. Miller9d6ec932011-03-12 01:12:47 -05002547struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
Eric Dumazet6f9c9612015-09-25 07:39:10 -07002548 const struct sock *sk)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002549{
David S. Miller9d6ec932011-03-12 01:12:47 -05002550 struct rtable *rt = __ip_route_output_key(net, flp4);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002551
David S. Millerb23dd4f2011-03-02 14:31:35 -08002552 if (IS_ERR(rt))
2553 return rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002554
David S. Miller56157872011-05-02 14:37:45 -07002555 if (flp4->flowi4_proto)
Steffen Klassertf92ee612014-09-16 10:08:40 +02002556 rt = (struct rtable *)xfrm_lookup_route(net, &rt->dst,
2557 flowi4_to_flowi(flp4),
2558 sk, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002559
David S. Millerb23dd4f2011-03-02 14:31:35 -08002560 return rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002561}
Arnaldo Carvalho de Melod8c97a92005-08-09 20:12:12 -07002562EXPORT_SYMBOL_GPL(ip_route_output_flow);
2563
David Ahern3765d352017-05-25 10:42:36 -07002564/* called with rcu_read_lock held */
David Ahernc36ba662015-09-02 13:58:36 -07002565static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id,
Eric W. Biederman15e47302012-09-07 20:12:54 +00002566 struct flowi4 *fl4, struct sk_buff *skb, u32 portid,
Roopa Prabhuba52d612017-05-31 22:53:25 -07002567 u32 seq)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002568{
Roopa Prabhuba52d612017-05-31 22:53:25 -07002569 struct rtable *rt = skb_rtable(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002570 struct rtmsg *r;
Thomas Grafbe403ea2006-08-17 18:15:17 -07002571 struct nlmsghdr *nlh;
Steffen Klassert2bc8ca42011-10-11 01:12:02 +00002572 unsigned long expires = 0;
David S. Millerf1850712012-07-10 07:26:01 -07002573 u32 error;
Julian Anastasov521f5492012-07-20 12:02:08 +03002574 u32 metrics[RTAX_MAX];
Thomas Grafbe403ea2006-08-17 18:15:17 -07002575
David Ahernd3166e02017-05-25 10:42:35 -07002576 nlh = nlmsg_put(skb, portid, seq, RTM_NEWROUTE, sizeof(*r), 0);
Ian Morris51456b22015-04-03 09:17:26 +01002577 if (!nlh)
Patrick McHardy26932562007-01-31 23:16:40 -08002578 return -EMSGSIZE;
Thomas Grafbe403ea2006-08-17 18:15:17 -07002579
2580 r = nlmsg_data(nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002581 r->rtm_family = AF_INET;
2582 r->rtm_dst_len = 32;
2583 r->rtm_src_len = 0;
David Millerd6c0a4f2012-07-01 02:02:59 +00002584 r->rtm_tos = fl4->flowi4_tos;
David Ahern8a430ed2017-01-11 15:42:17 -08002585 r->rtm_table = table_id < 256 ? table_id : RT_TABLE_COMPAT;
David Ahernc36ba662015-09-02 13:58:36 -07002586 if (nla_put_u32(skb, RTA_TABLE, table_id))
David S. Millerf3756b72012-04-01 20:39:02 -04002587 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002588 r->rtm_type = rt->rt_type;
2589 r->rtm_scope = RT_SCOPE_UNIVERSE;
2590 r->rtm_protocol = RTPROT_UNSPEC;
2591 r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
2592 if (rt->rt_flags & RTCF_NOTIFY)
2593 r->rtm_flags |= RTM_F_NOTIFY;
Hannes Frederic Sowadf4d9252015-01-23 12:01:26 +01002594 if (IPCB(skb)->flags & IPSKB_DOREDIRECT)
2595 r->rtm_flags |= RTCF_DOREDIRECT;
Thomas Grafbe403ea2006-08-17 18:15:17 -07002596
Jiri Benc930345e2015-03-29 16:59:25 +02002597 if (nla_put_in_addr(skb, RTA_DST, dst))
David S. Millerf3756b72012-04-01 20:39:02 -04002598 goto nla_put_failure;
David Miller1a00fee2012-07-01 02:02:56 +00002599 if (src) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002600 r->rtm_src_len = 32;
Jiri Benc930345e2015-03-29 16:59:25 +02002601 if (nla_put_in_addr(skb, RTA_SRC, src))
David S. Millerf3756b72012-04-01 20:39:02 -04002602 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002603 }
David S. Millerf3756b72012-04-01 20:39:02 -04002604 if (rt->dst.dev &&
2605 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2606 goto nla_put_failure;
Patrick McHardyc7066f72011-01-14 13:36:42 +01002607#ifdef CONFIG_IP_ROUTE_CLASSID
David S. Millerf3756b72012-04-01 20:39:02 -04002608 if (rt->dst.tclassid &&
2609 nla_put_u32(skb, RTA_FLOW, rt->dst.tclassid))
2610 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002611#endif
David S. Miller41347dc2012-06-28 04:05:27 -07002612 if (!rt_is_input_route(rt) &&
David Millerd6c0a4f2012-07-01 02:02:59 +00002613 fl4->saddr != src) {
Jiri Benc930345e2015-03-29 16:59:25 +02002614 if (nla_put_in_addr(skb, RTA_PREFSRC, fl4->saddr))
David S. Millerf3756b72012-04-01 20:39:02 -04002615 goto nla_put_failure;
2616 }
Julian Anastasov155e8332012-10-08 11:41:18 +00002617 if (rt->rt_uses_gateway &&
Jiri Benc930345e2015-03-29 16:59:25 +02002618 nla_put_in_addr(skb, RTA_GATEWAY, rt->rt_gateway))
David S. Millerf3756b72012-04-01 20:39:02 -04002619 goto nla_put_failure;
Thomas Grafbe403ea2006-08-17 18:15:17 -07002620
Steffen Klassertee9a8f72012-10-08 00:56:54 +00002621 expires = rt->dst.expires;
2622 if (expires) {
2623 unsigned long now = jiffies;
2624
2625 if (time_before(now, expires))
2626 expires -= now;
2627 else
2628 expires = 0;
2629 }
2630
Julian Anastasov521f5492012-07-20 12:02:08 +03002631 memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
Steffen Klassertee9a8f72012-10-08 00:56:54 +00002632 if (rt->rt_pmtu && expires)
Julian Anastasov521f5492012-07-20 12:02:08 +03002633 metrics[RTAX_MTU - 1] = rt->rt_pmtu;
2634 if (rtnetlink_put_metrics(skb, metrics) < 0)
Thomas Grafbe403ea2006-08-17 18:15:17 -07002635 goto nla_put_failure;
2636
David Millerb4869882012-07-01 02:03:01 +00002637 if (fl4->flowi4_mark &&
stephen hemminger68aaed52012-10-10 08:27:25 +00002638 nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark))
David S. Millerf3756b72012-04-01 20:39:02 -04002639 goto nla_put_failure;
Eric Dumazet963bfee2010-07-20 22:03:14 +00002640
Lorenzo Colitti622ec2c2016-11-04 02:23:42 +09002641 if (!uid_eq(fl4->flowi4_uid, INVALID_UID) &&
2642 nla_put_u32(skb, RTA_UID,
2643 from_kuid_munged(current_user_ns(), fl4->flowi4_uid)))
2644 goto nla_put_failure;
2645
Changli Gaod8d1f302010-06-10 23:31:35 -07002646 error = rt->dst.error;
Thomas Grafbe403ea2006-08-17 18:15:17 -07002647
David S. Millerc7537962010-11-11 17:07:48 -08002648 if (rt_is_input_route(rt)) {
Nicolas Dichtel8caaf7b2012-12-04 01:03:07 +00002649#ifdef CONFIG_IP_MROUTE
2650 if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) &&
2651 IPV4_DEVCONF_ALL(net, MC_FORWARDING)) {
2652 int err = ipmr_get_route(net, skb,
2653 fl4->saddr, fl4->daddr,
David Ahern9f09eae2017-01-06 17:39:06 -08002654 r, portid);
Nikolay Aleksandrov2cf75072016-09-25 23:08:31 +02002655
Nicolas Dichtel8caaf7b2012-12-04 01:03:07 +00002656 if (err <= 0) {
David Ahern0c8d8032017-01-05 19:32:46 -08002657 if (err == 0)
2658 return 0;
2659 goto nla_put_failure;
Nicolas Dichtel8caaf7b2012-12-04 01:03:07 +00002660 }
2661 } else
2662#endif
Julian Anastasov91146152014-04-13 18:08:02 +03002663 if (nla_put_u32(skb, RTA_IIF, skb->dev->ifindex))
Nicolas Dichtel8caaf7b2012-12-04 01:03:07 +00002664 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002665 }
2666
David S. Millerf1850712012-07-10 07:26:01 -07002667 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, error) < 0)
Thomas Grafe3703b32006-11-27 09:27:07 -08002668 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002669
Johannes Berg053c0952015-01-16 22:09:00 +01002670 nlmsg_end(skb, nlh);
2671 return 0;
Thomas Grafbe403ea2006-08-17 18:15:17 -07002672
2673nla_put_failure:
Patrick McHardy26932562007-01-31 23:16:40 -08002674 nlmsg_cancel(skb, nlh);
2675 return -EMSGSIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002676}
2677
David Ahernc21ef3e2017-04-16 09:48:24 -07002678static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
2679 struct netlink_ext_ack *extack)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002680{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09002681 struct net *net = sock_net(in_skb->sk);
Thomas Grafd889ce32006-08-17 18:15:44 -07002682 struct rtmsg *rtm;
2683 struct nlattr *tb[RTA_MAX+1];
David Ahern3765d352017-05-25 10:42:36 -07002684 struct fib_result res = {};
Linus Torvalds1da177e2005-04-16 15:20:36 -07002685 struct rtable *rt = NULL;
David Millerd6c0a4f2012-07-01 02:02:59 +00002686 struct flowi4 fl4;
Al Viro9e12bb22006-09-26 21:25:20 -07002687 __be32 dst = 0;
2688 __be32 src = 0;
2689 u32 iif;
Thomas Grafd889ce32006-08-17 18:15:44 -07002690 int err;
Eric Dumazet963bfee2010-07-20 22:03:14 +00002691 int mark;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002692 struct sk_buff *skb;
David Ahernc36ba662015-09-02 13:58:36 -07002693 u32 table_id = RT_TABLE_MAIN;
Lorenzo Colitti622ec2c2016-11-04 02:23:42 +09002694 kuid_t uid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002695
Johannes Bergfceb6432017-04-12 14:34:07 +02002696 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy,
David Ahernc21ef3e2017-04-16 09:48:24 -07002697 extack);
Thomas Grafd889ce32006-08-17 18:15:44 -07002698 if (err < 0)
2699 goto errout;
2700
2701 rtm = nlmsg_data(nlh);
2702
Linus Torvalds1da177e2005-04-16 15:20:36 -07002703 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
Ian Morris51456b22015-04-03 09:17:26 +01002704 if (!skb) {
Thomas Grafd889ce32006-08-17 18:15:44 -07002705 err = -ENOBUFS;
2706 goto errout;
2707 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002708
2709 /* Reserve room for dummy headers, this skb can pass
2710 through good chunk of routing engine.
2711 */
Arnaldo Carvalho de Melo459a98e2007-03-19 15:30:44 -07002712 skb_reset_mac_header(skb);
Arnaldo Carvalho de Meloc1d2bbe2007-04-10 20:45:18 -07002713 skb_reset_network_header(skb);
Stephen Hemmingerd2c962b2006-04-17 17:27:11 -07002714
Jiri Benc67b61f62015-03-29 16:59:26 +02002715 src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0;
2716 dst = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0;
Thomas Grafd889ce32006-08-17 18:15:44 -07002717 iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
Eric Dumazet963bfee2010-07-20 22:03:14 +00002718 mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0;
Lorenzo Colitti622ec2c2016-11-04 02:23:42 +09002719 if (tb[RTA_UID])
2720 uid = make_kuid(current_user_ns(), nla_get_u32(tb[RTA_UID]));
2721 else
2722 uid = (iif ? INVALID_UID : current_uid());
Linus Torvalds1da177e2005-04-16 15:20:36 -07002723
Florian Laryschbbadb9a2017-04-07 14:42:20 +02002724 /* Bugfix: need to give ip_route_input enough of an IP header to
2725 * not gag.
2726 */
2727 ip_hdr(skb)->protocol = IPPROTO_UDP;
2728 ip_hdr(skb)->saddr = src;
2729 ip_hdr(skb)->daddr = dst;
2730
2731 skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));
2732
David Millerd6c0a4f2012-07-01 02:02:59 +00002733 memset(&fl4, 0, sizeof(fl4));
2734 fl4.daddr = dst;
2735 fl4.saddr = src;
2736 fl4.flowi4_tos = rtm->rtm_tos;
2737 fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0;
2738 fl4.flowi4_mark = mark;
Lorenzo Colitti622ec2c2016-11-04 02:23:42 +09002739 fl4.flowi4_uid = uid;
David Millerd6c0a4f2012-07-01 02:02:59 +00002740
David Ahern3765d352017-05-25 10:42:36 -07002741 rcu_read_lock();
2742
Linus Torvalds1da177e2005-04-16 15:20:36 -07002743 if (iif) {
Thomas Grafd889ce32006-08-17 18:15:44 -07002744 struct net_device *dev;
2745
David Ahern3765d352017-05-25 10:42:36 -07002746 dev = dev_get_by_index_rcu(net, iif);
Ian Morris51456b22015-04-03 09:17:26 +01002747 if (!dev) {
Thomas Grafd889ce32006-08-17 18:15:44 -07002748 err = -ENODEV;
2749 goto errout_free;
2750 }
2751
Linus Torvalds1da177e2005-04-16 15:20:36 -07002752 skb->protocol = htons(ETH_P_IP);
2753 skb->dev = dev;
Eric Dumazet963bfee2010-07-20 22:03:14 +00002754 skb->mark = mark;
David Ahern3765d352017-05-25 10:42:36 -07002755 err = ip_route_input_rcu(skb, dst, src, rtm->rtm_tos,
2756 dev, &res);
Thomas Grafd889ce32006-08-17 18:15:44 -07002757
Eric Dumazet511c3f92009-06-02 05:14:27 +00002758 rt = skb_rtable(skb);
Changli Gaod8d1f302010-06-10 23:31:35 -07002759 if (err == 0 && rt->dst.error)
2760 err = -rt->dst.error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002761 } else {
Lorenzo Colitti6503a302018-01-11 18:36:26 +09002762 fl4.flowi4_iif = LOOPBACK_IFINDEX;
David Ahern3765d352017-05-25 10:42:36 -07002763 rt = ip_route_output_key_hash_rcu(net, &fl4, &res, skb);
David S. Millerb23dd4f2011-03-02 14:31:35 -08002764 err = 0;
2765 if (IS_ERR(rt))
2766 err = PTR_ERR(rt);
Florian Westphal2c87d632017-08-14 00:52:58 +02002767 else
2768 skb_dst_set(skb, &rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002769 }
Thomas Grafd889ce32006-08-17 18:15:44 -07002770
Linus Torvalds1da177e2005-04-16 15:20:36 -07002771 if (err)
Thomas Grafd889ce32006-08-17 18:15:44 -07002772 goto errout_free;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002773
Linus Torvalds1da177e2005-04-16 15:20:36 -07002774 if (rtm->rtm_flags & RTM_F_NOTIFY)
2775 rt->rt_flags |= RTCF_NOTIFY;
2776
David Ahernc36ba662015-09-02 13:58:36 -07002777 if (rtm->rtm_flags & RTM_F_LOOKUP_TABLE)
2778 table_id = rt->rt_table_id;
2779
Roopa Prabhubc3aae22017-08-16 12:38:52 -07002780 if (rtm->rtm_flags & RTM_F_FIB_MATCH) {
2781 if (!res.fi) {
2782 err = fib_props[res.type].error;
2783 if (!err)
2784 err = -EHOSTUNREACH;
2785 goto errout_free;
2786 }
Roopa Prabhub6179812017-05-25 10:42:39 -07002787 err = fib_dump_info(skb, NETLINK_CB(in_skb).portid,
2788 nlh->nlmsg_seq, RTM_NEWROUTE, table_id,
2789 rt->rt_type, res.prefix, res.prefixlen,
2790 fl4.flowi4_tos, res.fi, 0);
Roopa Prabhubc3aae22017-08-16 12:38:52 -07002791 } else {
Roopa Prabhub6179812017-05-25 10:42:39 -07002792 err = rt_fill_info(net, dst, src, table_id, &fl4, skb,
Roopa Prabhuba52d612017-05-31 22:53:25 -07002793 NETLINK_CB(in_skb).portid, nlh->nlmsg_seq);
Roopa Prabhubc3aae22017-08-16 12:38:52 -07002794 }
David S. Miller7b46a642015-01-18 23:36:08 -05002795 if (err < 0)
Thomas Grafd889ce32006-08-17 18:15:44 -07002796 goto errout_free;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002797
David Ahern3765d352017-05-25 10:42:36 -07002798 rcu_read_unlock();
2799
Eric W. Biederman15e47302012-09-07 20:12:54 +00002800 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
Thomas Grafd889ce32006-08-17 18:15:44 -07002801errout:
Thomas Graf2942e902006-08-15 00:30:25 -07002802 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002803
Thomas Grafd889ce32006-08-17 18:15:44 -07002804errout_free:
David Ahern3765d352017-05-25 10:42:36 -07002805 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002806 kfree_skb(skb);
Thomas Grafd889ce32006-08-17 18:15:44 -07002807 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002808}
2809
Linus Torvalds1da177e2005-04-16 15:20:36 -07002810void ip_rt_multicast_event(struct in_device *in_dev)
2811{
Nicolas Dichtel4ccfe6d2012-09-07 00:45:29 +00002812 rt_cache_flush(dev_net(in_dev->dev));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002813}
2814
2815#ifdef CONFIG_SYSCTL
Gao feng082c7ca2013-02-19 00:43:12 +00002816static int ip_rt_gc_interval __read_mostly = 60 * HZ;
2817static int ip_rt_gc_min_interval __read_mostly = HZ / 2;
2818static int ip_rt_gc_elasticity __read_mostly = 8;
2819
Joe Perchesfe2c6332013-06-11 23:04:25 -07002820static int ipv4_sysctl_rtcache_flush(struct ctl_table *__ctl, int write,
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07002821 void __user *buffer,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002822 size_t *lenp, loff_t *ppos)
2823{
Timo Teräs5aad1de2013-05-27 20:46:33 +00002824 struct net *net = (struct net *)__ctl->extra1;
2825
Linus Torvalds1da177e2005-04-16 15:20:36 -07002826 if (write) {
Timo Teräs5aad1de2013-05-27 20:46:33 +00002827 rt_cache_flush(net);
2828 fnhe_genid_bump(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002829 return 0;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002830 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002831
2832 return -EINVAL;
2833}
2834
Joe Perchesfe2c6332013-06-11 23:04:25 -07002835static struct ctl_table ipv4_route_table[] = {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002836 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002837 .procname = "gc_thresh",
2838 .data = &ipv4_dst_ops.gc_thresh,
2839 .maxlen = sizeof(int),
2840 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002841 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002842 },
2843 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002844 .procname = "max_size",
2845 .data = &ip_rt_max_size,
2846 .maxlen = sizeof(int),
2847 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002848 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002849 },
2850 {
2851 /* Deprecated. Use gc_min_interval_ms */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002852
Linus Torvalds1da177e2005-04-16 15:20:36 -07002853 .procname = "gc_min_interval",
2854 .data = &ip_rt_gc_min_interval,
2855 .maxlen = sizeof(int),
2856 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002857 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002858 },
2859 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002860 .procname = "gc_min_interval_ms",
2861 .data = &ip_rt_gc_min_interval,
2862 .maxlen = sizeof(int),
2863 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002864 .proc_handler = proc_dointvec_ms_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002865 },
2866 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002867 .procname = "gc_timeout",
2868 .data = &ip_rt_gc_timeout,
2869 .maxlen = sizeof(int),
2870 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002871 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002872 },
2873 {
Eric Dumazet9f28a2f2011-12-21 15:47:16 -05002874 .procname = "gc_interval",
2875 .data = &ip_rt_gc_interval,
2876 .maxlen = sizeof(int),
2877 .mode = 0644,
2878 .proc_handler = proc_dointvec_jiffies,
2879 },
2880 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002881 .procname = "redirect_load",
2882 .data = &ip_rt_redirect_load,
2883 .maxlen = sizeof(int),
2884 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002885 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002886 },
2887 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002888 .procname = "redirect_number",
2889 .data = &ip_rt_redirect_number,
2890 .maxlen = sizeof(int),
2891 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002892 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002893 },
2894 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002895 .procname = "redirect_silence",
2896 .data = &ip_rt_redirect_silence,
2897 .maxlen = sizeof(int),
2898 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002899 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002900 },
2901 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002902 .procname = "error_cost",
2903 .data = &ip_rt_error_cost,
2904 .maxlen = sizeof(int),
2905 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002906 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002907 },
2908 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002909 .procname = "error_burst",
2910 .data = &ip_rt_error_burst,
2911 .maxlen = sizeof(int),
2912 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002913 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002914 },
2915 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002916 .procname = "gc_elasticity",
2917 .data = &ip_rt_gc_elasticity,
2918 .maxlen = sizeof(int),
2919 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002920 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002921 },
2922 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002923 .procname = "mtu_expires",
2924 .data = &ip_rt_mtu_expires,
2925 .maxlen = sizeof(int),
2926 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002927 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002928 },
2929 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002930 .procname = "min_pmtu",
2931 .data = &ip_rt_min_pmtu,
2932 .maxlen = sizeof(int),
2933 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002934 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002935 },
2936 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002937 .procname = "min_adv_mss",
2938 .data = &ip_rt_min_advmss,
2939 .maxlen = sizeof(int),
2940 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002941 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002942 },
Eric W. Biedermanf8572d82009-11-05 13:32:03 -08002943 { }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002944};
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002945
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002946static struct ctl_table ipv4_route_flush_table[] = {
2947 {
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002948 .procname = "flush",
2949 .maxlen = sizeof(int),
2950 .mode = 0200,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002951 .proc_handler = ipv4_sysctl_rtcache_flush,
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002952 },
Eric W. Biedermanf8572d82009-11-05 13:32:03 -08002953 { },
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002954};
2955
2956static __net_init int sysctl_route_net_init(struct net *net)
2957{
2958 struct ctl_table *tbl;
2959
2960 tbl = ipv4_route_flush_table;
Octavian Purdila09ad9bc2009-11-25 15:14:13 -08002961 if (!net_eq(net, &init_net)) {
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002962 tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL);
Ian Morris51456b22015-04-03 09:17:26 +01002963 if (!tbl)
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002964 goto err_dup;
Eric W. Biederman464dc802012-11-16 03:02:59 +00002965
2966 /* Don't export sysctls to unprivileged users */
2967 if (net->user_ns != &init_user_ns)
2968 tbl[0].procname = NULL;
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002969 }
2970 tbl[0].extra1 = net;
2971
Eric W. Biedermanec8f23c2012-04-19 13:44:49 +00002972 net->ipv4.route_hdr = register_net_sysctl(net, "net/ipv4/route", tbl);
Ian Morris51456b22015-04-03 09:17:26 +01002973 if (!net->ipv4.route_hdr)
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002974 goto err_reg;
2975 return 0;
2976
2977err_reg:
2978 if (tbl != ipv4_route_flush_table)
2979 kfree(tbl);
2980err_dup:
2981 return -ENOMEM;
2982}
2983
2984static __net_exit void sysctl_route_net_exit(struct net *net)
2985{
2986 struct ctl_table *tbl;
2987
2988 tbl = net->ipv4.route_hdr->ctl_table_arg;
2989 unregister_net_sysctl_table(net->ipv4.route_hdr);
2990 BUG_ON(tbl == ipv4_route_flush_table);
2991 kfree(tbl);
2992}
2993
2994static __net_initdata struct pernet_operations sysctl_route_ops = {
2995 .init = sysctl_route_net_init,
2996 .exit = sysctl_route_net_exit,
2997};
Linus Torvalds1da177e2005-04-16 15:20:36 -07002998#endif
2999
Neil Horman3ee94372010-05-08 01:57:52 -07003000static __net_init int rt_genid_init(struct net *net)
Denis V. Lunev9f5e97e2008-07-05 19:02:59 -07003001{
fan.duca4c3fc2013-07-30 08:33:53 +08003002 atomic_set(&net->ipv4.rt_genid, 0);
Timo Teräs5aad1de2013-05-27 20:46:33 +00003003 atomic_set(&net->fnhe_genid, 0);
Jason A. Donenfeld7aed9f72017-06-07 23:01:20 -04003004 atomic_set(&net->ipv4.dev_addr_genid, get_random_int());
Denis V. Lunev9f5e97e2008-07-05 19:02:59 -07003005 return 0;
3006}
3007
Neil Horman3ee94372010-05-08 01:57:52 -07003008static __net_initdata struct pernet_operations rt_genid_ops = {
3009 .init = rt_genid_init,
Denis V. Lunev9f5e97e2008-07-05 19:02:59 -07003010};
3011
David S. Millerc3426b42012-06-09 16:27:05 -07003012static int __net_init ipv4_inetpeer_init(struct net *net)
3013{
3014 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3015
3016 if (!bp)
3017 return -ENOMEM;
3018 inet_peer_base_init(bp);
3019 net->ipv4.peers = bp;
3020 return 0;
3021}
3022
3023static void __net_exit ipv4_inetpeer_exit(struct net *net)
3024{
3025 struct inet_peer_base *bp = net->ipv4.peers;
3026
3027 net->ipv4.peers = NULL;
David S. Miller56a6b242012-06-09 16:32:41 -07003028 inetpeer_invalidate_tree(bp);
David S. Millerc3426b42012-06-09 16:27:05 -07003029 kfree(bp);
3030}
3031
3032static __net_initdata struct pernet_operations ipv4_inetpeer_ops = {
3033 .init = ipv4_inetpeer_init,
3034 .exit = ipv4_inetpeer_exit,
3035};
Denis V. Lunev9f5e97e2008-07-05 19:02:59 -07003036
#ifdef CONFIG_IP_ROUTE_CLASSID
/* Per-CPU route accounting area; allocated in ip_rt_init(). */
struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
#endif /* CONFIG_IP_ROUTE_CLASSID */
Linus Torvalds1da177e2005-04-16 15:20:36 -07003040
Linus Torvalds1da177e2005-04-16 15:20:36 -07003041int __init ip_rt_init(void)
3042{
Eric Dumazet5055c372015-01-14 15:17:06 -08003043 int cpu;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003044
Eric Dumazet73f156a2014-06-02 05:26:03 -07003045 ip_idents = kmalloc(IP_IDENTS_SZ * sizeof(*ip_idents), GFP_KERNEL);
3046 if (!ip_idents)
3047 panic("IP: failed to allocate ip_idents\n");
3048
3049 prandom_bytes(ip_idents, IP_IDENTS_SZ * sizeof(*ip_idents));
3050
Eric Dumazet355b5902015-05-01 10:37:49 -07003051 ip_tstamps = kcalloc(IP_IDENTS_SZ, sizeof(*ip_tstamps), GFP_KERNEL);
3052 if (!ip_tstamps)
3053 panic("IP: failed to allocate ip_tstamps\n");
3054
Eric Dumazet5055c372015-01-14 15:17:06 -08003055 for_each_possible_cpu(cpu) {
3056 struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu);
3057
3058 INIT_LIST_HEAD(&ul->head);
3059 spin_lock_init(&ul->lock);
3060 }
Patrick McHardyc7066f72011-01-14 13:36:42 +01003061#ifdef CONFIG_IP_ROUTE_CLASSID
Ingo Molnar0dcec8c2009-02-25 14:07:33 +01003062 ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct));
Linus Torvalds1da177e2005-04-16 15:20:36 -07003063 if (!ip_rt_acct)
3064 panic("IP: failed to allocate ip_rt_acct\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07003065#endif
3066
Alexey Dobriyane5d679f332006-08-26 19:25:52 -07003067 ipv4_dst_ops.kmem_cachep =
3068 kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0,
Paul Mundt20c2df82007-07-20 10:11:58 +09003069 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003070
David S. Miller14e50e52007-05-24 18:17:54 -07003071 ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep;
3072
Eric Dumazetfc66f952010-10-08 06:37:34 +00003073 if (dst_entries_init(&ipv4_dst_ops) < 0)
3074 panic("IP: failed to allocate ipv4_dst_ops counter\n");
3075
3076 if (dst_entries_init(&ipv4_dst_blackhole_ops) < 0)
3077 panic("IP: failed to allocate ipv4_dst_blackhole_ops counter\n");
3078
David S. Miller89aef892012-07-17 11:00:09 -07003079 ipv4_dst_ops.gc_thresh = ~0;
3080 ip_rt_max_size = INT_MAX;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003081
Linus Torvalds1da177e2005-04-16 15:20:36 -07003082 devinet_init();
3083 ip_fib_init();
3084
Denis V. Lunev73b38712008-02-28 20:51:18 -08003085 if (ip_rt_proc_init())
Joe Perches058bd4d2012-03-11 18:36:11 +00003086 pr_err("Unable to create route proc files\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07003087#ifdef CONFIG_XFRM
3088 xfrm_init();
Steffen Klassert703fb942012-11-13 08:52:24 +01003089 xfrm4_init();
Linus Torvalds1da177e2005-04-16 15:20:36 -07003090#endif
Florian Westphal394f51a2017-08-15 16:34:44 +02003091 rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL,
3092 RTNL_FLAG_DOIT_UNLOCKED);
Thomas Graf63f34442007-03-22 11:55:17 -07003093
Denis V. Lunev39a23e72008-07-05 19:02:33 -07003094#ifdef CONFIG_SYSCTL
3095 register_pernet_subsys(&sysctl_route_ops);
3096#endif
Neil Horman3ee94372010-05-08 01:57:52 -07003097 register_pernet_subsys(&rt_genid_ops);
David S. Millerc3426b42012-06-09 16:27:05 -07003098 register_pernet_subsys(&ipv4_inetpeer_ops);
Tim Hansen1bcdca32017-10-04 15:59:49 -04003099 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003100}
3101
#ifdef CONFIG_SYSCTL
/*
 * Registers ipv4_route_table for init_net under "net/ipv4/route".
 *
 * We really need to sanitize the damn ipv4 init order, then all
 * this nonsense will go away.
 */
void __init ip_static_sysctl_init(void)
{
	register_net_sysctl(&init_net, "net/ipv4/route", ipv4_route_table);
}
#endif