blob: 709ffe67d1de1609be7d3e4a98d9314b01e5f265 [file] [log] [blame]
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		ROUTE - implementation of the IP router.
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *		Linus Torvalds, <Linus.Torvalds@helsinki.fi>
 *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Fixes:
 *		Alan Cox	:	Verify area fixes.
 *		Alan Cox	:	cli() protects routing changes
 *		Rui Oliveira	:	ICMP routing table updates
 *		(rco@di.uminho.pt)	Routing table insertion and update
 *		Linus Torvalds	:	Rewrote bits to be sensible
 *		Alan Cox	:	Added BSD route gw semantics
 *		Alan Cox	:	Super /proc >4K
 *		Alan Cox	:	MTU in route table
 *		Alan Cox	:	MSS actually. Also added the window
 *					clamper.
 *		Sam Lantinga	:	Fixed route matching in rt_del()
 *		Alan Cox	:	Routing cache support.
 *		Alan Cox	:	Removed compatibility cruft.
 *		Alan Cox	:	RTF_REJECT support.
 *		Alan Cox	:	TCP irtt support.
 *		Jonathan Naylor	:	Added Metric support.
 *	Miquel van Smoorenburg	:	BSD API fixes.
 *	Miquel van Smoorenburg	:	Metrics.
 *		Alan Cox	:	Use __u32 properly
 *		Alan Cox	:	Aligned routing errors more closely with BSD
 *					our system is still very different.
 *		Alan Cox	:	Faster /proc handling
 *	Alexey Kuznetsov	:	Massive rework to support tree based routing,
 *					routing caches and better behaviour.
 *
 *		Olaf Erb	:	irtt wasn't being copied right.
 *		Bjorn Ekwall	:	Kerneld route support.
 *		Alan Cox	:	Multicast fixed (I hope)
 *		Pavel Krauz	:	Limited broadcast fixed
 *		Mike McLagan	:	Routing by source
 *	Alexey Kuznetsov	:	End of old history. Split to fib.c and
 *					route.c and rewritten from scratch.
 *		Andi Kleen	:	Load-limit warning messages.
 *	Vitaly E. Lavrov	:	Transparent proxy revived after year coma.
 *	Vitaly E. Lavrov	:	Race condition in ip_route_input_slow.
 *	Tobias Ringstrom	:	Uninitialized res.type in ip_route_output_slow.
 *	Vladimir V. Ivanov	:	IP rule info (flowid) is really useful.
 *		Marc Boucher	:	routing by fwmark
 *	Robert Olsson		:	Added rt_cache statistics
 *	Arnaldo C. Melo		:	Convert proc stuff to seq_file
 *	Eric Dumazet		:	hashed spinlocks and rt_check_expire() fixes.
 *	Ilia Sotnikov		:	Ignore TOS on PMTUD and Redirect
 *	Ilia Sotnikov		:	Removed TOS from hash calculations
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */
64
Joe Perchesafd465032012-03-12 07:03:32 +000065#define pr_fmt(fmt) "IPv4: " fmt
66
Linus Torvalds1da177e2005-04-16 15:20:36 -070067#include <linux/module.h>
Linus Torvalds7c0f6ba2016-12-24 11:46:01 -080068#include <linux/uaccess.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070069#include <linux/bitops.h>
70#include <linux/types.h>
71#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070072#include <linux/mm.h>
73#include <linux/string.h>
74#include <linux/socket.h>
75#include <linux/sockios.h>
76#include <linux/errno.h>
77#include <linux/in.h>
78#include <linux/inet.h>
79#include <linux/netdevice.h>
80#include <linux/proc_fs.h>
81#include <linux/init.h>
82#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070083#include <linux/inetdevice.h>
84#include <linux/igmp.h>
85#include <linux/pkt_sched.h>
86#include <linux/mroute.h>
87#include <linux/netfilter_ipv4.h>
88#include <linux/random.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070089#include <linux/rcupdate.h>
90#include <linux/times.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090091#include <linux/slab.h>
Eric Dumazet73f156a2014-06-02 05:26:03 -070092#include <linux/jhash.h>
Herbert Xu352e5122007-11-13 21:34:06 -080093#include <net/dst.h>
Thomas Graf1b7179d2015-07-21 10:43:59 +020094#include <net/dst_metadata.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020095#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070096#include <net/protocol.h>
97#include <net/ip.h>
98#include <net/route.h>
99#include <net/inetpeer.h>
100#include <net/sock.h>
101#include <net/ip_fib.h>
102#include <net/arp.h>
103#include <net/tcp.h>
104#include <net/icmp.h>
105#include <net/xfrm.h>
Roopa Prabhu571e7222015-07-21 10:43:47 +0200106#include <net/lwtunnel.h>
Tom Tucker8d717402006-07-30 20:43:36 -0700107#include <net/netevent.h>
Thomas Graf63f34442007-03-22 11:55:17 -0700108#include <net/rtnetlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700109#ifdef CONFIG_SYSCTL
110#include <linux/sysctl.h>
Shan Wei7426a562012-04-18 18:05:46 +0000111#include <linux/kmemleak.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700112#endif
David S. Miller6e5714e2011-08-03 20:50:44 -0700113#include <net/secure_seq.h>
Thomas Graf1b7179d2015-07-21 10:43:59 +0200114#include <net/ip_tunnels.h>
David Ahern385add92015-09-29 20:07:13 -0700115#include <net/l3mdev.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700116
David S. Miller68a5e3d2011-03-11 20:07:33 -0500117#define RT_FL_TOS(oldflp4) \
Julian Anastasovf61759e2011-12-02 11:39:42 +0000118 ((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700119
Linus Torvalds1da177e2005-04-16 15:20:36 -0700120#define RT_GC_TIMEOUT (300*HZ)
121
Linus Torvalds1da177e2005-04-16 15:20:36 -0700122static int ip_rt_max_size;
Stephen Hemminger817bc4d2008-03-22 17:43:59 -0700123static int ip_rt_redirect_number __read_mostly = 9;
124static int ip_rt_redirect_load __read_mostly = HZ / 50;
125static int ip_rt_redirect_silence __read_mostly = ((HZ / 50) << (9 + 1));
126static int ip_rt_error_cost __read_mostly = HZ;
127static int ip_rt_error_burst __read_mostly = 5 * HZ;
Stephen Hemminger817bc4d2008-03-22 17:43:59 -0700128static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ;
129static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20;
130static int ip_rt_min_advmss __read_mostly = 256;
Eric Dumazet9f28a2f2011-12-21 15:47:16 -0500131
Xin Longdeed49d2016-02-18 21:21:19 +0800132static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700133/*
134 * Interface to generic destination cache.
135 */
136
137static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
David S. Miller0dbaee32010-12-13 12:52:14 -0800138static unsigned int ipv4_default_advmss(const struct dst_entry *dst);
Steffen Klassertebb762f2011-11-23 02:12:51 +0000139static unsigned int ipv4_mtu(const struct dst_entry *dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700140static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
141static void ipv4_link_failure(struct sk_buff *skb);
David S. Miller6700c272012-07-17 03:29:28 -0700142static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
143 struct sk_buff *skb, u32 mtu);
144static void ip_do_redirect(struct dst_entry *dst, struct sock *sk,
145 struct sk_buff *skb);
David S. Millercaacf052012-07-31 15:06:50 -0700146static void ipv4_dst_destroy(struct dst_entry *dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700147
David S. Miller62fa8a82011-01-26 20:51:05 -0800148static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
149{
David S. Miller31248732012-07-10 07:08:18 -0700150 WARN_ON(1);
151 return NULL;
David S. Miller62fa8a82011-01-26 20:51:05 -0800152}
153
David S. Millerf894cbf2012-07-02 21:52:24 -0700154static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
155 struct sk_buff *skb,
156 const void *daddr);
David S. Millerd3aaeb32011-07-18 00:40:17 -0700157
Linus Torvalds1da177e2005-04-16 15:20:36 -0700158static struct dst_ops ipv4_dst_ops = {
159 .family = AF_INET,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700160 .check = ipv4_dst_check,
David S. Miller0dbaee32010-12-13 12:52:14 -0800161 .default_advmss = ipv4_default_advmss,
Steffen Klassertebb762f2011-11-23 02:12:51 +0000162 .mtu = ipv4_mtu,
David S. Miller62fa8a82011-01-26 20:51:05 -0800163 .cow_metrics = ipv4_cow_metrics,
David S. Millercaacf052012-07-31 15:06:50 -0700164 .destroy = ipv4_dst_destroy,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700165 .negative_advice = ipv4_negative_advice,
166 .link_failure = ipv4_link_failure,
167 .update_pmtu = ip_rt_update_pmtu,
David S. Millere47a1852012-07-11 20:55:47 -0700168 .redirect = ip_do_redirect,
Eric W. Biedermanb92dacd2015-10-07 16:48:37 -0500169 .local_out = __ip_local_out,
David S. Millerd3aaeb32011-07-18 00:40:17 -0700170 .neigh_lookup = ipv4_neigh_lookup,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700171};
172
173#define ECN_OR_COST(class) TC_PRIO_##class
174
Philippe De Muyter4839c522007-07-09 15:32:57 -0700175const __u8 ip_tos2prio[16] = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700176 TC_PRIO_BESTEFFORT,
Dan Siemon4a2b9c32011-03-15 13:56:07 +0000177 ECN_OR_COST(BESTEFFORT),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700178 TC_PRIO_BESTEFFORT,
179 ECN_OR_COST(BESTEFFORT),
180 TC_PRIO_BULK,
181 ECN_OR_COST(BULK),
182 TC_PRIO_BULK,
183 ECN_OR_COST(BULK),
184 TC_PRIO_INTERACTIVE,
185 ECN_OR_COST(INTERACTIVE),
186 TC_PRIO_INTERACTIVE,
187 ECN_OR_COST(INTERACTIVE),
188 TC_PRIO_INTERACTIVE_BULK,
189 ECN_OR_COST(INTERACTIVE_BULK),
190 TC_PRIO_INTERACTIVE_BULK,
191 ECN_OR_COST(INTERACTIVE_BULK)
192};
Amir Vadaid4a96862012-04-04 21:33:28 +0000193EXPORT_SYMBOL(ip_tos2prio);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700194
Eric Dumazet2f970d82006-01-17 02:54:36 -0800195static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
Christoph Lameter3ed66e92014-04-07 15:39:40 -0700196#define RT_CACHE_STAT_INC(field) raw_cpu_inc(rt_cache_stat.field)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700197
Linus Torvalds1da177e2005-04-16 15:20:36 -0700198#ifdef CONFIG_PROC_FS
Linus Torvalds1da177e2005-04-16 15:20:36 -0700199static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
200{
Eric Dumazet29e75252008-01-31 17:05:09 -0800201 if (*pos)
David S. Miller89aef892012-07-17 11:00:09 -0700202 return NULL;
Eric Dumazet29e75252008-01-31 17:05:09 -0800203 return SEQ_START_TOKEN;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700204}
205
206static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos)
207{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700208 ++*pos;
David S. Miller89aef892012-07-17 11:00:09 -0700209 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700210}
211
212static void rt_cache_seq_stop(struct seq_file *seq, void *v)
213{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700214}
215
216static int rt_cache_seq_show(struct seq_file *seq, void *v)
217{
218 if (v == SEQ_START_TOKEN)
219 seq_printf(seq, "%-127s\n",
220 "Iface\tDestination\tGateway \tFlags\t\tRefCnt\tUse\t"
221 "Metric\tSource\t\tMTU\tWindow\tIRTT\tTOS\tHHRef\t"
222 "HHUptod\tSpecDst");
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900223 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700224}
225
Stephen Hemmingerf6908082007-03-12 14:34:29 -0700226static const struct seq_operations rt_cache_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700227 .start = rt_cache_seq_start,
228 .next = rt_cache_seq_next,
229 .stop = rt_cache_seq_stop,
230 .show = rt_cache_seq_show,
231};
232
233static int rt_cache_seq_open(struct inode *inode, struct file *file)
234{
David S. Miller89aef892012-07-17 11:00:09 -0700235 return seq_open(file, &rt_cache_seq_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700236}
237
Arjan van de Ven9a321442007-02-12 00:55:35 -0800238static const struct file_operations rt_cache_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700239 .owner = THIS_MODULE,
240 .open = rt_cache_seq_open,
241 .read = seq_read,
242 .llseek = seq_lseek,
David S. Miller89aef892012-07-17 11:00:09 -0700243 .release = seq_release,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700244};
245
246
247static void *rt_cpu_seq_start(struct seq_file *seq, loff_t *pos)
248{
249 int cpu;
250
251 if (*pos == 0)
252 return SEQ_START_TOKEN;
253
Rusty Russell0f23174a2008-12-29 12:23:42 +0000254 for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700255 if (!cpu_possible(cpu))
256 continue;
257 *pos = cpu+1;
Eric Dumazet2f970d82006-01-17 02:54:36 -0800258 return &per_cpu(rt_cache_stat, cpu);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700259 }
260 return NULL;
261}
262
263static void *rt_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
264{
265 int cpu;
266
Rusty Russell0f23174a2008-12-29 12:23:42 +0000267 for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700268 if (!cpu_possible(cpu))
269 continue;
270 *pos = cpu+1;
Eric Dumazet2f970d82006-01-17 02:54:36 -0800271 return &per_cpu(rt_cache_stat, cpu);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700272 }
273 return NULL;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900274
Linus Torvalds1da177e2005-04-16 15:20:36 -0700275}
276
277static void rt_cpu_seq_stop(struct seq_file *seq, void *v)
278{
279
280}
281
282static int rt_cpu_seq_show(struct seq_file *seq, void *v)
283{
284 struct rt_cache_stat *st = v;
285
286 if (v == SEQ_START_TOKEN) {
Olaf Rempel5bec0032005-04-28 12:16:08 -0700287 seq_printf(seq, "entries in_hit in_slow_tot in_slow_mc in_no_route in_brd in_martian_dst in_martian_src out_hit out_slow_tot out_slow_mc gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700288 return 0;
289 }
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900290
Linus Torvalds1da177e2005-04-16 15:20:36 -0700291 seq_printf(seq,"%08x %08x %08x %08x %08x %08x %08x %08x "
292 " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n",
Eric Dumazetfc66f952010-10-08 06:37:34 +0000293 dst_entries_get_slow(&ipv4_dst_ops),
Eric Dumazet0baf2b32013-10-16 02:49:04 -0700294 0, /* st->in_hit */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700295 st->in_slow_tot,
296 st->in_slow_mc,
297 st->in_no_route,
298 st->in_brd,
299 st->in_martian_dst,
300 st->in_martian_src,
301
Eric Dumazet0baf2b32013-10-16 02:49:04 -0700302 0, /* st->out_hit */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700303 st->out_slow_tot,
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900304 st->out_slow_mc,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700305
Eric Dumazet0baf2b32013-10-16 02:49:04 -0700306 0, /* st->gc_total */
307 0, /* st->gc_ignored */
308 0, /* st->gc_goal_miss */
309 0, /* st->gc_dst_overflow */
310 0, /* st->in_hlist_search */
311 0 /* st->out_hlist_search */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700312 );
313 return 0;
314}
315
Stephen Hemmingerf6908082007-03-12 14:34:29 -0700316static const struct seq_operations rt_cpu_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700317 .start = rt_cpu_seq_start,
318 .next = rt_cpu_seq_next,
319 .stop = rt_cpu_seq_stop,
320 .show = rt_cpu_seq_show,
321};
322
323
324static int rt_cpu_seq_open(struct inode *inode, struct file *file)
325{
326 return seq_open(file, &rt_cpu_seq_ops);
327}
328
Arjan van de Ven9a321442007-02-12 00:55:35 -0800329static const struct file_operations rt_cpu_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700330 .owner = THIS_MODULE,
331 .open = rt_cpu_seq_open,
332 .read = seq_read,
333 .llseek = seq_lseek,
334 .release = seq_release,
335};
336
Patrick McHardyc7066f72011-01-14 13:36:42 +0100337#ifdef CONFIG_IP_ROUTE_CLASSID
Alexey Dobriyana661c412009-11-25 15:40:35 -0800338static int rt_acct_proc_show(struct seq_file *m, void *v)
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800339{
Alexey Dobriyana661c412009-11-25 15:40:35 -0800340 struct ip_rt_acct *dst, *src;
341 unsigned int i, j;
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800342
Alexey Dobriyana661c412009-11-25 15:40:35 -0800343 dst = kcalloc(256, sizeof(struct ip_rt_acct), GFP_KERNEL);
344 if (!dst)
345 return -ENOMEM;
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800346
Alexey Dobriyana661c412009-11-25 15:40:35 -0800347 for_each_possible_cpu(i) {
348 src = (struct ip_rt_acct *)per_cpu_ptr(ip_rt_acct, i);
349 for (j = 0; j < 256; j++) {
350 dst[j].o_bytes += src[j].o_bytes;
351 dst[j].o_packets += src[j].o_packets;
352 dst[j].i_bytes += src[j].i_bytes;
353 dst[j].i_packets += src[j].i_packets;
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800354 }
355 }
Alexey Dobriyana661c412009-11-25 15:40:35 -0800356
357 seq_write(m, dst, 256 * sizeof(struct ip_rt_acct));
358 kfree(dst);
359 return 0;
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800360}
Alexey Dobriyana661c412009-11-25 15:40:35 -0800361
362static int rt_acct_proc_open(struct inode *inode, struct file *file)
363{
364 return single_open(file, rt_acct_proc_show, NULL);
365}
366
367static const struct file_operations rt_acct_proc_fops = {
368 .owner = THIS_MODULE,
369 .open = rt_acct_proc_open,
370 .read = seq_read,
371 .llseek = seq_lseek,
372 .release = single_release,
373};
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800374#endif
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800375
Denis V. Lunev73b38712008-02-28 20:51:18 -0800376static int __net_init ip_rt_do_proc_init(struct net *net)
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800377{
378 struct proc_dir_entry *pde;
379
Gao fengd4beaa62013-02-18 01:34:54 +0000380 pde = proc_create("rt_cache", S_IRUGO, net->proc_net,
381 &rt_cache_seq_fops);
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800382 if (!pde)
383 goto err1;
384
Wang Chen77020722008-02-28 14:14:25 -0800385 pde = proc_create("rt_cache", S_IRUGO,
386 net->proc_net_stat, &rt_cpu_seq_fops);
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800387 if (!pde)
388 goto err2;
389
Patrick McHardyc7066f72011-01-14 13:36:42 +0100390#ifdef CONFIG_IP_ROUTE_CLASSID
Alexey Dobriyana661c412009-11-25 15:40:35 -0800391 pde = proc_create("rt_acct", 0, net->proc_net, &rt_acct_proc_fops);
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800392 if (!pde)
393 goto err3;
394#endif
395 return 0;
396
Patrick McHardyc7066f72011-01-14 13:36:42 +0100397#ifdef CONFIG_IP_ROUTE_CLASSID
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800398err3:
399 remove_proc_entry("rt_cache", net->proc_net_stat);
400#endif
401err2:
402 remove_proc_entry("rt_cache", net->proc_net);
403err1:
404 return -ENOMEM;
405}
Denis V. Lunev73b38712008-02-28 20:51:18 -0800406
407static void __net_exit ip_rt_do_proc_exit(struct net *net)
408{
409 remove_proc_entry("rt_cache", net->proc_net_stat);
410 remove_proc_entry("rt_cache", net->proc_net);
Patrick McHardyc7066f72011-01-14 13:36:42 +0100411#ifdef CONFIG_IP_ROUTE_CLASSID
Denis V. Lunev73b38712008-02-28 20:51:18 -0800412 remove_proc_entry("rt_acct", net->proc_net);
Alexey Dobriyan0a931ac2010-01-17 03:32:50 +0000413#endif
Denis V. Lunev73b38712008-02-28 20:51:18 -0800414}
415
416static struct pernet_operations ip_rt_proc_ops __net_initdata = {
417 .init = ip_rt_do_proc_init,
418 .exit = ip_rt_do_proc_exit,
419};
420
421static int __init ip_rt_proc_init(void)
422{
423 return register_pernet_subsys(&ip_rt_proc_ops);
424}
425
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800426#else
Denis V. Lunev73b38712008-02-28 20:51:18 -0800427static inline int ip_rt_proc_init(void)
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800428{
429 return 0;
430}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700431#endif /* CONFIG_PROC_FS */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900432
Eric Dumazet4331deb2012-07-25 05:11:23 +0000433static inline bool rt_is_expired(const struct rtable *rth)
Denis V. Luneve84f84f2008-07-05 19:04:32 -0700434{
fan.duca4c3fc2013-07-30 08:33:53 +0800435 return rth->rt_genid != rt_genid_ipv4(dev_net(rth->dst.dev));
Denis V. Luneve84f84f2008-07-05 19:04:32 -0700436}
437
Nicolas Dichtel4ccfe6d2012-09-07 00:45:29 +0000438void rt_cache_flush(struct net *net)
Eric Dumazet29e75252008-01-31 17:05:09 -0800439{
fan.duca4c3fc2013-07-30 08:33:53 +0800440 rt_genid_bump_ipv4(net);
Eric Dumazet98376382010-03-08 03:20:00 +0000441}
442
David S. Millerf894cbf2012-07-02 21:52:24 -0700443static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
444 struct sk_buff *skb,
445 const void *daddr)
David Miller3769cff2011-07-11 22:44:24 +0000446{
David S. Millerd3aaeb32011-07-18 00:40:17 -0700447 struct net_device *dev = dst->dev;
448 const __be32 *pkey = daddr;
David S. Miller39232972012-01-26 15:22:32 -0500449 const struct rtable *rt;
David Miller3769cff2011-07-11 22:44:24 +0000450 struct neighbour *n;
451
David S. Miller39232972012-01-26 15:22:32 -0500452 rt = (const struct rtable *) dst;
David S. Millera263b302012-07-02 02:02:15 -0700453 if (rt->rt_gateway)
David S. Miller39232972012-01-26 15:22:32 -0500454 pkey = (const __be32 *) &rt->rt_gateway;
David S. Millerf894cbf2012-07-02 21:52:24 -0700455 else if (skb)
456 pkey = &ip_hdr(skb)->daddr;
David S. Millerd3aaeb32011-07-18 00:40:17 -0700457
David S. Miller80703d22012-02-15 17:48:35 -0500458 n = __ipv4_neigh_lookup(dev, *(__force u32 *)pkey);
David S. Millerd3aaeb32011-07-18 00:40:17 -0700459 if (n)
460 return n;
David Miller32092ec2011-07-25 00:01:41 +0000461 return neigh_create(&arp_tbl, pkey, dev);
David S. Millerd3aaeb32011-07-18 00:40:17 -0700462}
463
Eric Dumazet04ca6972014-07-26 08:58:10 +0200464#define IP_IDENTS_SZ 2048u
Eric Dumazet04ca6972014-07-26 08:58:10 +0200465
Eric Dumazet355b590c2015-05-01 10:37:49 -0700466static atomic_t *ip_idents __read_mostly;
467static u32 *ip_tstamps __read_mostly;
Eric Dumazet04ca6972014-07-26 08:58:10 +0200468
469/* In order to protect privacy, we add a perturbation to identifiers
470 * if one generator is seldom used. This makes hard for an attacker
471 * to infer how many packets were sent between two points in time.
472 */
473u32 ip_idents_reserve(u32 hash, int segs)
474{
Eric Dumazet355b590c2015-05-01 10:37:49 -0700475 u32 *p_tstamp = ip_tstamps + hash % IP_IDENTS_SZ;
476 atomic_t *p_id = ip_idents + hash % IP_IDENTS_SZ;
477 u32 old = ACCESS_ONCE(*p_tstamp);
Eric Dumazet04ca6972014-07-26 08:58:10 +0200478 u32 now = (u32)jiffies;
Eric Dumazetadb03112016-09-20 18:06:17 -0700479 u32 new, delta = 0;
Eric Dumazet04ca6972014-07-26 08:58:10 +0200480
Eric Dumazet355b590c2015-05-01 10:37:49 -0700481 if (old != now && cmpxchg(p_tstamp, old, now) == old)
Eric Dumazet04ca6972014-07-26 08:58:10 +0200482 delta = prandom_u32_max(now - old);
483
Eric Dumazetadb03112016-09-20 18:06:17 -0700484 /* Do not use atomic_add_return() as it makes UBSAN unhappy */
485 do {
486 old = (u32)atomic_read(p_id);
487 new = old + delta + segs;
488 } while (atomic_cmpxchg(p_id, old, new) != old);
489
490 return new - segs;
Eric Dumazet04ca6972014-07-26 08:58:10 +0200491}
492EXPORT_SYMBOL(ip_idents_reserve);
Eric Dumazet73f156a2014-06-02 05:26:03 -0700493
Hannes Frederic Sowab6a77192015-03-25 17:07:44 +0100494void __ip_select_ident(struct net *net, struct iphdr *iph, int segs)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700495{
Eric Dumazet73f156a2014-06-02 05:26:03 -0700496 static u32 ip_idents_hashrnd __read_mostly;
497 u32 hash, id;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700498
Eric Dumazet73f156a2014-06-02 05:26:03 -0700499 net_get_random_once(&ip_idents_hashrnd, sizeof(ip_idents_hashrnd));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700500
Eric Dumazet04ca6972014-07-26 08:58:10 +0200501 hash = jhash_3words((__force u32)iph->daddr,
502 (__force u32)iph->saddr,
Hannes Frederic Sowab6a77192015-03-25 17:07:44 +0100503 iph->protocol ^ net_hash_mix(net),
Eric Dumazet04ca6972014-07-26 08:58:10 +0200504 ip_idents_hashrnd);
Eric Dumazet73f156a2014-06-02 05:26:03 -0700505 id = ip_idents_reserve(hash, segs);
506 iph->id = htons(id);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700507}
Eric Dumazet4bc2f182010-07-09 21:22:10 +0000508EXPORT_SYMBOL(__ip_select_ident);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700509
Lorenzo Colittie2d118a2016-11-04 02:23:43 +0900510static void __build_flow_key(const struct net *net, struct flowi4 *fl4,
511 const struct sock *sk,
David S. Miller4895c772012-07-17 04:19:00 -0700512 const struct iphdr *iph,
513 int oif, u8 tos,
514 u8 prot, u32 mark, int flow_flags)
515{
516 if (sk) {
517 const struct inet_sock *inet = inet_sk(sk);
518
519 oif = sk->sk_bound_dev_if;
520 mark = sk->sk_mark;
521 tos = RT_CONN_FLAGS(sk);
522 prot = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol;
523 }
524 flowi4_init_output(fl4, oif, mark, tos,
525 RT_SCOPE_UNIVERSE, prot,
526 flow_flags,
Lorenzo Colittie2d118a2016-11-04 02:23:43 +0900527 iph->daddr, iph->saddr, 0, 0,
528 sock_net_uid(net, sk));
David S. Miller4895c772012-07-17 04:19:00 -0700529}
530
Eric Dumazet5abf7f72012-07-17 22:42:13 +0200531static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb,
532 const struct sock *sk)
David S. Miller4895c772012-07-17 04:19:00 -0700533{
Lorenzo Colittid109e612016-11-30 02:56:47 +0900534 const struct net *net = dev_net(skb->dev);
David S. Miller4895c772012-07-17 04:19:00 -0700535 const struct iphdr *iph = ip_hdr(skb);
536 int oif = skb->dev->ifindex;
537 u8 tos = RT_TOS(iph->tos);
538 u8 prot = iph->protocol;
539 u32 mark = skb->mark;
540
Lorenzo Colittid109e612016-11-30 02:56:47 +0900541 __build_flow_key(net, fl4, sk, iph, oif, tos, prot, mark, 0);
David S. Miller4895c772012-07-17 04:19:00 -0700542}
543
Eric Dumazet5abf7f72012-07-17 22:42:13 +0200544static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
David S. Miller4895c772012-07-17 04:19:00 -0700545{
546 const struct inet_sock *inet = inet_sk(sk);
Eric Dumazet5abf7f72012-07-17 22:42:13 +0200547 const struct ip_options_rcu *inet_opt;
David S. Miller4895c772012-07-17 04:19:00 -0700548 __be32 daddr = inet->inet_daddr;
549
550 rcu_read_lock();
551 inet_opt = rcu_dereference(inet->inet_opt);
552 if (inet_opt && inet_opt->opt.srr)
553 daddr = inet_opt->opt.faddr;
554 flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
555 RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
556 inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
557 inet_sk_flowi_flags(sk),
Lorenzo Colittie2d118a2016-11-04 02:23:43 +0900558 daddr, inet->inet_saddr, 0, 0, sk->sk_uid);
David S. Miller4895c772012-07-17 04:19:00 -0700559 rcu_read_unlock();
560}
561
Eric Dumazet5abf7f72012-07-17 22:42:13 +0200562static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk,
563 const struct sk_buff *skb)
David S. Miller4895c772012-07-17 04:19:00 -0700564{
565 if (skb)
566 build_skb_flow_key(fl4, skb, sk);
567 else
568 build_sk_flow_key(fl4, sk);
569}
570
David S. Millerc5038a82012-07-31 15:02:02 -0700571static inline void rt_free(struct rtable *rt)
572{
573 call_rcu(&rt->dst.rcu_head, dst_rcu_free);
574}
575
576static DEFINE_SPINLOCK(fnhe_lock);
David S. Miller4895c772012-07-17 04:19:00 -0700577
Timo Teräs2ffae992013-06-27 10:27:05 +0300578static void fnhe_flush_routes(struct fib_nh_exception *fnhe)
579{
580 struct rtable *rt;
581
582 rt = rcu_dereference(fnhe->fnhe_rth_input);
583 if (rt) {
584 RCU_INIT_POINTER(fnhe->fnhe_rth_input, NULL);
585 rt_free(rt);
586 }
587 rt = rcu_dereference(fnhe->fnhe_rth_output);
588 if (rt) {
589 RCU_INIT_POINTER(fnhe->fnhe_rth_output, NULL);
590 rt_free(rt);
591 }
592}
593
Julian Anastasovaee06da2012-07-18 10:15:35 +0000594static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
David S. Miller4895c772012-07-17 04:19:00 -0700595{
596 struct fib_nh_exception *fnhe, *oldest;
597
598 oldest = rcu_dereference(hash->chain);
599 for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe;
600 fnhe = rcu_dereference(fnhe->fnhe_next)) {
601 if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp))
602 oldest = fnhe;
603 }
Timo Teräs2ffae992013-06-27 10:27:05 +0300604 fnhe_flush_routes(oldest);
David S. Miller4895c772012-07-17 04:19:00 -0700605 return oldest;
606}
607
David S. Millerd3a25c92012-07-17 13:23:08 -0700608static inline u32 fnhe_hashfun(__be32 daddr)
609{
Eric Dumazetd546c622014-09-04 08:21:31 -0700610 static u32 fnhe_hashrnd __read_mostly;
David S. Millerd3a25c92012-07-17 13:23:08 -0700611 u32 hval;
612
Eric Dumazetd546c622014-09-04 08:21:31 -0700613 net_get_random_once(&fnhe_hashrnd, sizeof(fnhe_hashrnd));
614 hval = jhash_1word((__force u32) daddr, fnhe_hashrnd);
615 return hash_32(hval, FNHE_HASH_SHIFT);
David S. Millerd3a25c92012-07-17 13:23:08 -0700616}
617
Timo Teräs387aa652013-05-27 20:46:31 +0000618static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe)
619{
620 rt->rt_pmtu = fnhe->fnhe_pmtu;
621 rt->dst.expires = fnhe->fnhe_expires;
622
623 if (fnhe->fnhe_gw) {
624 rt->rt_flags |= RTCF_REDIRECTED;
625 rt->rt_gateway = fnhe->fnhe_gw;
626 rt->rt_uses_gateway = 1;
627 }
628}
629
Julian Anastasovaee06da2012-07-18 10:15:35 +0000630static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
631 u32 pmtu, unsigned long expires)
David S. Miller4895c772012-07-17 04:19:00 -0700632{
Julian Anastasovaee06da2012-07-18 10:15:35 +0000633 struct fnhe_hash_bucket *hash;
David S. Miller4895c772012-07-17 04:19:00 -0700634 struct fib_nh_exception *fnhe;
Timo Teräs387aa652013-05-27 20:46:31 +0000635 struct rtable *rt;
636 unsigned int i;
David S. Miller4895c772012-07-17 04:19:00 -0700637 int depth;
Julian Anastasovaee06da2012-07-18 10:15:35 +0000638 u32 hval = fnhe_hashfun(daddr);
David S. Miller4895c772012-07-17 04:19:00 -0700639
David S. Millerc5038a82012-07-31 15:02:02 -0700640 spin_lock_bh(&fnhe_lock);
Julian Anastasovaee06da2012-07-18 10:15:35 +0000641
Eric Dumazetcaa41522014-09-03 22:21:56 -0700642 hash = rcu_dereference(nh->nh_exceptions);
David S. Miller4895c772012-07-17 04:19:00 -0700643 if (!hash) {
Julian Anastasovaee06da2012-07-18 10:15:35 +0000644 hash = kzalloc(FNHE_HASH_SIZE * sizeof(*hash), GFP_ATOMIC);
David S. Miller4895c772012-07-17 04:19:00 -0700645 if (!hash)
Julian Anastasovaee06da2012-07-18 10:15:35 +0000646 goto out_unlock;
Eric Dumazetcaa41522014-09-03 22:21:56 -0700647 rcu_assign_pointer(nh->nh_exceptions, hash);
David S. Miller4895c772012-07-17 04:19:00 -0700648 }
649
David S. Miller4895c772012-07-17 04:19:00 -0700650 hash += hval;
651
652 depth = 0;
653 for (fnhe = rcu_dereference(hash->chain); fnhe;
654 fnhe = rcu_dereference(fnhe->fnhe_next)) {
655 if (fnhe->fnhe_daddr == daddr)
Julian Anastasovaee06da2012-07-18 10:15:35 +0000656 break;
David S. Miller4895c772012-07-17 04:19:00 -0700657 depth++;
658 }
659
Julian Anastasovaee06da2012-07-18 10:15:35 +0000660 if (fnhe) {
661 if (gw)
662 fnhe->fnhe_gw = gw;
663 if (pmtu) {
664 fnhe->fnhe_pmtu = pmtu;
Timo Teräs387aa652013-05-27 20:46:31 +0000665 fnhe->fnhe_expires = max(1UL, expires);
Julian Anastasovaee06da2012-07-18 10:15:35 +0000666 }
Timo Teräs387aa652013-05-27 20:46:31 +0000667 /* Update all cached dsts too */
Timo Teräs2ffae992013-06-27 10:27:05 +0300668 rt = rcu_dereference(fnhe->fnhe_rth_input);
669 if (rt)
670 fill_route_from_fnhe(rt, fnhe);
671 rt = rcu_dereference(fnhe->fnhe_rth_output);
Timo Teräs387aa652013-05-27 20:46:31 +0000672 if (rt)
673 fill_route_from_fnhe(rt, fnhe);
Julian Anastasovaee06da2012-07-18 10:15:35 +0000674 } else {
675 if (depth > FNHE_RECLAIM_DEPTH)
676 fnhe = fnhe_oldest(hash);
677 else {
678 fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC);
679 if (!fnhe)
680 goto out_unlock;
681
682 fnhe->fnhe_next = hash->chain;
683 rcu_assign_pointer(hash->chain, fnhe);
684 }
Timo Teräs5aad1de2013-05-27 20:46:33 +0000685 fnhe->fnhe_genid = fnhe_genid(dev_net(nh->nh_dev));
Julian Anastasovaee06da2012-07-18 10:15:35 +0000686 fnhe->fnhe_daddr = daddr;
687 fnhe->fnhe_gw = gw;
688 fnhe->fnhe_pmtu = pmtu;
689 fnhe->fnhe_expires = expires;
Timo Teräs387aa652013-05-27 20:46:31 +0000690
691 /* Exception created; mark the cached routes for the nexthop
692 * stale, so anyone caching it rechecks if this exception
693 * applies to them.
694 */
Timo Teräs2ffae992013-06-27 10:27:05 +0300695 rt = rcu_dereference(nh->nh_rth_input);
696 if (rt)
697 rt->dst.obsolete = DST_OBSOLETE_KILL;
698
Timo Teräs387aa652013-05-27 20:46:31 +0000699 for_each_possible_cpu(i) {
700 struct rtable __rcu **prt;
701 prt = per_cpu_ptr(nh->nh_pcpu_rth_output, i);
702 rt = rcu_dereference(*prt);
703 if (rt)
704 rt->dst.obsolete = DST_OBSOLETE_KILL;
705 }
David S. Miller4895c772012-07-17 04:19:00 -0700706 }
David S. Miller4895c772012-07-17 04:19:00 -0700707
David S. Miller4895c772012-07-17 04:19:00 -0700708 fnhe->fnhe_stamp = jiffies;
Julian Anastasovaee06da2012-07-18 10:15:35 +0000709
710out_unlock:
David S. Millerc5038a82012-07-31 15:02:02 -0700711 spin_unlock_bh(&fnhe_lock);
David S. Miller4895c772012-07-17 04:19:00 -0700712}
713
/* Validate and apply an ICMP redirect against route @rt.
 *
 * The redirect is honoured only if it arrives from the gateway we are
 * currently using, the receiving device's policy allows redirects, and
 * the advised gateway is a plausible unicast/on-link address.  On
 * acceptance the new gateway is recorded as a next-hop exception; with
 * @kill_route the cached route is also marked DST_OBSOLETE_KILL so that
 * users re-resolve it.
 * Runs under RCU (uses __in_dev_get_rcu()).
 */
static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4,
			     bool kill_route)
{
	__be32 new_gw = icmp_hdr(skb)->un.gateway;
	__be32 old_gw = ip_hdr(skb)->saddr;
	struct net_device *dev = skb->dev;
	struct in_device *in_dev;
	struct fib_result res;
	struct neighbour *n;
	struct net *net;

	/* Only the four net/host redirect codes are meaningful. */
	switch (icmp_hdr(skb)->code & 7) {
	case ICMP_REDIR_NET:
	case ICMP_REDIR_NETTOS:
	case ICMP_REDIR_HOST:
	case ICMP_REDIR_HOSTTOS:
		break;

	default:
		return;
	}

	/* A redirect is only credible when sent by the gateway in use. */
	if (rt->rt_gateway != old_gw)
		return;

	in_dev = __in_dev_get_rcu(dev);
	if (!in_dev)
		return;

	net = dev_net(dev);
	/* Reject no-op redirects, disabled policy and martian gateways. */
	if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) ||
	    ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw) ||
	    ipv4_is_zeronet(new_gw))
		goto reject_redirect;

	if (!IN_DEV_SHARED_MEDIA(in_dev)) {
		/* Non-shared media: new gateway must be on-link relative to
		 * the old one; secure-redirect policy additionally requires
		 * it to already be a known default gateway.
		 */
		if (!inet_addr_onlink(in_dev, new_gw, old_gw))
			goto reject_redirect;
		if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev))
			goto reject_redirect;
	} else {
		if (inet_addr_type(net, new_gw) != RTN_UNICAST)
			goto reject_redirect;
	}

	n = __ipv4_neigh_lookup(rt->dst.dev, new_gw);
	if (!n)
		n = neigh_create(&arp_tbl, &new_gw, rt->dst.dev);
	if (!IS_ERR(n)) {
		if (!(n->nud_state & NUD_VALID)) {
			/* Don't switch to a gateway we cannot resolve yet;
			 * just kick off neighbour resolution.
			 */
			neigh_event_send(n, NULL);
		} else {
			if (fib_lookup(net, fl4, &res, 0) == 0) {
				struct fib_nh *nh = &FIB_RES_NH(res);

				/* Remember the new gateway as a per-daddr
				 * next-hop exception.
				 */
				update_or_create_fnhe(nh, fl4->daddr, new_gw,
						0, jiffies + ip_rt_gc_timeout);
			}
			if (kill_route)
				rt->dst.obsolete = DST_OBSOLETE_KILL;
			call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n);
		}
		neigh_release(n);
	}
	return;

reject_redirect:
#ifdef CONFIG_IP_ROUTE_VERBOSE
	if (IN_DEV_LOG_MARTIANS(in_dev)) {
		const struct iphdr *iph = (const struct iphdr *) skb->data;
		__be32 daddr = iph->daddr;
		__be32 saddr = iph->saddr;

		net_info_ratelimited("Redirect from %pI4 on %s about %pI4 ignored\n"
				     "  Advised path = %pI4 -> %pI4\n",
				     &old_gw, dev->name, &new_gw,
				     &saddr, &daddr);
	}
#endif
	;
}
795
David S. Miller4895c772012-07-17 04:19:00 -0700796static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
797{
798 struct rtable *rt;
799 struct flowi4 fl4;
Michal Kubecekf96ef982013-05-28 08:26:49 +0200800 const struct iphdr *iph = (const struct iphdr *) skb->data;
Lorenzo Colitti7d995692016-12-23 00:33:57 +0900801 struct net *net = dev_net(skb->dev);
Michal Kubecekf96ef982013-05-28 08:26:49 +0200802 int oif = skb->dev->ifindex;
803 u8 tos = RT_TOS(iph->tos);
804 u8 prot = iph->protocol;
805 u32 mark = skb->mark;
David S. Miller4895c772012-07-17 04:19:00 -0700806
807 rt = (struct rtable *) dst;
808
Lorenzo Colitti7d995692016-12-23 00:33:57 +0900809 __build_flow_key(net, &fl4, sk, iph, oif, tos, prot, mark, 0);
David S. Millerceb33202012-07-17 11:31:28 -0700810 __ip_do_redirect(rt, skb, &fl4, true);
David S. Miller4895c772012-07-17 04:19:00 -0700811}
812
Linus Torvalds1da177e2005-04-16 15:20:36 -0700813static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
814{
Eric Dumazetee6b9672008-03-05 18:30:47 -0800815 struct rtable *rt = (struct rtable *)dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700816 struct dst_entry *ret = dst;
817
818 if (rt) {
Timo Teräsd11a4dc2010-03-18 23:20:20 +0000819 if (dst->obsolete > 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700820 ip_rt_put(rt);
821 ret = NULL;
David S. Miller59436342012-07-10 06:58:42 -0700822 } else if ((rt->rt_flags & RTCF_REDIRECTED) ||
823 rt->dst.expires) {
David S. Miller89aef892012-07-17 11:00:09 -0700824 ip_rt_put(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700825 ret = NULL;
826 }
827 }
828 return ret;
829}
830
831/*
832 * Algorithm:
833 * 1. The first ip_rt_redirect_number redirects are sent
834 * with exponential backoff, then we stop sending them at all,
835 * assuming that the host ignores our redirects.
836 * 2. If we did not see packets requiring redirects
837 * during ip_rt_redirect_silence, we assume that the host
838 * forgot redirected route and start to send redirects again.
839 *
840 * This algorithm is much cheaper and more intelligent than dumb load limiting
841 * in icmp.c.
842 *
843 * NOTE. Do not forget to inhibit load limiting for redirects (redundant)
844 * and "frag. need" (breaks PMTU discovery) in icmp.c.
845 */
846
/* Send an ICMP redirect for @skb back to its source, rate limited per
 * inet_peer: the first ip_rt_redirect_number redirects go out with
 * exponential backoff (ip_rt_redirect_load << tokens); after
 * ip_rt_redirect_silence of quiet the counter resets (see the algorithm
 * comment above).
 */
void ip_rt_send_redirect(struct sk_buff *skb)
{
	struct rtable *rt = skb_rtable(skb);
	struct in_device *in_dev;
	struct inet_peer *peer;
	struct net *net;
	int log_martians;
	int vif;

	/* Snapshot per-device policy under RCU; the rest runs without it. */
	rcu_read_lock();
	in_dev = __in_dev_get_rcu(rt->dst.dev);
	if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) {
		rcu_read_unlock();
		return;
	}
	log_martians = IN_DEV_LOG_MARTIANS(in_dev);
	vif = l3mdev_master_ifindex_rcu(rt->dst.dev);
	rcu_read_unlock();

	net = dev_net(rt->dst.dev);
	peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, vif, 1);
	if (!peer) {
		/* No peer state available: send one unthrottled redirect. */
		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST,
			  rt_nexthop(rt, ip_hdr(skb)->daddr));
		return;
	}

	/* No redirected packets during ip_rt_redirect_silence;
	 * reset the algorithm.
	 * NOTE(review): rate_tokens doubles as the redirect counter here
	 * and as the ICMP-error token bucket in ip_error() for the same
	 * peer - confirm this sharing is intended.
	 */
	if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence))
		peer->rate_tokens = 0;

	/* Too many ignored redirects; do not send anything
	 * set dst.rate_last to the last seen redirected packet.
	 */
	if (peer->rate_tokens >= ip_rt_redirect_number) {
		peer->rate_last = jiffies;
		goto out_put_peer;
	}

	/* Check for load limit; set rate_last to the latest sent
	 * redirect.
	 */
	if (peer->rate_tokens == 0 ||
	    time_after(jiffies,
		       (peer->rate_last +
			(ip_rt_redirect_load << peer->rate_tokens)))) {
		__be32 gw = rt_nexthop(rt, ip_hdr(skb)->daddr);

		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw);
		peer->rate_last = jiffies;
		++peer->rate_tokens;
#ifdef CONFIG_IP_ROUTE_VERBOSE
		if (log_martians &&
		    peer->rate_tokens == ip_rt_redirect_number)
			net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n",
					     &ip_hdr(skb)->saddr, inet_iif(skb),
					     &ip_hdr(skb)->daddr, &gw);
#endif
	}
out_put_peer:
	inet_putpeer(peer);
}
911
/* Input handler for dsts that carry an error (rt->dst.error): bump the
 * relevant SNMP counters and, subject to a per-peer token bucket, send
 * the matching ICMP destination-unreachable back to the source.
 * Always consumes @skb.
 */
static int ip_error(struct sk_buff *skb)
{
	struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
	struct rtable *rt = skb_rtable(skb);
	struct inet_peer *peer;
	unsigned long now;
	struct net *net;
	bool send;
	int code;

	/* IP on this device is disabled. */
	if (!in_dev)
		goto out;

	net = dev_net(rt->dst.dev);
	if (!IN_DEV_FORWARD(in_dev)) {
		/* Not forwarding: account the error but never send ICMP
		 * for packets we refused to forward.
		 */
		switch (rt->dst.error) {
		case EHOSTUNREACH:
			__IP_INC_STATS(net, IPSTATS_MIB_INADDRERRORS);
			break;

		case ENETUNREACH:
			__IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES);
			break;
		}
		goto out;
	}

	/* Map the dst error to an ICMP code; anything else is dropped
	 * silently.
	 */
	switch (rt->dst.error) {
	case EINVAL:
	default:
		goto out;
	case EHOSTUNREACH:
		code = ICMP_HOST_UNREACH;
		break;
	case ENETUNREACH:
		code = ICMP_NET_UNREACH;
		__IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES);
		break;
	case EACCES:
		code = ICMP_PKT_FILTERED;
		break;
	}

	peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr,
			       l3mdev_master_ifindex(skb->dev), 1);

	send = true;
	if (peer) {
		/* Token bucket: tokens accrue with elapsed jiffies, capped
		 * at ip_rt_error_burst; each ICMP costs ip_rt_error_cost.
		 */
		now = jiffies;
		peer->rate_tokens += now - peer->rate_last;
		if (peer->rate_tokens > ip_rt_error_burst)
			peer->rate_tokens = ip_rt_error_burst;
		peer->rate_last = now;
		if (peer->rate_tokens >= ip_rt_error_cost)
			peer->rate_tokens -= ip_rt_error_cost;
		else
			send = false;
		inet_putpeer(peer);
	}
	if (send)
		icmp_send(skb, ICMP_DEST_UNREACH, code, 0);

out:	kfree_skb(skb);
	return 0;
}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700978
/* Record a new path MTU for the flow @fl4 on route @rt by updating or
 * creating a next-hop exception that expires ip_rt_mtu_expires from now.
 */
static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
{
	struct dst_entry *dst = &rt->dst;
	struct fib_result res;

	/* An administratively locked MTU is never overridden by PMTU. */
	if (dst_metric_locked(dst, RTAX_MTU))
		return;

	/* PMTU discovery only shrinks the MTU; ignore larger reports. */
	if (ipv4_mtu(dst) < mtu)
		return;

	/* Clamp to the configured floor to resist bogus tiny-MTU reports. */
	if (mtu < ip_rt_min_pmtu)
		mtu = ip_rt_min_pmtu;

	/* Same MTU with more than half its lifetime left: nothing to do. */
	if (rt->rt_pmtu == mtu &&
	    time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2))
		return;

	rcu_read_lock();
	if (fib_lookup(dev_net(dst->dev), fl4, &res, 0) == 0) {
		struct fib_nh *nh = &FIB_RES_NH(res);

		update_or_create_fnhe(nh, fl4->daddr, 0, mtu,
				      jiffies + ip_rt_mtu_expires);
	}
	rcu_read_unlock();
}
1006
David S. Miller4895c772012-07-17 04:19:00 -07001007static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1008 struct sk_buff *skb, u32 mtu)
1009{
1010 struct rtable *rt = (struct rtable *) dst;
1011 struct flowi4 fl4;
1012
1013 ip_rt_build_flow_key(&fl4, sk, skb);
Steffen Klassertd851c122012-10-07 22:47:25 +00001014 __ip_rt_update_pmtu(rt, &fl4, mtu);
David S. Miller4895c772012-07-17 04:19:00 -07001015}
1016
David S. Miller36393392012-06-14 22:21:46 -07001017void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
1018 int oif, u32 mark, u8 protocol, int flow_flags)
1019{
David S. Miller4895c772012-07-17 04:19:00 -07001020 const struct iphdr *iph = (const struct iphdr *) skb->data;
David S. Miller36393392012-06-14 22:21:46 -07001021 struct flowi4 fl4;
1022 struct rtable *rt;
1023
Lorenzo Colitti1b3c61d2014-05-13 10:17:34 -07001024 if (!mark)
1025 mark = IP4_REPLY_MARK(net, skb->mark);
1026
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09001027 __build_flow_key(net, &fl4, NULL, iph, oif,
David S. Miller4895c772012-07-17 04:19:00 -07001028 RT_TOS(iph->tos), protocol, mark, flow_flags);
David S. Miller36393392012-06-14 22:21:46 -07001029 rt = __ip_route_output_key(net, &fl4);
1030 if (!IS_ERR(rt)) {
David S. Miller4895c772012-07-17 04:19:00 -07001031 __ip_rt_update_pmtu(rt, &fl4, mtu);
David S. Miller36393392012-06-14 22:21:46 -07001032 ip_rt_put(rt);
1033 }
1034}
1035EXPORT_SYMBOL_GPL(ipv4_update_pmtu);
1036
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001037static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
David S. Miller36393392012-06-14 22:21:46 -07001038{
David S. Miller4895c772012-07-17 04:19:00 -07001039 const struct iphdr *iph = (const struct iphdr *) skb->data;
1040 struct flowi4 fl4;
1041 struct rtable *rt;
David S. Miller36393392012-06-14 22:21:46 -07001042
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09001043 __build_flow_key(sock_net(sk), &fl4, sk, iph, 0, 0, 0, 0, 0);
Lorenzo Colitti1b3c61d2014-05-13 10:17:34 -07001044
1045 if (!fl4.flowi4_mark)
1046 fl4.flowi4_mark = IP4_REPLY_MARK(sock_net(sk), skb->mark);
1047
David S. Miller4895c772012-07-17 04:19:00 -07001048 rt = __ip_route_output_key(sock_net(sk), &fl4);
1049 if (!IS_ERR(rt)) {
1050 __ip_rt_update_pmtu(rt, &fl4, mtu);
1051 ip_rt_put(rt);
1052 }
David S. Miller36393392012-06-14 22:21:46 -07001053}
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001054
/* Per-socket PMTU update that also refreshes the socket's cached dst.
 *
 * If the socket is owned by user context (or has no cached dst) we fall
 * back to the lock-free __ipv4_sk_update_pmtu().  Otherwise the cached
 * route is revalidated, the MTU applied to the underlying path dst, and
 * a freshly looked-up route is installed on the socket when the old one
 * is no longer valid.  Runs under bh_lock_sock().
 */
void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
{
	const struct iphdr *iph = (const struct iphdr *) skb->data;
	struct flowi4 fl4;
	struct rtable *rt;
	struct dst_entry *odst = NULL;	/* socket's old dst, released at out */
	bool new = false;		/* true when rt holds a fresh reference */
	struct net *net = sock_net(sk);

	bh_lock_sock(sk);

	if (!ip_sk_accept_pmtu(sk))
		goto out;

	odst = sk_dst_get(sk);

	if (sock_owned_by_user(sk) || !odst) {
		__ipv4_sk_update_pmtu(skb, sk, mtu);
		goto out;
	}

	__build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0);

	rt = (struct rtable *)odst;
	if (odst->obsolete && !odst->ops->check(odst, 0)) {
		/* Cached route is stale: look up a replacement. */
		rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
		if (IS_ERR(rt))
			goto out;

		new = true;
	}

	/* Apply the MTU to the path dst (handles xfrm-bundled routes). */
	__ip_rt_update_pmtu((struct rtable *) rt->dst.path, &fl4, mtu);

	if (!dst_check(&rt->dst, 0)) {
		/* The update invalidated the route; look it up again. */
		if (new)
			dst_release(&rt->dst);

		rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
		if (IS_ERR(rt))
			goto out;

		new = true;
	}

	if (new)
		sk_dst_set(sk, &rt->dst);

out:
	bh_unlock_sock(sk);
	dst_release(odst);
}
EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu);
David S. Millerf39925d2011-02-09 22:00:16 -08001108
David S. Millerb42597e2012-07-11 21:25:45 -07001109void ipv4_redirect(struct sk_buff *skb, struct net *net,
1110 int oif, u32 mark, u8 protocol, int flow_flags)
1111{
David S. Miller4895c772012-07-17 04:19:00 -07001112 const struct iphdr *iph = (const struct iphdr *) skb->data;
David S. Millerb42597e2012-07-11 21:25:45 -07001113 struct flowi4 fl4;
1114 struct rtable *rt;
1115
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09001116 __build_flow_key(net, &fl4, NULL, iph, oif,
David S. Miller4895c772012-07-17 04:19:00 -07001117 RT_TOS(iph->tos), protocol, mark, flow_flags);
David S. Millerb42597e2012-07-11 21:25:45 -07001118 rt = __ip_route_output_key(net, &fl4);
1119 if (!IS_ERR(rt)) {
David S. Millerceb33202012-07-17 11:31:28 -07001120 __ip_do_redirect(rt, skb, &fl4, false);
David S. Millerb42597e2012-07-11 21:25:45 -07001121 ip_rt_put(rt);
1122 }
1123}
1124EXPORT_SYMBOL_GPL(ipv4_redirect);
1125
1126void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk)
1127{
David S. Miller4895c772012-07-17 04:19:00 -07001128 const struct iphdr *iph = (const struct iphdr *) skb->data;
1129 struct flowi4 fl4;
1130 struct rtable *rt;
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09001131 struct net *net = sock_net(sk);
David S. Millerb42597e2012-07-11 21:25:45 -07001132
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09001133 __build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0);
1134 rt = __ip_route_output_key(net, &fl4);
David S. Miller4895c772012-07-17 04:19:00 -07001135 if (!IS_ERR(rt)) {
David S. Millerceb33202012-07-17 11:31:28 -07001136 __ip_do_redirect(rt, skb, &fl4, false);
David S. Miller4895c772012-07-17 04:19:00 -07001137 ip_rt_put(rt);
1138 }
David S. Millerb42597e2012-07-11 21:25:45 -07001139}
1140EXPORT_SYMBOL_GPL(ipv4_sk_redirect);
1141
David S. Millerefbc368d2011-12-01 13:38:59 -05001142static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
1143{
1144 struct rtable *rt = (struct rtable *) dst;
1145
David S. Millerceb33202012-07-17 11:31:28 -07001146 /* All IPV4 dsts are created with ->obsolete set to the value
1147 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1148 * into this function always.
1149 *
Timo Teräs387aa652013-05-27 20:46:31 +00001150 * When a PMTU/redirect information update invalidates a route,
1151 * this is indicated by setting obsolete to DST_OBSOLETE_KILL or
1152 * DST_OBSOLETE_DEAD by dst_free().
David S. Millerceb33202012-07-17 11:31:28 -07001153 */
Timo Teräs387aa652013-05-27 20:46:31 +00001154 if (dst->obsolete != DST_OBSOLETE_FORCE_CHK || rt_is_expired(rt))
David S. Millerefbc368d2011-12-01 13:38:59 -05001155 return NULL;
Timo Teräsd11a4dc2010-03-18 23:20:20 +00001156 return dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001157}
1158
/* dst_ops.link_failure hook: tell the sender the host is unreachable,
 * then expire the route attached to @skb so it is revalidated on next
 * use (ipv4_dst_check()/negative advice will drop it).
 */
static void ipv4_link_failure(struct sk_buff *skb)
{
	struct rtable *rt;

	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);

	rt = skb_rtable(skb);
	if (rt)
		dst_set_expires(&rt->dst, 0);
}
1169
Eric W. Biedermanede20592015-10-07 16:48:47 -05001170static int ip_rt_bug(struct net *net, struct sock *sk, struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001171{
Joe Perches91df42b2012-05-15 14:11:54 +00001172 pr_debug("%s: %pI4 -> %pI4, %s\n",
1173 __func__, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr,
1174 skb->dev ? skb->dev->name : "?");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001175 kfree_skb(skb);
Dave Jonesc378a9c2011-05-21 07:16:42 +00001176 WARN_ON(1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001177 return 0;
1178}
1179
1180/*
1181 We do not cache source address of outgoing interface,
1182 because it is used only by IP RR, TS and SRR options,
1183 so that it out of fast path.
1184
1185 BTW remember: "addr" is allowed to be not aligned
1186 in IP options!
1187 */
1188
/* Fill @addr (4 bytes) with the source address this host would use on
 * route @rt - used by the IP RR/TS/SRR option handlers (see the comment
 * above), hence deliberately off the fast path.
 */
void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt)
{
	__be32 src;

	/* Output route: the packet's own source address is the answer.
	 * Input route: redo a fib lookup mirroring the received packet to
	 * find the preferred local source for replies.
	 */
	if (rt_is_output_route(rt))
		src = ip_hdr(skb)->saddr;
	else {
		struct fib_result res;
		struct flowi4 fl4;
		struct iphdr *iph;

		iph = ip_hdr(skb);

		memset(&fl4, 0, sizeof(fl4));
		fl4.daddr = iph->daddr;
		fl4.saddr = iph->saddr;
		fl4.flowi4_tos = RT_TOS(iph->tos);
		fl4.flowi4_oif = rt->dst.dev->ifindex;
		fl4.flowi4_iif = skb->dev->ifindex;
		fl4.flowi4_mark = skb->mark;

		rcu_read_lock();
		if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res, 0) == 0)
			src = FIB_RES_PREFSRC(dev_net(rt->dst.dev), res);
		else
			/* No route back: fall back to any universe-scope
			 * address on the output device.
			 */
			src = inet_select_addr(rt->dst.dev,
					       rt_nexthop(rt, iph->daddr),
					       RT_SCOPE_UNIVERSE);
		rcu_read_unlock();
	}
	memcpy(addr, &src, 4);
}
1221
Patrick McHardyc7066f72011-01-14 13:36:42 +01001222#ifdef CONFIG_IP_ROUTE_CLASSID
Linus Torvalds1da177e2005-04-16 15:20:36 -07001223static void set_class_tag(struct rtable *rt, u32 tag)
1224{
Changli Gaod8d1f302010-06-10 23:31:35 -07001225 if (!(rt->dst.tclassid & 0xFFFF))
1226 rt->dst.tclassid |= tag & 0xFFFF;
1227 if (!(rt->dst.tclassid & 0xFFFF0000))
1228 rt->dst.tclassid |= tag & 0xFFFF0000;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001229}
1230#endif
1231
David S. Miller0dbaee32010-12-13 12:52:14 -08001232static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
1233{
1234 unsigned int advmss = dst_metric_raw(dst, RTAX_ADVMSS);
1235
1236 if (advmss == 0) {
1237 advmss = max_t(unsigned int, dst->dev->mtu - 40,
1238 ip_rt_min_advmss);
1239 if (advmss > 65535 - 40)
1240 advmss = 65535 - 40;
1241 }
1242 return advmss;
1243}
1244
/* dst_ops.mtu hook: effective MTU of an IPv4 route.  Precedence: a
 * still-valid learned PMTU, then the RTAX_MTU metric, then the device
 * MTU (capped at 576 for locked-MTU gateway routes), clamped to
 * IP_MAX_MTU and reduced by any lightweight-tunnel encap headroom.
 */
static unsigned int ipv4_mtu(const struct dst_entry *dst)
{
	const struct rtable *rt = (const struct rtable *) dst;
	unsigned int mtu = rt->rt_pmtu;

	/* A learned PMTU only counts while unexpired. */
	if (!mtu || time_after_eq(jiffies, rt->dst.expires))
		mtu = dst_metric_raw(dst, RTAX_MTU);

	if (mtu)
		return mtu;

	mtu = dst->dev->mtu;

	if (unlikely(dst_metric_locked(dst, RTAX_MTU))) {
		/* Locked MTU through a gateway is capped at the IPv4
		 * minimum-reassembly size of 576 (historic behaviour).
		 */
		if (rt->rt_uses_gateway && mtu > 576)
			mtu = 576;
	}

	mtu = min_t(unsigned int, mtu, IP_MAX_MTU);

	return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
}
1267
/* Find the fib_nh_exception for destination @daddr on next-hop @nh, or
 * NULL if none.  Caller must hold rcu_read_lock(); the returned entry is
 * only valid within that read-side section.
 */
static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
{
	struct fnhe_hash_bucket *hash = rcu_dereference(nh->nh_exceptions);
	struct fib_nh_exception *fnhe;
	u32 hval;

	if (!hash)
		return NULL;

	hval = fnhe_hashfun(daddr);

	/* Walk the per-bucket chain looking for an exact daddr match. */
	for (fnhe = rcu_dereference(hash[hval].chain); fnhe;
	     fnhe = rcu_dereference(fnhe->fnhe_next)) {
		if (fnhe->fnhe_daddr == daddr)
			return fnhe;
	}
	return NULL;
}
David S. Miller4895c772012-07-17 04:19:00 -07001286
/* Bind route @rt to next-hop exception @fnhe for destination @daddr.
 *
 * Under fnhe_lock: if the netns fnhe generation changed, the stale
 * exception data and its cached routes are flushed first.  The route is
 * then populated from the exception, and - unless it is DST_NOCACHE -
 * published as the exception's cached input or output route.
 * Returns true when @rt was cached on the exception.
 */
static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
			      __be32 daddr)
{
	bool ret = false;

	spin_lock_bh(&fnhe_lock);

	/* Recheck under the lock; the exception may have been recycled. */
	if (daddr == fnhe->fnhe_daddr) {
		struct rtable __rcu **porig;
		struct rtable *orig;
		int genid = fnhe_genid(dev_net(rt->dst.dev));

		if (rt_is_input_route(rt))
			porig = &fnhe->fnhe_rth_input;
		else
			porig = &fnhe->fnhe_rth_output;
		orig = rcu_dereference(*porig);

		if (fnhe->fnhe_genid != genid) {
			/* Generation bump invalidates everything learned. */
			fnhe->fnhe_genid = genid;
			fnhe->fnhe_gw = 0;
			fnhe->fnhe_pmtu = 0;
			fnhe->fnhe_expires = 0;
			fnhe_flush_routes(fnhe);
			orig = NULL;
		}
		fill_route_from_fnhe(rt, fnhe);
		if (!rt->rt_gateway)
			rt->rt_gateway = daddr;

		if (!(rt->dst.flags & DST_NOCACHE)) {
			/* Publish rt and free the route it replaces. */
			rcu_assign_pointer(*porig, rt);
			if (orig)
				rt_free(orig);
			ret = true;
		}

		fnhe->fnhe_stamp = jiffies;
	}
	spin_unlock_bh(&fnhe_lock);

	return ret;
}
1330
/* Try to cache @rt on next-hop @nh: input routes go to nh_rth_input,
 * output routes to this CPU's nh_pcpu_rth_output slot.  The swap is a
 * lock-free cmpxchg; returns false if another CPU raced us into the
 * slot (the caller then treats @rt as uncached).
 */
static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt)
{
	struct rtable *orig, *prev, **p;
	bool ret = true;

	if (rt_is_input_route(rt)) {
		p = (struct rtable **)&nh->nh_rth_input;
	} else {
		p = (struct rtable **)raw_cpu_ptr(nh->nh_pcpu_rth_output);
	}
	orig = *p;

	prev = cmpxchg(p, orig, rt);
	if (prev == orig) {
		/* We won the slot; free the route we displaced. */
		if (orig)
			rt_free(orig);
	} else
		ret = false;

	return ret;
}
1352
/* Per-cpu list of uncached rtables, so rt_flush_dev() can find and
 * re-home them when their device disappears.
 */
struct uncached_list {
	spinlock_t lock;	/* protects head */
	struct list_head head;	/* rtables linked via rt->rt_uncached */
};

static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt_uncached_list);
David S. Millercaacf052012-07-31 15:06:50 -07001359
1360static void rt_add_uncached_list(struct rtable *rt)
1361{
Eric Dumazet5055c372015-01-14 15:17:06 -08001362 struct uncached_list *ul = raw_cpu_ptr(&rt_uncached_list);
1363
1364 rt->rt_uncached_list = ul;
1365
1366 spin_lock_bh(&ul->lock);
1367 list_add_tail(&rt->rt_uncached, &ul->head);
1368 spin_unlock_bh(&ul->lock);
David S. Millercaacf052012-07-31 15:06:50 -07001369}
1370
1371static void ipv4_dst_destroy(struct dst_entry *dst)
1372{
1373 struct rtable *rt = (struct rtable *) dst;
1374
Eric Dumazet78df76a2012-08-24 05:40:47 +00001375 if (!list_empty(&rt->rt_uncached)) {
Eric Dumazet5055c372015-01-14 15:17:06 -08001376 struct uncached_list *ul = rt->rt_uncached_list;
1377
1378 spin_lock_bh(&ul->lock);
David S. Millercaacf052012-07-31 15:06:50 -07001379 list_del(&rt->rt_uncached);
Eric Dumazet5055c372015-01-14 15:17:06 -08001380 spin_unlock_bh(&ul->lock);
David S. Millercaacf052012-07-31 15:06:50 -07001381 }
1382}
1383
1384void rt_flush_dev(struct net_device *dev)
1385{
Eric Dumazet5055c372015-01-14 15:17:06 -08001386 struct net *net = dev_net(dev);
1387 struct rtable *rt;
1388 int cpu;
David S. Millercaacf052012-07-31 15:06:50 -07001389
Eric Dumazet5055c372015-01-14 15:17:06 -08001390 for_each_possible_cpu(cpu) {
1391 struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu);
1392
1393 spin_lock_bh(&ul->lock);
1394 list_for_each_entry(rt, &ul->head, rt_uncached) {
David S. Millercaacf052012-07-31 15:06:50 -07001395 if (rt->dst.dev != dev)
1396 continue;
1397 rt->dst.dev = net->loopback_dev;
1398 dev_hold(rt->dst.dev);
1399 dev_put(dev);
1400 }
Eric Dumazet5055c372015-01-14 15:17:06 -08001401 spin_unlock_bh(&ul->lock);
David S. Miller4895c772012-07-17 04:19:00 -07001402 }
1403}
1404
Eric Dumazet4331deb2012-07-25 05:11:23 +00001405static bool rt_cache_valid(const struct rtable *rt)
David S. Millerd2d68ba92012-07-17 12:58:50 -07001406{
Eric Dumazet4331deb2012-07-25 05:11:23 +00001407 return rt &&
1408 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1409 !rt_is_expired(rt);
David S. Millerd2d68ba92012-07-17 12:58:50 -07001410}
1411
/* Finish initialising @rt from the FIB lookup result @res.
 *
 * When a fib_info @fi is present, copy the nexthop gateway, metrics,
 * classid and lwtunnel state onto the route, then try to cache it:
 * in the nexthop exception @fnhe if one matched, otherwise in the
 * nexthop itself (unless the route was allocated DST_NOCACHE).
 * Routes that end up uncached are flagged DST_NOCACHE and added to
 * the per-cpu uncached list so device removal can find them.
 */
static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
			   const struct fib_result *res,
			   struct fib_nh_exception *fnhe,
			   struct fib_info *fi, u16 type, u32 itag)
{
	bool cached = false;

	if (fi) {
		struct fib_nh *nh = &FIB_RES_NH(*res);

		/* Only use the nexthop as gateway when it is a real
		 * on-link gateway address.
		 */
		if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) {
			rt->rt_gateway = nh->nh_gw;
			rt->rt_uses_gateway = 1;
		}
		dst_init_metrics(&rt->dst, fi->fib_metrics, true);
#ifdef CONFIG_IP_ROUTE_CLASSID
		rt->dst.tclassid = nh->nh_tclassid;
#endif
		rt->dst.lwtstate = lwtstate_get(nh->nh_lwtstate);
		if (unlikely(fnhe))
			cached = rt_bind_exception(rt, fnhe, daddr);
		else if (!(rt->dst.flags & DST_NOCACHE))
			cached = rt_cache_route(nh, rt);
		if (unlikely(!cached)) {
			/* Routes we intend to cache in nexthop exception or
			 * FIB nexthop have the DST_NOCACHE bit clear.
			 * However, if we are unsuccessful at storing this
			 * route into the cache we really need to set it.
			 */
			rt->dst.flags |= DST_NOCACHE;
			if (!rt->rt_gateway)
				rt->rt_gateway = daddr;
			rt_add_uncached_list(rt);
		}
	} else
		rt_add_uncached_list(rt);

#ifdef CONFIG_IP_ROUTE_CLASSID
#ifdef CONFIG_IP_MULTIPLE_TABLES
	set_class_tag(rt, res->tclassid);
#endif
	set_class_tag(rt, itag);
#endif
}
1456
David Ahern9ab179d2016-04-07 11:10:06 -07001457struct rtable *rt_dst_alloc(struct net_device *dev,
1458 unsigned int flags, u16 type,
1459 bool nopolicy, bool noxfrm, bool will_cache)
David S. Miller0c4dcd52011-02-17 15:42:37 -08001460{
David Ahernd08c4f32015-09-02 13:58:34 -07001461 struct rtable *rt;
1462
1463 rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
1464 (will_cache ? 0 : (DST_HOST | DST_NOCACHE)) |
1465 (nopolicy ? DST_NOPOLICY : 0) |
1466 (noxfrm ? DST_NOXFRM : 0));
1467
1468 if (rt) {
1469 rt->rt_genid = rt_genid_ipv4(dev_net(dev));
1470 rt->rt_flags = flags;
1471 rt->rt_type = type;
1472 rt->rt_is_input = 0;
1473 rt->rt_iif = 0;
1474 rt->rt_pmtu = 0;
1475 rt->rt_gateway = 0;
1476 rt->rt_uses_gateway = 0;
David Ahernb7503e02015-09-02 13:58:35 -07001477 rt->rt_table_id = 0;
David Ahernd08c4f32015-09-02 13:58:34 -07001478 INIT_LIST_HEAD(&rt->rt_uncached);
1479
1480 rt->dst.output = ip_output;
1481 if (flags & RTCF_LOCAL)
1482 rt->dst.input = ip_local_deliver;
1483 }
1484
1485 return rt;
David S. Miller0c4dcd52011-02-17 15:42:37 -08001486}
David Ahern9ab179d2016-04-07 11:10:06 -07001487EXPORT_SYMBOL(rt_dst_alloc);
David S. Miller0c4dcd52011-02-17 15:42:37 -08001488
/* called in rcu_read_lock() section */
/* Build and attach a route for a multicast packet received on @dev.
 *
 * Validates the source address, then attaches a fresh RTN_MULTICAST
 * route to @skb.  @our is non-zero when the host is a member of the
 * destination group, which adds RTCF_LOCAL for local delivery.
 *
 * Returns 0 on success, -EINVAL for invalid sources or a missing
 * in_device, -ENOBUFS on allocation failure, or the error from
 * fib_validate_source().
 */
static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
			     u8 tos, struct net_device *dev, int our)
{
	struct rtable *rth;
	struct in_device *in_dev = __in_dev_get_rcu(dev);
	unsigned int flags = RTCF_MULTICAST;
	u32 itag = 0;
	int err;

	/* Primary sanity checks. */

	if (!in_dev)
		return -EINVAL;

	/* The source must not itself be multicast or limited broadcast,
	 * and only IP frames are routed here.
	 */
	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
	    skb->protocol != htons(ETH_P_IP))
		goto e_inval;

	/* Loopback sources are only allowed when route_localnet is on. */
	if (ipv4_is_loopback(saddr) && !IN_DEV_ROUTE_LOCALNET(in_dev))
		goto e_inval;

	if (ipv4_is_zeronet(saddr)) {
		/* A zero source is accepted only for link-local groups. */
		if (!ipv4_is_local_multicast(daddr))
			goto e_inval;
	} else {
		err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
					  in_dev, &itag);
		if (err < 0)
			goto e_err;
	}
	if (our)
		flags |= RTCF_LOCAL;

	rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false);
	if (!rth)
		goto e_nobufs;

#ifdef CONFIG_IP_ROUTE_CLASSID
	rth->dst.tclassid = itag;
#endif
	/* Multicast input routes must never be used for output. */
	rth->dst.output = ip_rt_bug;
	rth->rt_is_input= 1;

#ifdef CONFIG_IP_MROUTE
	if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev))
		rth->dst.input = ip_mr_input;
#endif
	RT_CACHE_STAT_INC(in_slow_mc);

	skb_dst_set(skb, &rth->dst);
	return 0;

e_nobufs:
	return -ENOBUFS;
e_inval:
	return -EINVAL;
e_err:
	return err;
}
1550
1551
1552static void ip_handle_martian_source(struct net_device *dev,
1553 struct in_device *in_dev,
1554 struct sk_buff *skb,
Al Viro9e12bb22006-09-26 21:25:20 -07001555 __be32 daddr,
1556 __be32 saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001557{
1558 RT_CACHE_STAT_INC(in_martian_src);
1559#ifdef CONFIG_IP_ROUTE_VERBOSE
1560 if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) {
1561 /*
1562 * RFC1812 recommendation, if source is martian,
1563 * the only hint is MAC header.
1564 */
Joe Perches058bd4d2012-03-11 18:36:11 +00001565 pr_warn("martian source %pI4 from %pI4, on dev %s\n",
Harvey Harrison673d57e2008-10-31 00:53:57 -07001566 &daddr, &saddr, dev->name);
Arnaldo Carvalho de Melo98e399f2007-03-19 15:33:04 -07001567 if (dev->hard_header_len && skb_mac_header_was_set(skb)) {
Joe Perches058bd4d2012-03-11 18:36:11 +00001568 print_hex_dump(KERN_WARNING, "ll header: ",
1569 DUMP_PREFIX_OFFSET, 16, 1,
1570 skb_mac_header(skb),
1571 dev->hard_header_len, true);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001572 }
1573 }
1574#endif
1575}
1576
/* Remove the nexthop exception for @daddr from @nh, if one exists.
 *
 * Runs under fnhe_lock.  The matching entry is unlinked with
 * rcu_assign_pointer() so concurrent RCU readers always see a
 * consistent chain, its cached routes are released via
 * fnhe_flush_routes(), and the entry itself is freed after a grace
 * period with kfree_rcu().
 */
static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)
{
	struct fnhe_hash_bucket *hash;
	struct fib_nh_exception *fnhe, __rcu **fnhe_p;
	u32 hval = fnhe_hashfun(daddr);

	spin_lock_bh(&fnhe_lock);

	hash = rcu_dereference_protected(nh->nh_exceptions,
					 lockdep_is_held(&fnhe_lock));
	hash += hval;

	/* Walk the bucket chain keeping a pointer to the link that
	 * references the current entry so it can be spliced out.
	 */
	fnhe_p = &hash->chain;
	fnhe = rcu_dereference_protected(*fnhe_p, lockdep_is_held(&fnhe_lock));
	while (fnhe) {
		if (fnhe->fnhe_daddr == daddr) {
			rcu_assign_pointer(*fnhe_p, rcu_dereference_protected(
				fnhe->fnhe_next, lockdep_is_held(&fnhe_lock)));
			fnhe_flush_routes(fnhe);
			kfree_rcu(fnhe, rcu);
			break;
		}
		fnhe_p = &fnhe->fnhe_next;
		fnhe = rcu_dereference_protected(fnhe->fnhe_next,
						 lockdep_is_held(&fnhe_lock));
	}

	spin_unlock_bh(&fnhe_lock);
}
1606
Thomas Grafefd85702016-11-30 17:10:09 +01001607static void set_lwt_redirect(struct rtable *rth)
1608{
1609 if (lwtunnel_output_redirect(rth->dst.lwtstate)) {
1610 rth->dst.lwtstate->orig_output = rth->dst.output;
1611 rth->dst.output = lwtunnel_output;
1612 }
1613
1614 if (lwtunnel_input_redirect(rth->dst.lwtstate)) {
1615 rth->dst.lwtstate->orig_input = rth->dst.input;
1616 rth->dst.input = lwtunnel_input;
1617 }
1618}
1619
/* called in rcu_read_lock() section */
/* Create (or reuse from the nexthop cache) a forwarding route for an
 * input packet whose FIB lookup result is @res, and attach it to @skb.
 *
 * Validates the source against the output interface, decides whether
 * an ICMP redirect should be sent (IPSKB_DOREDIRECT), honours nexthop
 * exceptions (dropping expired ones via ip_del_fnhe()), and only
 * allocates a fresh route when no valid cached one exists.
 *
 * Returns 0 on success or a negative errno.
 */
static int __mkroute_input(struct sk_buff *skb,
			   const struct fib_result *res,
			   struct in_device *in_dev,
			   __be32 daddr, __be32 saddr, u32 tos)
{
	struct fib_nh_exception *fnhe;
	struct rtable *rth;
	int err;
	struct in_device *out_dev;
	bool do_cache;
	u32 itag = 0;

	/* get a working reference to the output device */
	out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res));
	if (!out_dev) {
		net_crit_ratelimited("Bug in ip_route_input_slow(). Please report.\n");
		return -EINVAL;
	}

	err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res),
				  in_dev->dev, in_dev, &itag);
	if (err < 0) {
		ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
					 saddr);

		goto cleanup;
	}

	/* Only cache when a fib_info exists and the source produced no
	 * classification tag.
	 */
	do_cache = res->fi && !itag;
	if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) &&
	    skb->protocol == htons(ETH_P_IP) &&
	    (IN_DEV_SHARED_MEDIA(out_dev) ||
	     inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res))))
		IPCB(skb)->flags |= IPSKB_DOREDIRECT;

	if (skb->protocol != htons(ETH_P_IP)) {
		/* Not IP (i.e. ARP). Do not create route, if it is
		 * invalid for proxy arp. DNAT routes are always valid.
		 *
		 * Proxy arp feature have been extended to allow, ARP
		 * replies back to the same interface, to support
		 * Private VLAN switch technologies. See arp.c.
		 */
		if (out_dev == in_dev &&
		    IN_DEV_PROXY_ARP_PVLAN(in_dev) == 0) {
			err = -EINVAL;
			goto cleanup;
		}
	}

	fnhe = find_exception(&FIB_RES_NH(*res), daddr);
	if (do_cache) {
		if (fnhe) {
			rth = rcu_dereference(fnhe->fnhe_rth_input);
			/* An expired exception is torn down here so the
			 * route below is built without it.
			 */
			if (rth && rth->dst.expires &&
			    time_after(jiffies, rth->dst.expires)) {
				ip_del_fnhe(&FIB_RES_NH(*res), daddr);
				fnhe = NULL;
			} else {
				goto rt_cache;
			}
		}

		rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);

rt_cache:
		/* Reuse the cached route when still valid. */
		if (rt_cache_valid(rth)) {
			skb_dst_set_noref(skb, &rth->dst);
			goto out;
		}
	}

	rth = rt_dst_alloc(out_dev->dev, 0, res->type,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY),
			   IN_DEV_CONF_GET(out_dev, NOXFRM), do_cache);
	if (!rth) {
		err = -ENOBUFS;
		goto cleanup;
	}

	rth->rt_is_input = 1;
	if (res->table)
		rth->rt_table_id = res->table->tb_id;
	RT_CACHE_STAT_INC(in_slow_tot);

	rth->dst.input = ip_forward;

	rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag);
	set_lwt_redirect(rth);
	skb_dst_set(skb, &rth->dst);
out:
	err = 0;
 cleanup:
	return err;
}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001716
#ifdef CONFIG_IP_ROUTE_MULTIPATH

/* To make ICMP packets follow the right flow, the multipath hash is
 * calculated from the inner IP addresses in reverse order.
 */
static int ip_multipath_icmp_hash(struct sk_buff *skb)
{
	const struct iphdr *outer_iph = ip_hdr(skb);
	const struct icmphdr *icmph;
	const struct iphdr *inner_iph;
	struct icmphdr _icmph;
	struct iphdr _inner_iph;

	/* Only the first fragment carries the ICMP header. */
	if (unlikely((outer_iph->frag_off & htons(IP_OFFSET)) != 0))
		goto use_outer;

	icmph = skb_header_pointer(skb, outer_iph->ihl * 4, sizeof(_icmph),
				   &_icmph);
	if (!icmph)
		goto use_outer;

	switch (icmph->type) {
	case ICMP_DEST_UNREACH:
	case ICMP_REDIRECT:
	case ICMP_TIME_EXCEEDED:
	case ICMP_PARAMETERPROB:
		break;
	default:
		goto use_outer;
	}

	inner_iph = skb_header_pointer(skb,
				       outer_iph->ihl * 4 + sizeof(_icmph),
				       sizeof(_inner_iph), &_inner_iph);
	if (!inner_iph)
		goto use_outer;

	/* Swap the inner addresses so the error takes the same path as
	 * the flow that triggered it.
	 */
	return fib_multipath_hash(inner_iph->daddr, inner_iph->saddr);

use_outer:
	return fib_multipath_hash(outer_iph->saddr, outer_iph->daddr);
}

#endif /* CONFIG_IP_ROUTE_MULTIPATH */
1758
Stephen Hemminger5969f712008-04-10 01:52:09 -07001759static int ip_mkroute_input(struct sk_buff *skb,
1760 struct fib_result *res,
David S. Miller68a5e3d2011-03-11 20:07:33 -05001761 const struct flowi4 *fl4,
Stephen Hemminger5969f712008-04-10 01:52:09 -07001762 struct in_device *in_dev,
1763 __be32 daddr, __be32 saddr, u32 tos)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001764{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001765#ifdef CONFIG_IP_ROUTE_MULTIPATH
Peter Nørlund0e884c72015-09-30 10:12:21 +02001766 if (res->fi && res->fi->fib_nhs > 1) {
1767 int h;
1768
Peter Nørlund79a13152015-09-30 10:12:22 +02001769 if (unlikely(ip_hdr(skb)->protocol == IPPROTO_ICMP))
1770 h = ip_multipath_icmp_hash(skb);
1771 else
1772 h = fib_multipath_hash(saddr, daddr);
Peter Nørlund0e884c72015-09-30 10:12:21 +02001773 fib_select_multipath(res, h);
1774 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001775#endif
1776
1777 /* create a routing cache entry */
David S. Millerc6cffba2012-07-26 11:14:38 +00001778 return __mkroute_input(skb, res, in_dev, daddr, saddr, tos);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001779}
1780
/*
 *	NOTE. We drop all the packets that has local source
 *	addresses, because every properly looped back packet
 *	must have correct destination already attached by output routine.
 *
 *	Such approach solves two big problems:
 *	1. Not simplex devices are handled properly.
 *	2. IP spoofing attempts are filtered with 100% of guarantee.
 *	called with rcu_read_lock()
 */

/* Slow-path input routing: screen out martians, look the destination
 * up in the FIB, and attach the resulting route (forward, local
 * deliver, broadcast or error) to @skb.  Returns 0 on success or a
 * negative errno.
 */
static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
			       u8 tos, struct net_device *dev)
{
	struct fib_result res;
	struct in_device *in_dev = __in_dev_get_rcu(dev);
	struct ip_tunnel_info *tun_info;
	struct flowi4 fl4;
	unsigned int flags = 0;
	u32 itag = 0;
	struct rtable *rth;
	int err = -EINVAL;
	struct net *net = dev_net(dev);
	bool do_cache;

	/* IP on this device is disabled. */

	if (!in_dev)
		goto out;

	/* Check for the most weird martians, which can be not detected
	   by fib_lookup.
	 */

	/* Carry the tunnel id into the flow key for RX tunnel metadata. */
	tun_info = skb_tunnel_info(skb);
	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
		fl4.flowi4_tun_key.tun_id = tun_info->key.tun_id;
	else
		fl4.flowi4_tun_key.tun_id = 0;
	skb_dst_drop(skb);

	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
		goto martian_source;

	res.fi = NULL;
	res.table = NULL;
	if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0))
		goto brd_input;

	/* Accept zero addresses only to limited broadcast;
	 * I even do not know to fix it or not. Waiting for complains :-)
	 */
	if (ipv4_is_zeronet(saddr))
		goto martian_source;

	if (ipv4_is_zeronet(daddr))
		goto martian_destination;

	/* Following code try to avoid calling IN_DEV_NET_ROUTE_LOCALNET(),
	 * and call it once if daddr or/and saddr are loopback addresses
	 */
	if (ipv4_is_loopback(daddr)) {
		if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
			goto martian_destination;
	} else if (ipv4_is_loopback(saddr)) {
		if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
			goto martian_source;
	}

	/*
	 *	Now we are ready to route packet.
	 */
	fl4.flowi4_oif = 0;
	fl4.flowi4_iif = dev->ifindex;
	fl4.flowi4_mark = skb->mark;
	fl4.flowi4_tos = tos;
	fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
	fl4.flowi4_flags = 0;
	fl4.daddr = daddr;
	fl4.saddr = saddr;
	err = fib_lookup(net, &fl4, &res, 0);
	if (err != 0) {
		if (!IN_DEV_FORWARD(in_dev))
			err = -EHOSTUNREACH;
		goto no_route;
	}

	if (res.type == RTN_BROADCAST)
		goto brd_input;

	if (res.type == RTN_LOCAL) {
		err = fib_validate_source(skb, saddr, daddr, tos,
					  0, dev, in_dev, &itag);
		if (err < 0)
			goto martian_source;
		goto local_input;
	}

	if (!IN_DEV_FORWARD(in_dev)) {
		err = -EHOSTUNREACH;
		goto no_route;
	}
	if (res.type != RTN_UNICAST)
		goto martian_destination;

	err = ip_mkroute_input(skb, &res, &fl4, in_dev, daddr, saddr, tos);
out:	return err;

brd_input:
	/* Broadcast delivery: only for IP frames, with a validated
	 * (or zero) source.
	 */
	if (skb->protocol != htons(ETH_P_IP))
		goto e_inval;

	if (!ipv4_is_zeronet(saddr)) {
		err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
					  in_dev, &itag);
		if (err < 0)
			goto martian_source;
	}
	flags |= RTCF_BROADCAST;
	res.type = RTN_BROADCAST;
	RT_CACHE_STAT_INC(in_brd);

local_input:
	/* Local delivery (also reached from no_route with
	 * RTN_UNREACHABLE): try the cached input route first.
	 */
	do_cache = false;
	if (res.fi) {
		if (!itag) {
			rth = rcu_dereference(FIB_RES_NH(res).nh_rth_input);
			if (rt_cache_valid(rth)) {
				skb_dst_set_noref(skb, &rth->dst);
				err = 0;
				goto out;
			}
			do_cache = true;
		}
	}

	rth = rt_dst_alloc(l3mdev_master_dev_rcu(dev) ? : net->loopback_dev,
			   flags | RTCF_LOCAL, res.type,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache);
	if (!rth)
		goto e_nobufs;

	/* Local routes must never be used for output. */
	rth->dst.output= ip_rt_bug;
#ifdef CONFIG_IP_ROUTE_CLASSID
	rth->dst.tclassid = itag;
#endif
	rth->rt_is_input = 1;
	if (res.table)
		rth->rt_table_id = res.table->tb_id;

	RT_CACHE_STAT_INC(in_slow_tot);
	if (res.type == RTN_UNREACHABLE) {
		rth->dst.input= ip_error;
		rth->dst.error= -err;
		rth->rt_flags &= ~RTCF_LOCAL;
	}

	if (do_cache) {
		struct fib_nh *nh = &FIB_RES_NH(res);

		rth->dst.lwtstate = lwtstate_get(nh->nh_lwtstate);
		if (lwtunnel_input_redirect(rth->dst.lwtstate)) {
			WARN_ON(rth->dst.input == lwtunnel_input);
			rth->dst.lwtstate->orig_input = rth->dst.input;
			rth->dst.input = lwtunnel_input;
		}

		/* Fall back to the uncached list if caching lost a race. */
		if (unlikely(!rt_cache_route(nh, rth))) {
			rth->dst.flags |= DST_NOCACHE;
			rt_add_uncached_list(rth);
		}
	}
	skb_dst_set(skb, &rth->dst);
	err = 0;
	goto out;

no_route:
	RT_CACHE_STAT_INC(in_no_route);
	res.type = RTN_UNREACHABLE;
	res.fi = NULL;
	res.table = NULL;
	goto local_input;

	/*
	 *	Do not cache martian addresses: they should be logged (RFC1812)
	 */
martian_destination:
	RT_CACHE_STAT_INC(in_martian_dst);
#ifdef CONFIG_IP_ROUTE_VERBOSE
	if (IN_DEV_LOG_MARTIANS(in_dev))
		net_warn_ratelimited("martian destination %pI4 from %pI4, dev %s\n",
				     &daddr, &saddr, dev->name);
#endif

e_inval:
	err = -EINVAL;
	goto out;

e_nobufs:
	err = -ENOBUFS;
	goto out;

martian_source:
	ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
	goto out;
}
1987
David S. Millerc6cffba2012-07-26 11:14:38 +00001988int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1989 u8 tos, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001990{
Eric Dumazet96d36222010-06-02 19:21:31 +00001991 int res;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001992
Eric Dumazet96d36222010-06-02 19:21:31 +00001993 rcu_read_lock();
1994
Linus Torvalds1da177e2005-04-16 15:20:36 -07001995 /* Multicast recognition logic is moved from route cache to here.
1996 The problem was that too many Ethernet cards have broken/missing
1997 hardware multicast filters :-( As result the host on multicasting
1998 network acquires a lot of useless route cache entries, sort of
1999 SDR messages from all the world. Now we try to get rid of them.
2000 Really, provided software IP multicast filter is organized
2001 reasonably (at least, hashed), it does not result in a slowdown
2002 comparing with route cache reject entries.
2003 Note, that multicast routers are not affected, because
2004 route cache entry is created eventually.
2005 */
Joe Perchesf97c1e02007-12-16 13:45:43 -08002006 if (ipv4_is_multicast(daddr)) {
Eric Dumazet96d36222010-06-02 19:21:31 +00002007 struct in_device *in_dev = __in_dev_get_rcu(dev);
David Aherne58e4152016-10-31 15:54:00 -07002008 int our = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002009
David Aherne58e4152016-10-31 15:54:00 -07002010 if (in_dev)
2011 our = ip_check_mc_rcu(in_dev, daddr, saddr,
2012 ip_hdr(skb)->protocol);
2013
2014 /* check l3 master if no match yet */
2015 if ((!in_dev || !our) && netif_is_l3_slave(dev)) {
2016 struct in_device *l3_in_dev;
2017
2018 l3_in_dev = __in_dev_get_rcu(skb->dev);
2019 if (l3_in_dev)
2020 our = ip_check_mc_rcu(l3_in_dev, daddr, saddr,
2021 ip_hdr(skb)->protocol);
2022 }
2023
2024 res = -EINVAL;
2025 if (our
Linus Torvalds1da177e2005-04-16 15:20:36 -07002026#ifdef CONFIG_IP_MROUTE
David Aherne58e4152016-10-31 15:54:00 -07002027 ||
2028 (!ipv4_is_local_multicast(daddr) &&
2029 IN_DEV_MFORWARD(in_dev))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002030#endif
David Aherne58e4152016-10-31 15:54:00 -07002031 ) {
2032 res = ip_route_input_mc(skb, daddr, saddr,
2033 tos, dev, our);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002034 }
2035 rcu_read_unlock();
David Aherne58e4152016-10-31 15:54:00 -07002036 return res;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002037 }
David S. Millerc10237e2012-06-27 17:05:06 -07002038 res = ip_route_input_slow(skb, daddr, saddr, tos, dev);
Eric Dumazet96d36222010-06-02 19:21:31 +00002039 rcu_read_unlock();
2040 return res;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002041}
David S. Millerc6cffba2012-07-26 11:14:38 +00002042EXPORT_SYMBOL(ip_route_input_noref);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002043
/* called with rcu_read_lock() */
/*
 * __mkroute_output - build (or fetch from cache) an output route entry.
 * @res:      FIB lookup result for the flow (may carry fi == NULL).
 * @fl4:      the flow being routed; daddr/saddr/proto drive classification.
 * @orig_oif: the oif the caller originally requested (0 if unspecified).
 * @dev_out:  the output device chosen by the resolver.
 * @flags:    RTCF_* flags accumulated so far by the caller.
 *
 * Returns a held rtable on success or an ERR_PTR() on failure.
 * Caching policy: unicast routes with a fib_info are cached either in the
 * per-nexthop exception (fnhe) slot or the per-cpu output slot; broadcast,
 * non-local multicast, and oif-pinned local routes are built uncached.
 */
static struct rtable *__mkroute_output(const struct fib_result *res,
				       const struct flowi4 *fl4, int orig_oif,
				       struct net_device *dev_out,
				       unsigned int flags)
{
	struct fib_info *fi = res->fi;
	struct fib_nh_exception *fnhe;
	struct in_device *in_dev;
	u16 type = res->type;
	struct rtable *rth;
	bool do_cache;

	in_dev = __in_dev_get_rcu(dev_out);
	if (!in_dev)
		return ERR_PTR(-EINVAL);

	/* Loopback saddr out of a non-loopback device is only allowed when
	 * route_localnet is enabled or dev_out is an l3 master device.
	 */
	if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
		if (ipv4_is_loopback(fl4->saddr) &&
		    !(dev_out->flags & IFF_LOOPBACK) &&
		    !netif_is_l3_master(dev_out))
			return ERR_PTR(-EINVAL);

	/* Re-classify by destination; zeronet destinations are invalid. */
	if (ipv4_is_lbcast(fl4->daddr))
		type = RTN_BROADCAST;
	else if (ipv4_is_multicast(fl4->daddr))
		type = RTN_MULTICAST;
	else if (ipv4_is_zeronet(fl4->daddr))
		return ERR_PTR(-EINVAL);

	if (dev_out->flags & IFF_LOOPBACK)
		flags |= RTCF_LOCAL;

	do_cache = true;
	if (type == RTN_BROADCAST) {
		flags |= RTCF_BROADCAST | RTCF_LOCAL;
		fi = NULL;
	} else if (type == RTN_MULTICAST) {
		flags |= RTCF_MULTICAST | RTCF_LOCAL;
		/* Drop RTCF_LOCAL unless this host is a member of the
		 * destination group; member routes are not cached.
		 */
		if (!ip_check_mc_rcu(in_dev, fl4->daddr, fl4->saddr,
				     fl4->flowi4_proto))
			flags &= ~RTCF_LOCAL;
		else
			do_cache = false;
		/* If multicast route do not exist use
		 * default one, but do not gateway in this case.
		 * Yes, it is hack.
		 */
		if (fi && res->prefixlen < 4)
			fi = NULL;
	} else if ((type == RTN_LOCAL) && (orig_oif != 0) &&
		   (orig_oif != dev_out->ifindex)) {
		/* For local routes that require a particular output interface
		 * we do not want to cache the result.  Caching the result
		 * causes incorrect behaviour when there are multiple source
		 * addresses on the interface, the end result being that if the
		 * intended recipient is waiting on that interface for the
		 * packet he won't receive it because it will be delivered on
		 * the loopback interface and the IP_PKTINFO ipi_ifindex will
		 * be set to the loopback interface as well.
		 */
		fi = NULL;
	}

	fnhe = NULL;
	/* Only routes backed by a fib_info can be cached. */
	do_cache &= fi != NULL;
	if (do_cache) {
		struct rtable __rcu **prth;
		struct fib_nh *nh = &FIB_RES_NH(*res);

		/* Prefer a per-destination exception (redirect/PMTU) slot;
		 * an expired exception is torn down and ignored.
		 */
		fnhe = find_exception(nh, fl4->daddr);
		if (fnhe) {
			prth = &fnhe->fnhe_rth_output;
			rth = rcu_dereference(*prth);
			if (rth && rth->dst.expires &&
			    time_after(jiffies, rth->dst.expires)) {
				ip_del_fnhe(nh, fl4->daddr);
				fnhe = NULL;
			} else {
				goto rt_cache;
			}
		}

		/* FLOWI_FLAG_KNOWN_NH without an on-link gateway means the
		 * caller supplies the nexthop itself; do not cache.
		 */
		if (unlikely(fl4->flowi4_flags &
			     FLOWI_FLAG_KNOWN_NH &&
			     !(nh->nh_gw &&
			       nh->nh_scope == RT_SCOPE_LINK))) {
			do_cache = false;
			goto add;
		}
		/* Fall back to the per-cpu cached output route. */
		prth = raw_cpu_ptr(nh->nh_pcpu_rth_output);
		rth = rcu_dereference(*prth);

rt_cache:
		if (rt_cache_valid(rth)) {
			dst_hold(&rth->dst);
			return rth;
		}
	}

add:
	rth = rt_dst_alloc(dev_out, flags, type,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY),
			   IN_DEV_CONF_GET(in_dev, NOXFRM),
			   do_cache);
	if (!rth)
		return ERR_PTR(-ENOBUFS);

	rth->rt_iif = orig_oif ? : 0;
	if (res->table)
		rth->rt_table_id = res->table->tb_id;

	RT_CACHE_STAT_INC(out_slow_tot);

	if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
		/* Local bcast/mcast out of a real device needs the
		 * multicast output path (loopback copy for local delivery).
		 */
		if (flags & RTCF_LOCAL &&
		    !(dev_out->flags & IFF_LOOPBACK)) {
			rth->dst.output = ip_mc_output;
			RT_CACHE_STAT_INC(out_slow_mc);
		}
#ifdef CONFIG_IP_MROUTE
		if (type == RTN_MULTICAST) {
			if (IN_DEV_MFORWARD(in_dev) &&
			    !ipv4_is_local_multicast(fl4->daddr)) {
				rth->dst.input = ip_mr_input;
				rth->dst.output = ip_mc_output;
			}
		}
#endif
	}

	rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0);
	set_lwt_redirect(rth);

	return rth;
}
2180
Linus Torvalds1da177e2005-04-16 15:20:36 -07002181/*
2182 * Major route resolver routine.
2183 */
2184
/*
 * __ip_route_output_key_hash - resolve an output route for @fl4.
 * @net:     network namespace to route in.
 * @fl4:     flow key; several fields (saddr, daddr, oif, scope, iif, tos)
 *           may be rewritten in place as the route is resolved.
 * @mp_hash: multipath hash passed through to fib_select_path()
 *           (-1 lets the FIB compute it; see callers).
 *
 * Returns a held rtable or an ERR_PTR().  Runs entirely under
 * rcu_read_lock(); __mkroute_output() does the final entry construction.
 */
struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
					  int mp_hash)
{
	struct net_device *dev_out = NULL;
	__u8 tos = RT_FL_TOS(fl4);
	unsigned int flags = 0;
	struct fib_result res;
	struct rtable *rth;
	int orig_oif;
	int err = -ENETUNREACH;

	res.tclassid = 0;
	res.fi = NULL;
	res.table = NULL;

	orig_oif = fl4->flowi4_oif;

	/* Output routes always look like they arrived on loopback;
	 * RTO_ONLINK in the tos requests link-scope resolution.
	 */
	fl4->flowi4_iif = LOOPBACK_IFINDEX;
	fl4->flowi4_tos = tos & IPTOS_RT_MASK;
	fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
			     RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);

	rcu_read_lock();
	if (fl4->saddr) {
		rth = ERR_PTR(-EINVAL);
		/* A multicast/broadcast/zeronet source can never be ours. */
		if (ipv4_is_multicast(fl4->saddr) ||
		    ipv4_is_lbcast(fl4->saddr) ||
		    ipv4_is_zeronet(fl4->saddr))
			goto out;

		/* I removed check for oif == dev_out->oif here.
		   It was wrong for two reasons:
		   1. ip_dev_find(net, saddr) can return wrong iface, if saddr
		      is assigned to multiple interfaces.
		   2. Moreover, we are allowed to send packets with saddr
		      of another iface. --ANK
		 */

		if (fl4->flowi4_oif == 0 &&
		    (ipv4_is_multicast(fl4->daddr) ||
		     ipv4_is_lbcast(fl4->daddr))) {
			/* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
			dev_out = __ip_dev_find(net, fl4->saddr, false);
			if (!dev_out)
				goto out;

			/* Special hack: user can direct multicasts
			   and limited broadcast via necessary interface
			   without fiddling with IP_MULTICAST_IF or IP_PKTINFO.
			   This hack is not just for fun, it allows
			   vic,vat and friends to work.
			   They bind socket to loopback, set ttl to zero
			   and expect that it will work.
			   From the viewpoint of routing cache they are broken,
			   because we are not allowed to build multicast path
			   with loopback source addr (look, routing cache
			   cannot know, that ttl is zero, so that packet
			   will not leave this host and route is valid).
			   Luckily, this hack is good workaround.
			 */

			fl4->flowi4_oif = dev_out->ifindex;
			goto make_route;
		}

		if (!(fl4->flowi4_flags & FLOWI_FLAG_ANYSRC)) {
			/* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
			if (!__ip_dev_find(net, fl4->saddr, false))
				goto out;
		}
	}


	if (fl4->flowi4_oif) {
		dev_out = dev_get_by_index_rcu(net, fl4->flowi4_oif);
		rth = ERR_PTR(-ENODEV);
		if (!dev_out)
			goto out;

		/* RACE: Check return value of inet_select_addr instead. */
		if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) {
			rth = ERR_PTR(-ENETUNREACH);
			goto out;
		}
		/* Link-scope destinations (and IGMP) bypass the FIB:
		 * pick a link-scope source and build the route directly.
		 */
		if (ipv4_is_local_multicast(fl4->daddr) ||
		    ipv4_is_lbcast(fl4->daddr) ||
		    fl4->flowi4_proto == IPPROTO_IGMP) {
			if (!fl4->saddr)
				fl4->saddr = inet_select_addr(dev_out, 0,
							      RT_SCOPE_LINK);
			goto make_route;
		}
		if (!fl4->saddr) {
			if (ipv4_is_multicast(fl4->daddr))
				fl4->saddr = inet_select_addr(dev_out, 0,
							      fl4->flowi4_scope);
			else if (!fl4->daddr)
				fl4->saddr = inet_select_addr(dev_out, 0,
							      RT_SCOPE_HOST);
		}
	}

	/* No destination at all: route to ourselves via loopback. */
	if (!fl4->daddr) {
		fl4->daddr = fl4->saddr;
		if (!fl4->daddr)
			fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK);
		dev_out = net->loopback_dev;
		fl4->flowi4_oif = LOOPBACK_IFINDEX;
		res.type = RTN_LOCAL;
		flags |= RTCF_LOCAL;
		goto make_route;
	}

	err = fib_lookup(net, fl4, &res, 0);
	if (err) {
		res.fi = NULL;
		res.table = NULL;
		if (fl4->flowi4_oif &&
		    (ipv4_is_multicast(fl4->daddr) ||
		     !netif_index_is_l3_master(net, fl4->flowi4_oif))) {
			/* Apparently, routing tables are wrong. Assume,
			   that the destination is on link.

			   WHY? DW.
			   Because we are allowed to send to iface
			   even if it has NO routes and NO assigned
			   addresses. When oif is specified, routing
			   tables are looked up with only one purpose:
			   to catch if destination is gatewayed, rather than
			   direct. Moreover, if MSG_DONTROUTE is set,
			   we send packet, ignoring both routing tables
			   and ifaddr state. --ANK


			   We could make it even if oif is unknown,
			   likely IPv6, but we do not.
			 */

			if (fl4->saddr == 0)
				fl4->saddr = inet_select_addr(dev_out, 0,
							      RT_SCOPE_LINK);
			res.type = RTN_UNICAST;
			goto make_route;
		}
		rth = ERR_PTR(err);
		goto out;
	}

	if (res.type == RTN_LOCAL) {
		if (!fl4->saddr) {
			if (res.fi->fib_prefsrc)
				fl4->saddr = res.fi->fib_prefsrc;
			else
				fl4->saddr = fl4->daddr;
		}

		/* L3 master device is the loopback for that domain */
		dev_out = l3mdev_master_dev_rcu(dev_out) ? : net->loopback_dev;
		fl4->flowi4_oif = dev_out->ifindex;
		flags |= RTCF_LOCAL;
		goto make_route;
	}

	/* Pick the (possibly multipath) nexthop for a unicast route. */
	fib_select_path(net, &res, fl4, mp_hash);

	dev_out = FIB_RES_DEV(res);
	fl4->flowi4_oif = dev_out->ifindex;


make_route:
	rth = __mkroute_output(&res, fl4, orig_oif, dev_out, flags);

out:
	rcu_read_unlock();
	return rth;
}
EXPORT_SYMBOL_GPL(__ip_route_output_key_hash);
Arnaldo Carvalho de Melod8c97a92005-08-09 20:12:12 -07002362
/* dst_ops.check for blackhole routes: always report the entry as stale
 * (NULL) so callers never reuse a blackhole dst across lookups.
 */
static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie)
{
	return NULL;
}
2367
Steffen Klassertebb762f2011-11-23 02:12:51 +00002368static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst)
Roland Dreierec831ea2011-01-31 13:16:00 -08002369{
Steffen Klassert618f9bc2011-11-23 02:13:31 +00002370 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
2371
2372 return mtu ? : dst->dev->mtu;
Roland Dreierec831ea2011-01-31 13:16:00 -08002373}
2374
/* dst_ops.update_pmtu for blackhole routes: intentionally a no-op --
 * a blackhole dst carries no state worth updating from PMTU feedback.
 */
static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
					  struct sk_buff *skb, u32 mtu)
{
}
2379
/* dst_ops.redirect for blackhole routes: intentionally a no-op --
 * ICMP redirects must never reroute a blackhole dst.
 */
static void ipv4_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				       struct sk_buff *skb)
{
}
2384
/* dst_ops.cow_metrics for blackhole routes: refuse to clone metrics;
 * returning NULL leaves the dst on its existing (read-only) metrics.
 */
static u32 *ipv4_rt_blackhole_cow_metrics(struct dst_entry *dst,
					  unsigned long old)
{
	return NULL;
}
2390
/* dst_ops vtable for blackhole routes (used by ipv4_blackhole_route()):
 * every mutating hook is a stub so the dst silently absorbs updates,
 * while advmss/neigh lookups reuse the regular IPv4 implementations.
 */
static struct dst_ops ipv4_dst_blackhole_ops = {
	.family			=	AF_INET,
	.check			=	ipv4_blackhole_dst_check,
	.mtu			=	ipv4_blackhole_mtu,
	.default_advmss		=	ipv4_default_advmss,
	.update_pmtu		=	ipv4_rt_blackhole_update_pmtu,
	.redirect		=	ipv4_rt_blackhole_redirect,
	.cow_metrics		=	ipv4_rt_blackhole_cow_metrics,
	.neigh_lookup		=	ipv4_neigh_lookup,
};
2401
/*
 * ipv4_blackhole_route - clone @dst_orig into a discard-only route.
 * @net:      namespace used to stamp the clone's route generation id.
 * @dst_orig: original route; its reference is consumed (released) here.
 *
 * Builds a dst using ipv4_dst_blackhole_ops whose input/output handlers
 * drop every packet, while preserving enough of the original route's
 * identity (device, flags, gateway, pmtu, ...) for callers that inspect
 * it.  Returns the new dst, or ERR_PTR(-ENOMEM) on allocation failure.
 */
struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
	struct rtable *ort = (struct rtable *) dst_orig;
	struct rtable *rt;

	rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_NONE, 0);
	if (rt) {
		struct dst_entry *new = &rt->dst;

		new->__use = 1;
		/* Both directions discard: this is the blackhole part. */
		new->input = dst_discard;
		new->output = dst_discard_out;

		/* Keep a reference on the original output device. */
		new->dev = ort->dst.dev;
		if (new->dev)
			dev_hold(new->dev);

		rt->rt_is_input = ort->rt_is_input;
		rt->rt_iif = ort->rt_iif;
		rt->rt_pmtu = ort->rt_pmtu;

		rt->rt_genid = rt_genid_ipv4(net);
		rt->rt_flags = ort->rt_flags;
		rt->rt_type = ort->rt_type;
		rt->rt_gateway = ort->rt_gateway;
		rt->rt_uses_gateway = ort->rt_uses_gateway;

		INIT_LIST_HEAD(&rt->rt_uncached);
		dst_free(new);
	}

	/* We own the caller's reference on the original route; drop it. */
	dst_release(dst_orig);

	return rt ? &rt->dst : ERR_PTR(-ENOMEM);
}
2437
David S. Miller9d6ec932011-03-12 01:12:47 -05002438struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
Eric Dumazet6f9c9612015-09-25 07:39:10 -07002439 const struct sock *sk)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002440{
David S. Miller9d6ec932011-03-12 01:12:47 -05002441 struct rtable *rt = __ip_route_output_key(net, flp4);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002442
David S. Millerb23dd4f2011-03-02 14:31:35 -08002443 if (IS_ERR(rt))
2444 return rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002445
David S. Miller56157872011-05-02 14:37:45 -07002446 if (flp4->flowi4_proto)
Steffen Klassertf92ee612014-09-16 10:08:40 +02002447 rt = (struct rtable *)xfrm_lookup_route(net, &rt->dst,
2448 flowi4_to_flowi(flp4),
2449 sk, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002450
David S. Millerb23dd4f2011-03-02 14:31:35 -08002451 return rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002452}
Arnaldo Carvalho de Melod8c97a92005-08-09 20:12:12 -07002453EXPORT_SYMBOL_GPL(ip_route_output_flow);
2454
/*
 * rt_fill_info - serialize a route attached to @skb into an RTM message.
 * @net:      namespace (used for multicast forwarding state lookups).
 * @dst, @src: addresses to report in RTA_DST / RTA_SRC.
 * @table_id: routing table id; tables >= 256 are reported as
 *            RT_TABLE_COMPAT in the fixed-width rtm_table field.
 * @fl4:      flow the route was resolved for (mark, uid, prefsrc, ...).
 * @skb:      message buffer; its attached rtable is what gets dumped.
 * @portid, @seq, @event, @flags: netlink message header parameters.
 * @nowait:   if false, ipmr_get_route() may queue/report asynchronously.
 *
 * Returns 0 on success, -EMSGSIZE if the skb ran out of room (the
 * partially-built message is cancelled).
 */
static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id,
			struct flowi4 *fl4, struct sk_buff *skb, u32 portid,
			u32 seq, int event, int nowait, unsigned int flags)
{
	struct rtable *rt = skb_rtable(skb);
	struct rtmsg *r;
	struct nlmsghdr *nlh;
	unsigned long expires = 0;
	u32 error;
	u32 metrics[RTAX_MAX];

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*r), flags);
	if (!nlh)
		return -EMSGSIZE;

	r = nlmsg_data(nlh);
	r->rtm_family = AF_INET;
	r->rtm_dst_len = 32;
	r->rtm_src_len = 0;
	r->rtm_tos = fl4->flowi4_tos;
	/* rtm_table is 8 bits; the full id always goes in RTA_TABLE. */
	r->rtm_table = table_id < 256 ? table_id : RT_TABLE_COMPAT;
	if (nla_put_u32(skb, RTA_TABLE, table_id))
		goto nla_put_failure;
	r->rtm_type = rt->rt_type;
	r->rtm_scope = RT_SCOPE_UNIVERSE;
	r->rtm_protocol = RTPROT_UNSPEC;
	r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
	if (rt->rt_flags & RTCF_NOTIFY)
		r->rtm_flags |= RTM_F_NOTIFY;
	if (IPCB(skb)->flags & IPSKB_DOREDIRECT)
		r->rtm_flags |= RTCF_DOREDIRECT;

	if (nla_put_in_addr(skb, RTA_DST, dst))
		goto nla_put_failure;
	if (src) {
		r->rtm_src_len = 32;
		if (nla_put_in_addr(skb, RTA_SRC, src))
			goto nla_put_failure;
	}
	if (rt->dst.dev &&
	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
		goto nla_put_failure;
#ifdef CONFIG_IP_ROUTE_CLASSID
	if (rt->dst.tclassid &&
	    nla_put_u32(skb, RTA_FLOW, rt->dst.tclassid))
		goto nla_put_failure;
#endif
	/* Report the chosen source address when it differs from RTA_SRC. */
	if (!rt_is_input_route(rt) &&
	    fl4->saddr != src) {
		if (nla_put_in_addr(skb, RTA_PREFSRC, fl4->saddr))
			goto nla_put_failure;
	}
	if (rt->rt_uses_gateway &&
	    nla_put_in_addr(skb, RTA_GATEWAY, rt->rt_gateway))
		goto nla_put_failure;

	/* Convert the absolute expiry to a remaining time for userspace. */
	expires = rt->dst.expires;
	if (expires) {
		unsigned long now = jiffies;

		if (time_before(now, expires))
			expires -= now;
		else
			expires = 0;
	}

	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
	/* An unexpired learned PMTU overrides the static MTU metric. */
	if (rt->rt_pmtu && expires)
		metrics[RTAX_MTU - 1] = rt->rt_pmtu;
	if (rtnetlink_put_metrics(skb, metrics) < 0)
		goto nla_put_failure;

	if (fl4->flowi4_mark &&
	    nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark))
		goto nla_put_failure;

	if (!uid_eq(fl4->flowi4_uid, INVALID_UID) &&
	    nla_put_u32(skb, RTA_UID,
			from_kuid_munged(current_user_ns(), fl4->flowi4_uid)))
		goto nla_put_failure;

	error = rt->dst.error;

	if (rt_is_input_route(rt)) {
#ifdef CONFIG_IP_MROUTE
		/* Forwarded multicast: ask the mroute code to fill in the
		 * iif/oifs; err == 0 with !nowait means it will answer later.
		 */
		if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) &&
		    IPV4_DEVCONF_ALL(net, MC_FORWARDING)) {
			int err = ipmr_get_route(net, skb,
						 fl4->saddr, fl4->daddr,
						 r, nowait, portid);

			if (err <= 0) {
				if (!nowait) {
					if (err == 0)
						return 0;
					goto nla_put_failure;
				} else {
					if (err == -EMSGSIZE)
						goto nla_put_failure;
					error = err;
				}
			}
		} else
#endif
			if (nla_put_u32(skb, RTA_IIF, skb->dev->ifindex))
				goto nla_put_failure;
	}

	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, error) < 0)
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2573
Thomas Graf661d2962013-03-21 07:45:29 +00002574static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002575{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09002576 struct net *net = sock_net(in_skb->sk);
Thomas Grafd889ce32006-08-17 18:15:44 -07002577 struct rtmsg *rtm;
2578 struct nlattr *tb[RTA_MAX+1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07002579 struct rtable *rt = NULL;
David Millerd6c0a4f2012-07-01 02:02:59 +00002580 struct flowi4 fl4;
Al Viro9e12bb22006-09-26 21:25:20 -07002581 __be32 dst = 0;
2582 __be32 src = 0;
2583 u32 iif;
Thomas Grafd889ce32006-08-17 18:15:44 -07002584 int err;
Eric Dumazet963bfee2010-07-20 22:03:14 +00002585 int mark;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002586 struct sk_buff *skb;
David Ahernc36ba662015-09-02 13:58:36 -07002587 u32 table_id = RT_TABLE_MAIN;
Lorenzo Colitti622ec2c2016-11-04 02:23:42 +09002588 kuid_t uid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002589
Thomas Grafd889ce32006-08-17 18:15:44 -07002590 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy);
2591 if (err < 0)
2592 goto errout;
2593
2594 rtm = nlmsg_data(nlh);
2595
Linus Torvalds1da177e2005-04-16 15:20:36 -07002596 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
Ian Morris51456b22015-04-03 09:17:26 +01002597 if (!skb) {
Thomas Grafd889ce32006-08-17 18:15:44 -07002598 err = -ENOBUFS;
2599 goto errout;
2600 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002601
2602 /* Reserve room for dummy headers, this skb can pass
2603 through good chunk of routing engine.
2604 */
Arnaldo Carvalho de Melo459a98e2007-03-19 15:30:44 -07002605 skb_reset_mac_header(skb);
Arnaldo Carvalho de Meloc1d2bbe2007-04-10 20:45:18 -07002606 skb_reset_network_header(skb);
Stephen Hemmingerd2c962b2006-04-17 17:27:11 -07002607
2608 /* Bugfix: need to give ip_route_input enough of an IP header to not gag. */
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07002609 ip_hdr(skb)->protocol = IPPROTO_ICMP;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002610 skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));
2611
Jiri Benc67b61f62015-03-29 16:59:26 +02002612 src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0;
2613 dst = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0;
Thomas Grafd889ce32006-08-17 18:15:44 -07002614 iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
Eric Dumazet963bfee2010-07-20 22:03:14 +00002615 mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0;
Lorenzo Colitti622ec2c2016-11-04 02:23:42 +09002616 if (tb[RTA_UID])
2617 uid = make_kuid(current_user_ns(), nla_get_u32(tb[RTA_UID]));
2618 else
2619 uid = (iif ? INVALID_UID : current_uid());
Linus Torvalds1da177e2005-04-16 15:20:36 -07002620
David Millerd6c0a4f2012-07-01 02:02:59 +00002621 memset(&fl4, 0, sizeof(fl4));
2622 fl4.daddr = dst;
2623 fl4.saddr = src;
2624 fl4.flowi4_tos = rtm->rtm_tos;
2625 fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0;
2626 fl4.flowi4_mark = mark;
Lorenzo Colitti622ec2c2016-11-04 02:23:42 +09002627 fl4.flowi4_uid = uid;
David Millerd6c0a4f2012-07-01 02:02:59 +00002628
Linus Torvalds1da177e2005-04-16 15:20:36 -07002629 if (iif) {
Thomas Grafd889ce32006-08-17 18:15:44 -07002630 struct net_device *dev;
2631
Denis V. Lunev19375042008-02-28 20:52:04 -08002632 dev = __dev_get_by_index(net, iif);
Ian Morris51456b22015-04-03 09:17:26 +01002633 if (!dev) {
Thomas Grafd889ce32006-08-17 18:15:44 -07002634 err = -ENODEV;
2635 goto errout_free;
2636 }
2637
Linus Torvalds1da177e2005-04-16 15:20:36 -07002638 skb->protocol = htons(ETH_P_IP);
2639 skb->dev = dev;
Eric Dumazet963bfee2010-07-20 22:03:14 +00002640 skb->mark = mark;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002641 local_bh_disable();
2642 err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev);
2643 local_bh_enable();
Thomas Grafd889ce32006-08-17 18:15:44 -07002644
Eric Dumazet511c3f92009-06-02 05:14:27 +00002645 rt = skb_rtable(skb);
Changli Gaod8d1f302010-06-10 23:31:35 -07002646 if (err == 0 && rt->dst.error)
2647 err = -rt->dst.error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002648 } else {
David S. Miller9d6ec932011-03-12 01:12:47 -05002649 rt = ip_route_output_key(net, &fl4);
David S. Millerb23dd4f2011-03-02 14:31:35 -08002650
2651 err = 0;
2652 if (IS_ERR(rt))
2653 err = PTR_ERR(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002654 }
Thomas Grafd889ce32006-08-17 18:15:44 -07002655
Linus Torvalds1da177e2005-04-16 15:20:36 -07002656 if (err)
Thomas Grafd889ce32006-08-17 18:15:44 -07002657 goto errout_free;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002658
Changli Gaod8d1f302010-06-10 23:31:35 -07002659 skb_dst_set(skb, &rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002660 if (rtm->rtm_flags & RTM_F_NOTIFY)
2661 rt->rt_flags |= RTCF_NOTIFY;
2662
David Ahernc36ba662015-09-02 13:58:36 -07002663 if (rtm->rtm_flags & RTM_F_LOOKUP_TABLE)
2664 table_id = rt->rt_table_id;
2665
2666 err = rt_fill_info(net, dst, src, table_id, &fl4, skb,
Eric W. Biederman15e47302012-09-07 20:12:54 +00002667 NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
Denis V. Lunev19375042008-02-28 20:52:04 -08002668 RTM_NEWROUTE, 0, 0);
David S. Miller7b46a642015-01-18 23:36:08 -05002669 if (err < 0)
Thomas Grafd889ce32006-08-17 18:15:44 -07002670 goto errout_free;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002671
Eric W. Biederman15e47302012-09-07 20:12:54 +00002672 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
Thomas Grafd889ce32006-08-17 18:15:44 -07002673errout:
Thomas Graf2942e902006-08-15 00:30:25 -07002674 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002675
Thomas Grafd889ce32006-08-17 18:15:44 -07002676errout_free:
Linus Torvalds1da177e2005-04-16 15:20:36 -07002677 kfree_skb(skb);
Thomas Grafd889ce32006-08-17 18:15:44 -07002678 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002679}
2680
Linus Torvalds1da177e2005-04-16 15:20:36 -07002681void ip_rt_multicast_event(struct in_device *in_dev)
2682{
Nicolas Dichtel4ccfe6d2012-09-07 00:45:29 +00002683 rt_cache_flush(dev_net(in_dev->dev));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002684}
2685
2686#ifdef CONFIG_SYSCTL
/* Defaults for the GC tunables exported through the ipv4_route_table
 * sysctls below ("gc_interval", "gc_min_interval[_ms]", "gc_elasticity").
 * Interval values are stored in jiffies.
 */
static int ip_rt_gc_interval __read_mostly  = 60 * HZ;
static int ip_rt_gc_min_interval __read_mostly	= HZ / 2;
static int ip_rt_gc_elasticity __read_mostly	= 8;
2690
Joe Perchesfe2c6332013-06-11 23:04:25 -07002691static int ipv4_sysctl_rtcache_flush(struct ctl_table *__ctl, int write,
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07002692 void __user *buffer,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002693 size_t *lenp, loff_t *ppos)
2694{
Timo Teräs5aad1de2013-05-27 20:46:33 +00002695 struct net *net = (struct net *)__ctl->extra1;
2696
Linus Torvalds1da177e2005-04-16 15:20:36 -07002697 if (write) {
Timo Teräs5aad1de2013-05-27 20:46:33 +00002698 rt_cache_flush(net);
2699 fnhe_genid_bump(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002700 return 0;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002701 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002702
2703 return -EINVAL;
2704}
2705
/* Global (init_net) tunables under /proc/sys/net/ipv4/route/, registered
 * by ip_static_sysctl_init(). All entries are root-writable ints (0644).
 */
static struct ctl_table ipv4_route_table[] = {
	{
		/* dst-entry count above which GC starts */
		.procname	= "gc_thresh",
		.data		= &ipv4_dst_ops.gc_thresh,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		/* hard cap on dst entries */
		.procname	= "max_size",
		.data		= &ip_rt_max_size,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		/* Deprecated. Use gc_min_interval_ms */

		.procname	= "gc_min_interval",
		.data		= &ip_rt_gc_min_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		/* same variable as above, exposed in milliseconds */
		.procname	= "gc_min_interval_ms",
		.data		= &ip_rt_gc_min_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_ms_jiffies,
	},
	{
		.procname	= "gc_timeout",
		.data		= &ip_rt_gc_timeout,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "gc_interval",
		.data		= &ip_rt_gc_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		/* ICMP redirect rate limiting */
		.procname	= "redirect_load",
		.data		= &ip_rt_redirect_load,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "redirect_number",
		.data		= &ip_rt_redirect_number,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "redirect_silence",
		.data		= &ip_rt_redirect_silence,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		/* ICMP error rate limiting (token-bucket cost/burst) */
		.procname	= "error_cost",
		.data		= &ip_rt_error_cost,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "error_burst",
		.data		= &ip_rt_error_burst,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "gc_elasticity",
		.data		= &ip_rt_gc_elasticity,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		/* learned PMTU lifetime, in jiffies */
		.procname	= "mtu_expires",
		.data		= &ip_rt_mtu_expires,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		/* lower bound accepted for path MTU */
		.procname	= "min_pmtu",
		.data		= &ip_rt_min_pmtu,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		/* lower bound for advertised MSS */
		.procname	= "min_adv_mss",
		.data		= &ip_rt_min_advmss,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{ }	/* sentinel */
};
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002816
/* Per-netns table holding only the write-only "flush" trigger.
 * sysctl_route_net_init() duplicates it per namespace and stores the
 * owning struct net in entry 0's ->extra1.
 */
static struct ctl_table ipv4_route_flush_table[] = {
	{
		.procname	= "flush",
		.maxlen		= sizeof(int),
		.mode		= 0200,	/* write-only: it is a trigger, not a value */
		.proc_handler	= ipv4_sysctl_rtcache_flush,
	},
	{ },	/* sentinel */
};
2826
2827static __net_init int sysctl_route_net_init(struct net *net)
2828{
2829 struct ctl_table *tbl;
2830
2831 tbl = ipv4_route_flush_table;
Octavian Purdila09ad9bc2009-11-25 15:14:13 -08002832 if (!net_eq(net, &init_net)) {
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002833 tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL);
Ian Morris51456b22015-04-03 09:17:26 +01002834 if (!tbl)
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002835 goto err_dup;
Eric W. Biederman464dc802012-11-16 03:02:59 +00002836
2837 /* Don't export sysctls to unprivileged users */
2838 if (net->user_ns != &init_user_ns)
2839 tbl[0].procname = NULL;
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002840 }
2841 tbl[0].extra1 = net;
2842
Eric W. Biedermanec8f23c2012-04-19 13:44:49 +00002843 net->ipv4.route_hdr = register_net_sysctl(net, "net/ipv4/route", tbl);
Ian Morris51456b22015-04-03 09:17:26 +01002844 if (!net->ipv4.route_hdr)
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002845 goto err_reg;
2846 return 0;
2847
2848err_reg:
2849 if (tbl != ipv4_route_flush_table)
2850 kfree(tbl);
2851err_dup:
2852 return -ENOMEM;
2853}
2854
2855static __net_exit void sysctl_route_net_exit(struct net *net)
2856{
2857 struct ctl_table *tbl;
2858
2859 tbl = net->ipv4.route_hdr->ctl_table_arg;
2860 unregister_net_sysctl_table(net->ipv4.route_hdr);
2861 BUG_ON(tbl == ipv4_route_flush_table);
2862 kfree(tbl);
2863}
2864
/* Per-netns registration/teardown of the route sysctl table. */
static __net_initdata struct pernet_operations sysctl_route_ops = {
	.init = sysctl_route_net_init,
	.exit = sysctl_route_net_exit,
};
Linus Torvalds1da177e2005-04-16 15:20:36 -07002869#endif
2870
Neil Horman3ee94372010-05-08 01:57:52 -07002871static __net_init int rt_genid_init(struct net *net)
Denis V. Lunev9f5e97e2008-07-05 19:02:59 -07002872{
fan.duca4c3fc2013-07-30 08:33:53 +08002873 atomic_set(&net->ipv4.rt_genid, 0);
Timo Teräs5aad1de2013-05-27 20:46:33 +00002874 atomic_set(&net->fnhe_genid, 0);
David S. Miller436c3b62011-03-24 17:42:21 -07002875 get_random_bytes(&net->ipv4.dev_addr_genid,
2876 sizeof(net->ipv4.dev_addr_genid));
Denis V. Lunev9f5e97e2008-07-05 19:02:59 -07002877 return 0;
2878}
2879
/* No .exit: the generation counters need no per-netns teardown. */
static __net_initdata struct pernet_operations rt_genid_ops = {
	.init = rt_genid_init,
};
2883
David S. Millerc3426b42012-06-09 16:27:05 -07002884static int __net_init ipv4_inetpeer_init(struct net *net)
2885{
2886 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
2887
2888 if (!bp)
2889 return -ENOMEM;
2890 inet_peer_base_init(bp);
2891 net->ipv4.peers = bp;
2892 return 0;
2893}
2894
2895static void __net_exit ipv4_inetpeer_exit(struct net *net)
2896{
2897 struct inet_peer_base *bp = net->ipv4.peers;
2898
2899 net->ipv4.peers = NULL;
David S. Miller56a6b242012-06-09 16:32:41 -07002900 inetpeer_invalidate_tree(bp);
David S. Millerc3426b42012-06-09 16:27:05 -07002901 kfree(bp);
2902}
2903
/* Per-netns lifetime of the inet_peer base. */
static __net_initdata struct pernet_operations ipv4_inetpeer_ops = {
	.init = ipv4_inetpeer_init,
	.exit = ipv4_inetpeer_exit,
};
Denis V. Lunev9f5e97e2008-07-05 19:02:59 -07002908
#ifdef CONFIG_IP_ROUTE_CLASSID
/* Per-cpu route accounting buckets; allocated in ip_rt_init(). */
struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
#endif /* CONFIG_IP_ROUTE_CLASSID */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002912
/* Boot-time initialization of the IPv4 routing layer.
 *
 * Allocates the IP-ID arrays and per-cpu uncached-route lists, creates
 * the dst slab cache and counters, then initializes devinet/FIB, proc
 * files, xfrm hooks, the RTM_GETROUTE netlink handler, and the pernet
 * subsystems defined above. Allocation failures at this stage panic;
 * only a proc-file failure is tolerated (logged, not fatal).
 * Always returns 0. The ordering is load-bearing — later steps use
 * state set up by earlier ones — so do not reorder casually.
 */
int __init ip_rt_init(void)
{
	int rc = 0;
	int cpu;

	ip_idents = kmalloc(IP_IDENTS_SZ * sizeof(*ip_idents), GFP_KERNEL);
	if (!ip_idents)
		panic("IP: failed to allocate ip_idents\n");

	/* Randomize initial IP-ID values so they are not predictable. */
	prandom_bytes(ip_idents, IP_IDENTS_SZ * sizeof(*ip_idents));

	ip_tstamps = kcalloc(IP_IDENTS_SZ, sizeof(*ip_tstamps), GFP_KERNEL);
	if (!ip_tstamps)
		panic("IP: failed to allocate ip_tstamps\n");

	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu);

		INIT_LIST_HEAD(&ul->head);
		spin_lock_init(&ul->lock);
	}
#ifdef CONFIG_IP_ROUTE_CLASSID
	ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct));
	if (!ip_rt_acct)
		panic("IP: failed to allocate ip_rt_acct\n");
#endif

	ipv4_dst_ops.kmem_cachep =
		kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0,
				  SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);

	/* Blackhole dsts share the same slab cache. */
	ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep;

	if (dst_entries_init(&ipv4_dst_ops) < 0)
		panic("IP: failed to allocate ipv4_dst_ops counter\n");

	if (dst_entries_init(&ipv4_dst_blackhole_ops) < 0)
		panic("IP: failed to allocate ipv4_dst_blackhole_ops counter\n");

	/* Effectively disable GC thresholds by default. */
	ipv4_dst_ops.gc_thresh = ~0;
	ip_rt_max_size = INT_MAX;

	devinet_init();
	ip_fib_init();

	if (ip_rt_proc_init())
		pr_err("Unable to create route proc files\n");
#ifdef CONFIG_XFRM
	xfrm_init();
	xfrm4_init();
#endif
	rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL, NULL);

#ifdef CONFIG_SYSCTL
	register_pernet_subsys(&sysctl_route_ops);
#endif
	register_pernet_subsys(&rt_genid_ops);
	register_pernet_subsys(&ipv4_inetpeer_ops);
	return rc;
}
2973
#ifdef CONFIG_SYSCTL
/*
 * We really need to sanitize the damn ipv4 init order, then all
 * this nonsense will go away.
 */
/* Register init_net's static route sysctl table (ipv4_route_table). */
void __init ip_static_sysctl_init(void)
{
	register_net_sysctl(&init_net, "net/ipv4/route", ipv4_route_table);
}
#endif