/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		ROUTE - implementation of the IP router.
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *		Linus Torvalds, <Linus.Torvalds@helsinki.fi>
 *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Fixes:
 *		Alan Cox	:	Verify area fixes.
 *		Alan Cox	:	cli() protects routing changes
 *		Rui Oliveira	:	ICMP routing table updates
 *		(rco@di.uminho.pt)	Routing table insertion and update
 *		Linus Torvalds	:	Rewrote bits to be sensible
 *		Alan Cox	:	Added BSD route gw semantics
 *		Alan Cox	:	Super /proc >4K
 *		Alan Cox	:	MTU in route table
 *		Alan Cox	:	MSS actually. Also added the window
 *					clamper.
 *		Sam Lantinga	:	Fixed route matching in rt_del()
 *		Alan Cox	:	Routing cache support.
 *		Alan Cox	:	Removed compatibility cruft.
 *		Alan Cox	:	RTF_REJECT support.
 *		Alan Cox	:	TCP irtt support.
 *		Jonathan Naylor	:	Added Metric support.
 *	Miquel van Smoorenburg	:	BSD API fixes.
 *	Miquel van Smoorenburg	:	Metrics.
 *		Alan Cox	:	Use __u32 properly
 *		Alan Cox	:	Aligned routing errors more closely with BSD;
 *					our system is still very different.
 *		Alan Cox	:	Faster /proc handling
 *	Alexey Kuznetsov	:	Massive rework to support tree based routing,
 *					routing caches and better behaviour.
 *
 *		Olaf Erb	:	irtt wasn't being copied right.
 *		Bjorn Ekwall	:	Kerneld route support.
 *		Alan Cox	:	Multicast fixed (I hope)
 *		Pavel Krauz	:	Limited broadcast fixed
 *		Mike McLagan	:	Routing by source
 *	Alexey Kuznetsov	:	End of old history. Split to fib.c and
 *					route.c and rewritten from scratch.
 *		Andi Kleen	:	Load-limit warning messages.
 *	Vitaly E. Lavrov	:	Transparent proxy revived after year coma.
 *	Vitaly E. Lavrov	:	Race condition in ip_route_input_slow.
 *	Tobias Ringstrom	:	Uninitialized res.type in ip_route_output_slow.
 *	Vladimir V. Ivanov	:	IP rule info (flowid) is really useful.
 *		Marc Boucher	:	routing by fwmark
 *	Robert Olsson		:	Added rt_cache statistics
 *	Arnaldo C. Melo		:	Convert proc stuff to seq_file
 *	Eric Dumazet		:	hashed spinlocks and rt_check_expire() fixes.
 *	Ilia Sotnikov		:	Ignore TOS on PMTUD and Redirect
 *	Ilia Sotnikov		:	Removed TOS from hash calculations
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */

#define pr_fmt(fmt) "IPv4: " fmt

#include <linux/module.h>
#include <asm/uaccess.h>
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/pkt_sched.h>
#include <linux/mroute.h>
#include <linux/netfilter_ipv4.h>
#include <linux/random.h>
#include <linux/rcupdate.h>
#include <linux/times.h>
#include <linux/slab.h>
#include <net/dst.h>
#include <net/net_namespace.h>
#include <net/protocol.h>
#include <net/ip.h>
#include <net/route.h>
#include <net/inetpeer.h>
#include <net/sock.h>
#include <net/ip_fib.h>
#include <net/arp.h>
#include <net/tcp.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/netevent.h>
#include <net/rtnetlink.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#include <linux/kmemleak.h>
#endif
#include <net/secure_seq.h>

#define RT_FL_TOS(oldflp4) \
        ((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))

#define IP_MAX_MTU      0xFFF0

#define RT_GC_TIMEOUT (300*HZ)

static int ip_rt_max_size;
static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT;
static int ip_rt_gc_interval __read_mostly = 60 * HZ;
static int ip_rt_gc_min_interval __read_mostly = HZ / 2;
static int ip_rt_redirect_number __read_mostly = 9;
static int ip_rt_redirect_load __read_mostly = HZ / 50;
static int ip_rt_redirect_silence __read_mostly = ((HZ / 50) << (9 + 1));
static int ip_rt_error_cost __read_mostly = HZ;
static int ip_rt_error_burst __read_mostly = 5 * HZ;
static int ip_rt_gc_elasticity __read_mostly = 8;
static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ;
static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20;
static int ip_rt_min_advmss __read_mostly = 256;

/*
 *	Interface to generic destination cache.
 */

static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int ipv4_default_advmss(const struct dst_entry *dst);
static unsigned int ipv4_mtu(const struct dst_entry *dst);
static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
static void ipv4_link_failure(struct sk_buff *skb);
static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
                              struct sk_buff *skb, u32 mtu);
static void ip_do_redirect(struct dst_entry *dst, struct sock *sk,
                           struct sk_buff *skb);
static void ipv4_dst_destroy(struct dst_entry *dst);

static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
                            int how)
{
}

static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
{
        WARN_ON(1);
        return NULL;
}

static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
                                           struct sk_buff *skb,
                                           const void *daddr);

static struct dst_ops ipv4_dst_ops = {
        .family =               AF_INET,
        .protocol =             cpu_to_be16(ETH_P_IP),
        .check =                ipv4_dst_check,
        .default_advmss =       ipv4_default_advmss,
        .mtu =                  ipv4_mtu,
        .cow_metrics =          ipv4_cow_metrics,
        .destroy =              ipv4_dst_destroy,
        .ifdown =               ipv4_dst_ifdown,
        .negative_advice =      ipv4_negative_advice,
        .link_failure =         ipv4_link_failure,
        .update_pmtu =          ip_rt_update_pmtu,
        .redirect =             ip_do_redirect,
        .local_out =            __ip_local_out,
        .neigh_lookup =         ipv4_neigh_lookup,
};

#define ECN_OR_COST(class)      TC_PRIO_##class

const __u8 ip_tos2prio[16] = {
        TC_PRIO_BESTEFFORT,
        ECN_OR_COST(BESTEFFORT),
        TC_PRIO_BESTEFFORT,
        ECN_OR_COST(BESTEFFORT),
        TC_PRIO_BULK,
        ECN_OR_COST(BULK),
        TC_PRIO_BULK,
        ECN_OR_COST(BULK),
        TC_PRIO_INTERACTIVE,
        ECN_OR_COST(INTERACTIVE),
        TC_PRIO_INTERACTIVE,
        ECN_OR_COST(INTERACTIVE),
        TC_PRIO_INTERACTIVE_BULK,
        ECN_OR_COST(INTERACTIVE_BULK),
        TC_PRIO_INTERACTIVE_BULK,
        ECN_OR_COST(INTERACTIVE_BULK)
};
EXPORT_SYMBOL(ip_tos2prio);
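
/*
 * Callers map a TOS byte to a queueing priority via rt_tos2priority()
 * (see <net/route.h>), which indexes this table with IPTOS_TOS(tos) >> 1;
 * each pair of entries is therefore one TOS value with and without the
 * low-order "ECN or cost" bit.
 */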

static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
#define RT_CACHE_STAT_INC(field) __this_cpu_inc(rt_cache_stat.field)

#ifdef CONFIG_PROC_FS
static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
{
        if (*pos)
                return NULL;
        return SEQ_START_TOKEN;
}

static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
        ++*pos;
        return NULL;
}

static void rt_cache_seq_stop(struct seq_file *seq, void *v)
{
}

static int rt_cache_seq_show(struct seq_file *seq, void *v)
{
        if (v == SEQ_START_TOKEN)
                seq_printf(seq, "%-127s\n",
                           "Iface\tDestination\tGateway \tFlags\t\tRefCnt\tUse\t"
                           "Metric\tSource\t\tMTU\tWindow\tIRTT\tTOS\tHHRef\t"
                           "HHUptod\tSpecDst");
        return 0;
}

static const struct seq_operations rt_cache_seq_ops = {
        .start  = rt_cache_seq_start,
        .next   = rt_cache_seq_next,
        .stop   = rt_cache_seq_stop,
        .show   = rt_cache_seq_show,
};

static int rt_cache_seq_open(struct inode *inode, struct file *file)
{
        return seq_open(file, &rt_cache_seq_ops);
}

static const struct file_operations rt_cache_seq_fops = {
        .owner   = THIS_MODULE,
        .open    = rt_cache_seq_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = seq_release,
};


static void *rt_cpu_seq_start(struct seq_file *seq, loff_t *pos)
{
        int cpu;

        if (*pos == 0)
                return SEQ_START_TOKEN;

        for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
                if (!cpu_possible(cpu))
                        continue;
                *pos = cpu+1;
                return &per_cpu(rt_cache_stat, cpu);
        }
        return NULL;
}

static void *rt_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
        int cpu;

        for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
                if (!cpu_possible(cpu))
                        continue;
                *pos = cpu+1;
                return &per_cpu(rt_cache_stat, cpu);
        }
        return NULL;

}

static void rt_cpu_seq_stop(struct seq_file *seq, void *v)
{

}

static int rt_cpu_seq_show(struct seq_file *seq, void *v)
{
        struct rt_cache_stat *st = v;

        if (v == SEQ_START_TOKEN) {
                seq_printf(seq, "entries  in_hit in_slow_tot in_slow_mc in_no_route in_brd in_martian_dst in_martian_src  out_hit out_slow_tot out_slow_mc  gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n");
                return 0;
        }

        seq_printf(seq,"%08x  %08x %08x %08x %08x %08x %08x %08x "
                   " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n",
                   dst_entries_get_slow(&ipv4_dst_ops),
                   st->in_hit,
                   st->in_slow_tot,
                   st->in_slow_mc,
                   st->in_no_route,
                   st->in_brd,
                   st->in_martian_dst,
                   st->in_martian_src,

                   st->out_hit,
                   st->out_slow_tot,
                   st->out_slow_mc,

                   st->gc_total,
                   st->gc_ignored,
                   st->gc_goal_miss,
                   st->gc_dst_overflow,
                   st->in_hlist_search,
                   st->out_hlist_search
                );
        return 0;
}

static const struct seq_operations rt_cpu_seq_ops = {
        .start  = rt_cpu_seq_start,
        .next   = rt_cpu_seq_next,
        .stop   = rt_cpu_seq_stop,
        .show   = rt_cpu_seq_show,
};

static int rt_cpu_seq_open(struct inode *inode, struct file *file)
{
        return seq_open(file, &rt_cpu_seq_ops);
}

static const struct file_operations rt_cpu_seq_fops = {
        .owner   = THIS_MODULE,
        .open    = rt_cpu_seq_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = seq_release,
};

#ifdef CONFIG_IP_ROUTE_CLASSID
static int rt_acct_proc_show(struct seq_file *m, void *v)
{
        struct ip_rt_acct *dst, *src;
        unsigned int i, j;

        dst = kcalloc(256, sizeof(struct ip_rt_acct), GFP_KERNEL);
        if (!dst)
                return -ENOMEM;

        for_each_possible_cpu(i) {
                src = (struct ip_rt_acct *)per_cpu_ptr(ip_rt_acct, i);
                for (j = 0; j < 256; j++) {
                        dst[j].o_bytes   += src[j].o_bytes;
                        dst[j].o_packets += src[j].o_packets;
                        dst[j].i_bytes   += src[j].i_bytes;
                        dst[j].i_packets += src[j].i_packets;
                }
        }

        seq_write(m, dst, 256 * sizeof(struct ip_rt_acct));
        kfree(dst);
        return 0;
}

static int rt_acct_proc_open(struct inode *inode, struct file *file)
{
        return single_open(file, rt_acct_proc_show, NULL);
}

static const struct file_operations rt_acct_proc_fops = {
        .owner   = THIS_MODULE,
        .open    = rt_acct_proc_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = single_release,
};
#endif

static int __net_init ip_rt_do_proc_init(struct net *net)
{
        struct proc_dir_entry *pde;

        pde = proc_net_fops_create(net, "rt_cache", S_IRUGO,
                        &rt_cache_seq_fops);
        if (!pde)
                goto err1;

        pde = proc_create("rt_cache", S_IRUGO,
                          net->proc_net_stat, &rt_cpu_seq_fops);
        if (!pde)
                goto err2;

#ifdef CONFIG_IP_ROUTE_CLASSID
        pde = proc_create("rt_acct", 0, net->proc_net, &rt_acct_proc_fops);
        if (!pde)
                goto err3;
#endif
        return 0;

#ifdef CONFIG_IP_ROUTE_CLASSID
err3:
        remove_proc_entry("rt_cache", net->proc_net_stat);
#endif
err2:
        remove_proc_entry("rt_cache", net->proc_net);
err1:
        return -ENOMEM;
}

static void __net_exit ip_rt_do_proc_exit(struct net *net)
{
        remove_proc_entry("rt_cache", net->proc_net_stat);
        remove_proc_entry("rt_cache", net->proc_net);
#ifdef CONFIG_IP_ROUTE_CLASSID
        remove_proc_entry("rt_acct", net->proc_net);
#endif
}

static struct pernet_operations ip_rt_proc_ops __net_initdata = {
        .init = ip_rt_do_proc_init,
        .exit = ip_rt_do_proc_exit,
};

static int __init ip_rt_proc_init(void)
{
        return register_pernet_subsys(&ip_rt_proc_ops);
}

#else
static inline int ip_rt_proc_init(void)
{
        return 0;
}
#endif /* CONFIG_PROC_FS */

static inline bool rt_is_expired(const struct rtable *rth)
{
        return rth->rt_genid != rt_genid(dev_net(rth->dst.dev));
}

void rt_cache_flush(struct net *net)
{
        rt_genid_bump(net);
}
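
/*
 * Flushing the cache is now just a matter of bumping the per-namespace
 * route generation counter: cached dsts whose rt_genid no longer matches
 * are rejected lazily by rt_is_expired()/rt_cache_valid() on their next
 * lookup.
 */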

static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
                                           struct sk_buff *skb,
                                           const void *daddr)
{
        struct net_device *dev = dst->dev;
        const __be32 *pkey = daddr;
        const struct rtable *rt;
        struct neighbour *n;

        rt = (const struct rtable *) dst;
        if (rt->rt_gateway)
                pkey = (const __be32 *) &rt->rt_gateway;
        else if (skb)
                pkey = &ip_hdr(skb)->daddr;

        n = __ipv4_neigh_lookup(dev, *(__force u32 *)pkey);
        if (n)
                return n;
        return neigh_create(&arp_tbl, pkey, dev);
}

/*
 * Peer allocation may fail only in serious out-of-memory conditions.
 * However, we can still generate some output.
 * Random ID selection looks a bit dangerous because we have no chance of
 * selecting an ID that is unique over a reasonable period of time.
 * But a broken packet identifier may be better than no packet at all.
 */
static void ip_select_fb_ident(struct iphdr *iph)
{
        static DEFINE_SPINLOCK(ip_fb_id_lock);
        static u32 ip_fallback_id;
        u32 salt;

        spin_lock_bh(&ip_fb_id_lock);
        salt = secure_ip_id((__force __be32)ip_fallback_id ^ iph->daddr);
        iph->id = htons(salt & 0xFFFF);
        ip_fallback_id = salt;
        spin_unlock_bh(&ip_fb_id_lock);
}

void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
{
        struct net *net = dev_net(dst->dev);
        struct inet_peer *peer;

        peer = inet_getpeer_v4(net->ipv4.peers, iph->daddr, 1);
        if (peer) {
                iph->id = htons(inet_getid(peer, more));
                inet_putpeer(peer);
                return;
        }

        ip_select_fb_ident(iph);
}
EXPORT_SYMBOL(__ip_select_ident);

static void __build_flow_key(struct flowi4 *fl4, const struct sock *sk,
                             const struct iphdr *iph,
                             int oif, u8 tos,
                             u8 prot, u32 mark, int flow_flags)
{
        if (sk) {
                const struct inet_sock *inet = inet_sk(sk);

                oif = sk->sk_bound_dev_if;
                mark = sk->sk_mark;
                tos = RT_CONN_FLAGS(sk);
                prot = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol;
        }
        flowi4_init_output(fl4, oif, mark, tos,
                           RT_SCOPE_UNIVERSE, prot,
                           flow_flags,
                           iph->daddr, iph->saddr, 0, 0);
}

static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb,
                               const struct sock *sk)
{
        const struct iphdr *iph = ip_hdr(skb);
        int oif = skb->dev->ifindex;
        u8 tos = RT_TOS(iph->tos);
        u8 prot = iph->protocol;
        u32 mark = skb->mark;

        __build_flow_key(fl4, sk, iph, oif, tos, prot, mark, 0);
}

static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
{
        const struct inet_sock *inet = inet_sk(sk);
        const struct ip_options_rcu *inet_opt;
        __be32 daddr = inet->inet_daddr;

        rcu_read_lock();
        inet_opt = rcu_dereference(inet->inet_opt);
        if (inet_opt && inet_opt->opt.srr)
                daddr = inet_opt->opt.faddr;
        flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
                           RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
                           inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
                           inet_sk_flowi_flags(sk),
                           daddr, inet->inet_saddr, 0, 0);
        rcu_read_unlock();
}

static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk,
                                 const struct sk_buff *skb)
{
        if (skb)
                build_skb_flow_key(fl4, skb, sk);
        else
                build_sk_flow_key(fl4, sk);
}

static inline void rt_free(struct rtable *rt)
{
        call_rcu(&rt->dst.rcu_head, dst_rcu_free);
}

static DEFINE_SPINLOCK(fnhe_lock);

static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
{
        struct fib_nh_exception *fnhe, *oldest;
        struct rtable *orig;

        oldest = rcu_dereference(hash->chain);
        for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe;
             fnhe = rcu_dereference(fnhe->fnhe_next)) {
                if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp))
                        oldest = fnhe;
        }
        orig = rcu_dereference(oldest->fnhe_rth);
        if (orig) {
                RCU_INIT_POINTER(oldest->fnhe_rth, NULL);
                rt_free(orig);
        }
        return oldest;
}

static inline u32 fnhe_hashfun(__be32 daddr)
{
        u32 hval;

        hval = (__force u32) daddr;
        hval ^= (hval >> 11) ^ (hval >> 22);

        return hval & (FNHE_HASH_SIZE - 1);
}
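
/*
 * The exception table is small (FNHE_HASH_SIZE buckets), so the hash
 * simply folds the destination address onto itself; collisions are
 * handled by the per-bucket chains walked in find_exception() and
 * update_or_create_fnhe().
 */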

static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
                                  u32 pmtu, unsigned long expires)
{
        struct fnhe_hash_bucket *hash;
        struct fib_nh_exception *fnhe;
        int depth;
        u32 hval = fnhe_hashfun(daddr);

        spin_lock_bh(&fnhe_lock);

        hash = nh->nh_exceptions;
        if (!hash) {
                hash = kzalloc(FNHE_HASH_SIZE * sizeof(*hash), GFP_ATOMIC);
                if (!hash)
                        goto out_unlock;
                nh->nh_exceptions = hash;
        }

        hash += hval;

        depth = 0;
        for (fnhe = rcu_dereference(hash->chain); fnhe;
             fnhe = rcu_dereference(fnhe->fnhe_next)) {
                if (fnhe->fnhe_daddr == daddr)
                        break;
                depth++;
        }

        if (fnhe) {
                if (gw)
                        fnhe->fnhe_gw = gw;
                if (pmtu) {
                        fnhe->fnhe_pmtu = pmtu;
                        fnhe->fnhe_expires = expires;
                }
        } else {
                if (depth > FNHE_RECLAIM_DEPTH)
                        fnhe = fnhe_oldest(hash);
                else {
                        fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC);
                        if (!fnhe)
                                goto out_unlock;

                        fnhe->fnhe_next = hash->chain;
                        rcu_assign_pointer(hash->chain, fnhe);
                }
                fnhe->fnhe_daddr = daddr;
                fnhe->fnhe_gw = gw;
                fnhe->fnhe_pmtu = pmtu;
                fnhe->fnhe_expires = expires;
        }

        fnhe->fnhe_stamp = jiffies;

out_unlock:
        spin_unlock_bh(&fnhe_lock);
        return;
}
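
/*
 * Typical usage: __ip_rt_update_pmtu() records a learned path MTU with
 *
 *	update_or_create_fnhe(nh, fl4->daddr, 0, mtu,
 *			      jiffies + ip_rt_mtu_expires);
 *
 * while __ip_do_redirect() records a new gateway with a zero pmtu/expiry.
 * For an already existing exception a zero gw or pmtu argument leaves the
 * corresponding field untouched.
 */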

static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4,
                             bool kill_route)
{
        __be32 new_gw = icmp_hdr(skb)->un.gateway;
        __be32 old_gw = ip_hdr(skb)->saddr;
        struct net_device *dev = skb->dev;
        struct in_device *in_dev;
        struct fib_result res;
        struct neighbour *n;
        struct net *net;

        switch (icmp_hdr(skb)->code & 7) {
        case ICMP_REDIR_NET:
        case ICMP_REDIR_NETTOS:
        case ICMP_REDIR_HOST:
        case ICMP_REDIR_HOSTTOS:
                break;

        default:
                return;
        }

        if (rt->rt_gateway != old_gw)
                return;

        in_dev = __in_dev_get_rcu(dev);
        if (!in_dev)
                return;

        net = dev_net(dev);
        if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) ||
            ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw) ||
            ipv4_is_zeronet(new_gw))
                goto reject_redirect;

        if (!IN_DEV_SHARED_MEDIA(in_dev)) {
                if (!inet_addr_onlink(in_dev, new_gw, old_gw))
                        goto reject_redirect;
                if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev))
                        goto reject_redirect;
        } else {
                if (inet_addr_type(net, new_gw) != RTN_UNICAST)
                        goto reject_redirect;
        }

        n = ipv4_neigh_lookup(&rt->dst, NULL, &new_gw);
        if (n) {
                if (!(n->nud_state & NUD_VALID)) {
                        neigh_event_send(n, NULL);
                } else {
                        if (fib_lookup(net, fl4, &res) == 0) {
                                struct fib_nh *nh = &FIB_RES_NH(res);

                                update_or_create_fnhe(nh, fl4->daddr, new_gw,
                                                      0, 0);
                        }
                        if (kill_route)
                                rt->dst.obsolete = DST_OBSOLETE_KILL;
                        call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n);
                }
                neigh_release(n);
        }
        return;

reject_redirect:
#ifdef CONFIG_IP_ROUTE_VERBOSE
        if (IN_DEV_LOG_MARTIANS(in_dev)) {
                const struct iphdr *iph = (const struct iphdr *) skb->data;
                __be32 daddr = iph->daddr;
                __be32 saddr = iph->saddr;

                net_info_ratelimited("Redirect from %pI4 on %s about %pI4 ignored\n"
                                     "  Advised path = %pI4 -> %pI4\n",
                                     &old_gw, dev->name, &new_gw,
                                     &saddr, &daddr);
        }
#endif
        ;
}

static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
{
        struct rtable *rt;
        struct flowi4 fl4;

        rt = (struct rtable *) dst;

        ip_rt_build_flow_key(&fl4, sk, skb);
        __ip_do_redirect(rt, skb, &fl4, true);
}

static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
{
        struct rtable *rt = (struct rtable *)dst;
        struct dst_entry *ret = dst;

        if (rt) {
                if (dst->obsolete > 0) {
                        ip_rt_put(rt);
                        ret = NULL;
                } else if ((rt->rt_flags & RTCF_REDIRECTED) ||
                           rt->dst.expires) {
                        ip_rt_put(rt);
                        ret = NULL;
                }
        }
        return ret;
}

/*
 * Algorithm:
 *	1. The first ip_rt_redirect_number redirects are sent
 *	   with exponential backoff, then we stop sending them at all,
 *	   assuming that the host ignores our redirects.
 *	2. If we did not see packets requiring redirects
 *	   during ip_rt_redirect_silence, we assume that the host
 *	   forgot the redirected route and start to send redirects again.
 *
 * This algorithm is much cheaper and more intelligent than dumb load limiting
 * in icmp.c.
 *
 * NOTE. Do not forget to inhibit load limiting for redirects (redundant)
 * and "frag. need" (breaks PMTU discovery) in icmp.c.
 */

void ip_rt_send_redirect(struct sk_buff *skb)
{
        struct rtable *rt = skb_rtable(skb);
        struct in_device *in_dev;
        struct inet_peer *peer;
        struct net *net;
        int log_martians;

        rcu_read_lock();
        in_dev = __in_dev_get_rcu(rt->dst.dev);
        if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) {
                rcu_read_unlock();
                return;
        }
        log_martians = IN_DEV_LOG_MARTIANS(in_dev);
        rcu_read_unlock();

        net = dev_net(rt->dst.dev);
        peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, 1);
        if (!peer) {
                icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST,
                          rt_nexthop(rt, ip_hdr(skb)->daddr));
                return;
        }

        /* No redirected packets during ip_rt_redirect_silence;
         * reset the algorithm.
         */
        if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence))
                peer->rate_tokens = 0;

        /* Too many ignored redirects; do not send anything.
         * Set peer->rate_last to the last seen redirected packet.
         */
        if (peer->rate_tokens >= ip_rt_redirect_number) {
                peer->rate_last = jiffies;
                goto out_put_peer;
        }

        /* Check for load limit; set rate_last to the latest sent
         * redirect.
         */
        if (peer->rate_tokens == 0 ||
            time_after(jiffies,
                       (peer->rate_last +
                        (ip_rt_redirect_load << peer->rate_tokens)))) {
                __be32 gw = rt_nexthop(rt, ip_hdr(skb)->daddr);

                icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw);
                peer->rate_last = jiffies;
                ++peer->rate_tokens;
#ifdef CONFIG_IP_ROUTE_VERBOSE
                if (log_martians &&
                    peer->rate_tokens == ip_rt_redirect_number)
                        net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n",
                                             &ip_hdr(skb)->saddr, inet_iif(skb),
                                             &ip_hdr(skb)->daddr, &gw);
#endif
        }
out_put_peer:
        inet_putpeer(peer);
}
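
/*
 * With the defaults above (ip_rt_redirect_load = HZ/50,
 * ip_rt_redirect_number = 9, ip_rt_redirect_silence = (HZ/50) << 10),
 * the first redirect to a peer is sent immediately, the n-th one only
 * after (HZ/50) << (n-1) jiffies have passed since the previous one, and
 * once nine redirects have been ignored we stop sending until the peer
 * has produced no redirect-worthy traffic for ip_rt_redirect_silence
 * jiffies.
 */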

static int ip_error(struct sk_buff *skb)
{
        struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
        struct rtable *rt = skb_rtable(skb);
        struct inet_peer *peer;
        unsigned long now;
        struct net *net;
        bool send;
        int code;

        net = dev_net(rt->dst.dev);
        if (!IN_DEV_FORWARD(in_dev)) {
                switch (rt->dst.error) {
                case EHOSTUNREACH:
                        IP_INC_STATS_BH(net, IPSTATS_MIB_INADDRERRORS);
                        break;

                case ENETUNREACH:
                        IP_INC_STATS_BH(net, IPSTATS_MIB_INNOROUTES);
                        break;
                }
                goto out;
        }

        switch (rt->dst.error) {
        case EINVAL:
        default:
                goto out;
        case EHOSTUNREACH:
                code = ICMP_HOST_UNREACH;
                break;
        case ENETUNREACH:
                code = ICMP_NET_UNREACH;
                IP_INC_STATS_BH(net, IPSTATS_MIB_INNOROUTES);
                break;
        case EACCES:
                code = ICMP_PKT_FILTERED;
                break;
        }

        peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, 1);

        send = true;
        if (peer) {
                now = jiffies;
                peer->rate_tokens += now - peer->rate_last;
                if (peer->rate_tokens > ip_rt_error_burst)
                        peer->rate_tokens = ip_rt_error_burst;
                peer->rate_last = now;
                if (peer->rate_tokens >= ip_rt_error_cost)
                        peer->rate_tokens -= ip_rt_error_cost;
                else
                        send = false;
                inet_putpeer(peer);
        }
        if (send)
                icmp_send(skb, ICMP_DEST_UNREACH, code, 0);

out:    kfree_skb(skb);
        return 0;
}

static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
{
        struct dst_entry *dst = &rt->dst;
        struct fib_result res;

        if (dst_metric_locked(dst, RTAX_MTU))
                return;

        if (dst->dev->mtu < mtu)
                return;

        if (mtu < ip_rt_min_pmtu)
                mtu = ip_rt_min_pmtu;

        if (!rt->rt_pmtu) {
                dst->obsolete = DST_OBSOLETE_KILL;
        } else {
                rt->rt_pmtu = mtu;
                dst->expires = max(1UL, jiffies + ip_rt_mtu_expires);
        }

        rcu_read_lock();
        if (fib_lookup(dev_net(dst->dev), fl4, &res) == 0) {
                struct fib_nh *nh = &FIB_RES_NH(res);

                update_or_create_fnhe(nh, fl4->daddr, 0, mtu,
                                      jiffies + ip_rt_mtu_expires);
        }
        rcu_read_unlock();
}

static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
                              struct sk_buff *skb, u32 mtu)
{
        struct rtable *rt = (struct rtable *) dst;
        struct flowi4 fl4;

        ip_rt_build_flow_key(&fl4, sk, skb);
        __ip_rt_update_pmtu(rt, &fl4, mtu);
}

void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
                      int oif, u32 mark, u8 protocol, int flow_flags)
{
        const struct iphdr *iph = (const struct iphdr *) skb->data;
        struct flowi4 fl4;
        struct rtable *rt;

        __build_flow_key(&fl4, NULL, iph, oif,
                         RT_TOS(iph->tos), protocol, mark, flow_flags);
        rt = __ip_route_output_key(net, &fl4);
        if (!IS_ERR(rt)) {
                __ip_rt_update_pmtu(rt, &fl4, mtu);
                ip_rt_put(rt);
        }
}
EXPORT_SYMBOL_GPL(ipv4_update_pmtu);

void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
{
        const struct iphdr *iph = (const struct iphdr *) skb->data;
        struct flowi4 fl4;
        struct rtable *rt;

        __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
        rt = __ip_route_output_key(sock_net(sk), &fl4);
        if (!IS_ERR(rt)) {
                __ip_rt_update_pmtu(rt, &fl4, mtu);
                ip_rt_put(rt);
        }
}
EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu);

void ipv4_redirect(struct sk_buff *skb, struct net *net,
                   int oif, u32 mark, u8 protocol, int flow_flags)
{
        const struct iphdr *iph = (const struct iphdr *) skb->data;
        struct flowi4 fl4;
        struct rtable *rt;

        __build_flow_key(&fl4, NULL, iph, oif,
                         RT_TOS(iph->tos), protocol, mark, flow_flags);
        rt = __ip_route_output_key(net, &fl4);
        if (!IS_ERR(rt)) {
                __ip_do_redirect(rt, skb, &fl4, false);
                ip_rt_put(rt);
        }
}
EXPORT_SYMBOL_GPL(ipv4_redirect);

void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk)
{
        const struct iphdr *iph = (const struct iphdr *) skb->data;
        struct flowi4 fl4;
        struct rtable *rt;

        __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
        rt = __ip_route_output_key(sock_net(sk), &fl4);
        if (!IS_ERR(rt)) {
                __ip_do_redirect(rt, skb, &fl4, false);
                ip_rt_put(rt);
        }
}
EXPORT_SYMBOL_GPL(ipv4_sk_redirect);

static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
{
        struct rtable *rt = (struct rtable *) dst;

        /* All IPV4 dsts are created with ->obsolete set to the value
         * DST_OBSOLETE_FORCE_CHK which forces validation calls down
         * into this function always.
         *
         * When a PMTU/redirect information update invalidates a
         * route, this is indicated by setting obsolete to
         * DST_OBSOLETE_KILL.
         */
        if (dst->obsolete == DST_OBSOLETE_KILL || rt_is_expired(rt))
                return NULL;
        return dst;
}

static void ipv4_link_failure(struct sk_buff *skb)
{
        struct rtable *rt;

        icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);

        rt = skb_rtable(skb);
        if (rt)
                dst_set_expires(&rt->dst, 0);
}

static int ip_rt_bug(struct sk_buff *skb)
{
        pr_debug("%s: %pI4 -> %pI4, %s\n",
                 __func__, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr,
                 skb->dev ? skb->dev->name : "?");
        kfree_skb(skb);
        WARN_ON(1);
        return 0;
}

/*
 * We do not cache the source address of an outgoing interface,
 * because it is used only by the IP RR, TS and SRR options,
 * so it stays out of the fast path.
 *
 * BTW remember: "addr" is allowed to be not aligned
 * in IP options!
 */

void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt)
{
        __be32 src;

        if (rt_is_output_route(rt))
                src = ip_hdr(skb)->saddr;
        else {
                struct fib_result res;
                struct flowi4 fl4;
                struct iphdr *iph;

                iph = ip_hdr(skb);

                memset(&fl4, 0, sizeof(fl4));
                fl4.daddr = iph->daddr;
                fl4.saddr = iph->saddr;
                fl4.flowi4_tos = RT_TOS(iph->tos);
                fl4.flowi4_oif = rt->dst.dev->ifindex;
                fl4.flowi4_iif = skb->dev->ifindex;
                fl4.flowi4_mark = skb->mark;

                rcu_read_lock();
                if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res) == 0)
                        src = FIB_RES_PREFSRC(dev_net(rt->dst.dev), res);
                else
                        src = inet_select_addr(rt->dst.dev,
                                               rt_nexthop(rt, iph->daddr),
                                               RT_SCOPE_UNIVERSE);
                rcu_read_unlock();
        }
        memcpy(addr, &src, 4);
}

#ifdef CONFIG_IP_ROUTE_CLASSID
static void set_class_tag(struct rtable *rt, u32 tag)
{
        if (!(rt->dst.tclassid & 0xFFFF))
                rt->dst.tclassid |= tag & 0xFFFF;
        if (!(rt->dst.tclassid & 0xFFFF0000))
                rt->dst.tclassid |= tag & 0xFFFF0000;
}
#endif

static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
{
        unsigned int advmss = dst_metric_raw(dst, RTAX_ADVMSS);

        if (advmss == 0) {
                advmss = max_t(unsigned int, dst->dev->mtu - 40,
                               ip_rt_min_advmss);
                if (advmss > 65535 - 40)
                        advmss = 65535 - 40;
        }
        return advmss;
}
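
/*
 * E.g. for a plain Ethernet device (MTU 1500) with no explicit advmss
 * metric this yields 1500 - 40 = 1460, i.e. room for the fixed IPv4 and
 * TCP headers, clamped below by ip_rt_min_advmss and above by 65535 - 40.
 */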

static unsigned int ipv4_mtu(const struct dst_entry *dst)
{
        const struct rtable *rt = (const struct rtable *) dst;
        unsigned int mtu = rt->rt_pmtu;

        if (!mtu || time_after_eq(jiffies, rt->dst.expires))
                mtu = dst_metric_raw(dst, RTAX_MTU);

        if (mtu)
                return mtu;

        mtu = dst->dev->mtu;

        if (unlikely(dst_metric_locked(dst, RTAX_MTU))) {
                if (rt->rt_uses_gateway && mtu > 576)
                        mtu = 576;
        }

        if (mtu > IP_MAX_MTU)
                mtu = IP_MAX_MTU;

        return mtu;
}
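
/*
 * Preference order: a still-valid learned path MTU, then a configured
 * RTAX_MTU metric, and only then the device MTU, which is additionally
 * clamped to 576 for gatewayed routes with a locked MTU metric and to
 * IP_MAX_MTU.
 */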

static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
{
        struct fnhe_hash_bucket *hash = nh->nh_exceptions;
        struct fib_nh_exception *fnhe;
        u32 hval;

        if (!hash)
                return NULL;

        hval = fnhe_hashfun(daddr);

        for (fnhe = rcu_dereference(hash[hval].chain); fnhe;
             fnhe = rcu_dereference(fnhe->fnhe_next)) {
                if (fnhe->fnhe_daddr == daddr)
                        return fnhe;
        }
        return NULL;
}

static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
                              __be32 daddr)
{
        bool ret = false;

        spin_lock_bh(&fnhe_lock);

        if (daddr == fnhe->fnhe_daddr) {
                struct rtable *orig = rcu_dereference(fnhe->fnhe_rth);
                if (orig && rt_is_expired(orig)) {
                        fnhe->fnhe_gw = 0;
                        fnhe->fnhe_pmtu = 0;
                        fnhe->fnhe_expires = 0;
                }
                if (fnhe->fnhe_pmtu) {
                        unsigned long expires = fnhe->fnhe_expires;
                        unsigned long diff = expires - jiffies;

                        if (time_before(jiffies, expires)) {
                                rt->rt_pmtu = fnhe->fnhe_pmtu;
                                dst_set_expires(&rt->dst, diff);
                        }
                }
                if (fnhe->fnhe_gw) {
                        rt->rt_flags |= RTCF_REDIRECTED;
                        rt->rt_gateway = fnhe->fnhe_gw;
                        rt->rt_uses_gateway = 1;
                } else if (!rt->rt_gateway)
                        rt->rt_gateway = daddr;

                rcu_assign_pointer(fnhe->fnhe_rth, rt);
                if (orig)
                        rt_free(orig);

                fnhe->fnhe_stamp = jiffies;
                ret = true;
        }
        spin_unlock_bh(&fnhe_lock);

        return ret;
}

static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt)
{
        struct rtable *orig, *prev, **p;
        bool ret = true;

        if (rt_is_input_route(rt)) {
                p = (struct rtable **)&nh->nh_rth_input;
        } else {
                p = (struct rtable **)__this_cpu_ptr(nh->nh_pcpu_rth_output);
        }
        orig = *p;

        prev = cmpxchg(p, orig, rt);
        if (prev == orig) {
                if (orig)
                        rt_free(orig);
        } else
                ret = false;

        return ret;
}
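
/*
 * Input routes are cached directly in nh->nh_rth_input, output routes in
 * the per-CPU nh_pcpu_rth_output slot; the cmpxchg() means a concurrent
 * update simply loses, and the caller then marks the new route DST_NOCACHE
 * (see rt_set_nexthop()).
 */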

static DEFINE_SPINLOCK(rt_uncached_lock);
static LIST_HEAD(rt_uncached_list);

static void rt_add_uncached_list(struct rtable *rt)
{
        spin_lock_bh(&rt_uncached_lock);
        list_add_tail(&rt->rt_uncached, &rt_uncached_list);
        spin_unlock_bh(&rt_uncached_lock);
}

static void ipv4_dst_destroy(struct dst_entry *dst)
{
        struct rtable *rt = (struct rtable *) dst;

        if (!list_empty(&rt->rt_uncached)) {
                spin_lock_bh(&rt_uncached_lock);
                list_del(&rt->rt_uncached);
                spin_unlock_bh(&rt_uncached_lock);
        }
}

void rt_flush_dev(struct net_device *dev)
{
        if (!list_empty(&rt_uncached_list)) {
                struct net *net = dev_net(dev);
                struct rtable *rt;

                spin_lock_bh(&rt_uncached_lock);
                list_for_each_entry(rt, &rt_uncached_list, rt_uncached) {
                        if (rt->dst.dev != dev)
                                continue;
                        rt->dst.dev = net->loopback_dev;
                        dev_hold(rt->dst.dev);
                        dev_put(dev);
                }
                spin_unlock_bh(&rt_uncached_lock);
        }
}

static bool rt_cache_valid(const struct rtable *rt)
{
        return rt &&
               rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
               !rt_is_expired(rt);
}

static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
                           const struct fib_result *res,
                           struct fib_nh_exception *fnhe,
                           struct fib_info *fi, u16 type, u32 itag)
{
        bool cached = false;

        if (fi) {
                struct fib_nh *nh = &FIB_RES_NH(*res);

                if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) {
                        rt->rt_gateway = nh->nh_gw;
                        rt->rt_uses_gateway = 1;
                }
                dst_init_metrics(&rt->dst, fi->fib_metrics, true);
#ifdef CONFIG_IP_ROUTE_CLASSID
                rt->dst.tclassid = nh->nh_tclassid;
#endif
                if (unlikely(fnhe))
                        cached = rt_bind_exception(rt, fnhe, daddr);
                else if (!(rt->dst.flags & DST_NOCACHE))
                        cached = rt_cache_route(nh, rt);
                if (unlikely(!cached)) {
                        /* Routes we intend to cache in nexthop exception or
                         * FIB nexthop have the DST_NOCACHE bit clear.
                         * However, if we are unsuccessful at storing this
                         * route into the cache we really need to set it.
                         */
                        rt->dst.flags |= DST_NOCACHE;
                        if (!rt->rt_gateway)
                                rt->rt_gateway = daddr;
                        rt_add_uncached_list(rt);
                }
        } else
                rt_add_uncached_list(rt);

#ifdef CONFIG_IP_ROUTE_CLASSID
#ifdef CONFIG_IP_MULTIPLE_TABLES
        set_class_tag(rt, res->tclassid);
#endif
        set_class_tag(rt, itag);
#endif
}

static struct rtable *rt_dst_alloc(struct net_device *dev,
                                   bool nopolicy, bool noxfrm, bool will_cache)
{
        return dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
                         (will_cache ? 0 : (DST_HOST | DST_NOCACHE)) |
                         (nopolicy ? DST_NOPOLICY : 0) |
                         (noxfrm ? DST_NOXFRM : 0));
}

/* called in rcu_read_lock() section */
static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
                             u8 tos, struct net_device *dev, int our)
{
        struct rtable *rth;
        struct in_device *in_dev = __in_dev_get_rcu(dev);
        u32 itag = 0;
        int err;

        /* Primary sanity checks. */

        if (in_dev == NULL)
                return -EINVAL;

        if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
            skb->protocol != htons(ETH_P_IP))
                goto e_inval;

        if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
                if (ipv4_is_loopback(saddr))
                        goto e_inval;

        if (ipv4_is_zeronet(saddr)) {
                if (!ipv4_is_local_multicast(daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001348 goto e_inval;
Eric Dumazetb5f7e752010-06-02 12:05:27 +00001349 } else {
David S. Miller9e56e382012-06-28 18:54:02 -07001350 err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
1351 in_dev, &itag);
Eric Dumazetb5f7e752010-06-02 12:05:27 +00001352 if (err < 0)
1353 goto e_err;
1354 }
Benjamin LaHaise4e7b2f12012-03-27 15:55:32 +00001355 rth = rt_dst_alloc(dev_net(dev)->loopback_dev,
David S. Millerf2bb4be2012-07-17 12:20:47 -07001356 IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001357 if (!rth)
1358 goto e_nobufs;
1359
Patrick McHardyc7066f72011-01-14 13:36:42 +01001360#ifdef CONFIG_IP_ROUTE_CLASSID
Changli Gaod8d1f302010-06-10 23:31:35 -07001361 rth->dst.tclassid = itag;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001362#endif
David S. Millercf911662011-04-28 14:31:47 -07001363 rth->dst.output = ip_rt_bug;
1364
Denis V. Luneve84f84f2008-07-05 19:04:32 -07001365 rth->rt_genid = rt_genid(dev_net(dev));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001366 rth->rt_flags = RTCF_MULTICAST;
Eric Dumazet29e75252008-01-31 17:05:09 -08001367 rth->rt_type = RTN_MULTICAST;
David S. Miller9917e1e82012-07-17 14:44:26 -07001368 rth->rt_is_input= 1;
David S. Miller13378ca2012-07-23 13:57:45 -07001369 rth->rt_iif = 0;
David S. Miller59436342012-07-10 06:58:42 -07001370 rth->rt_pmtu = 0;
David S. Millerf8126f12012-07-13 05:03:45 -07001371 rth->rt_gateway = 0;
Julian Anastasov155e8332012-10-08 11:41:18 +00001372 rth->rt_uses_gateway = 0;
David S. Millercaacf052012-07-31 15:06:50 -07001373 INIT_LIST_HEAD(&rth->rt_uncached);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001374 if (our) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001375 rth->dst.input= ip_local_deliver;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001376 rth->rt_flags |= RTCF_LOCAL;
1377 }
1378
1379#ifdef CONFIG_IP_MROUTE
Joe Perchesf97c1e02007-12-16 13:45:43 -08001380 if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev))
Changli Gaod8d1f302010-06-10 23:31:35 -07001381 rth->dst.input = ip_mr_input;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001382#endif
1383 RT_CACHE_STAT_INC(in_slow_mc);
1384
David S. Miller89aef892012-07-17 11:00:09 -07001385 skb_dst_set(skb, &rth->dst);
1386 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001387
1388e_nobufs:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001389 return -ENOBUFS;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001390e_inval:
Eric Dumazet96d36222010-06-02 19:21:31 +00001391 return -EINVAL;
Eric Dumazetb5f7e752010-06-02 12:05:27 +00001392e_err:
Eric Dumazetb5f7e752010-06-02 12:05:27 +00001393 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001394}
1395
1396
1397static void ip_handle_martian_source(struct net_device *dev,
1398 struct in_device *in_dev,
1399 struct sk_buff *skb,
Al Viro9e12bb22006-09-26 21:25:20 -07001400 __be32 daddr,
1401 __be32 saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001402{
1403 RT_CACHE_STAT_INC(in_martian_src);
1404#ifdef CONFIG_IP_ROUTE_VERBOSE
1405 if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) {
1406 /*
1407 * RFC1812 recommendation, if source is martian,
1408 * the only hint is MAC header.
1409 */
Joe Perches058bd4d2012-03-11 18:36:11 +00001410 pr_warn("martian source %pI4 from %pI4, on dev %s\n",
Harvey Harrison673d57e2008-10-31 00:53:57 -07001411 &daddr, &saddr, dev->name);
Arnaldo Carvalho de Melo98e399f2007-03-19 15:33:04 -07001412 if (dev->hard_header_len && skb_mac_header_was_set(skb)) {
Joe Perches058bd4d2012-03-11 18:36:11 +00001413 print_hex_dump(KERN_WARNING, "ll header: ",
1414 DUMP_PREFIX_OFFSET, 16, 1,
1415 skb_mac_header(skb),
1416 dev->hard_header_len, true);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001417 }
1418 }
1419#endif
1420}
1421
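/* Build (or reuse) the dst for a forwarded packet once the FIB lookup has
 * chosen a nexthop: validate the source address, set RTCF_DOREDIRECT when
 * an ICMP redirect is appropriate, and cache the new route on the nexthop
 * whenever it is safe to share it.
 */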
Eric Dumazet47360222010-06-03 04:13:21 +00001422/* called in rcu_read_lock() section */
Stephen Hemminger5969f712008-04-10 01:52:09 -07001423static int __mkroute_input(struct sk_buff *skb,
David S. Miller982721f2011-02-16 21:44:24 -08001424 const struct fib_result *res,
Stephen Hemminger5969f712008-04-10 01:52:09 -07001425 struct in_device *in_dev,
David S. Millerc6cffba2012-07-26 11:14:38 +00001426 __be32 daddr, __be32 saddr, u32 tos)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001427{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001428 struct rtable *rth;
1429 int err;
1430 struct in_device *out_dev;
Eric Dumazet47360222010-06-03 04:13:21 +00001431 unsigned int flags = 0;
David S. Millerd2d68ba92012-07-17 12:58:50 -07001432 bool do_cache;
Al Virod9c9df82006-09-26 21:28:14 -07001433 u32 itag;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001434
1435 /* get a working reference to the output device */
Eric Dumazet47360222010-06-03 04:13:21 +00001436 out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001437 if (out_dev == NULL) {
Joe Perchese87cc472012-05-13 21:56:26 +00001438 net_crit_ratelimited("Bug in ip_route_input_slow(). Please report.\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001439 return -EINVAL;
1440 }
1441
Michael Smith5c04c812011-04-07 04:51:50 +00001442 err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res),
David S. Miller9e56e382012-06-28 18:54:02 -07001443 in_dev->dev, in_dev, &itag);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001444 if (err < 0) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001445 ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001446 saddr);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001447
Linus Torvalds1da177e2005-04-16 15:20:36 -07001448 goto cleanup;
1449 }
1450
Julian Anastasove81da0e2012-10-08 11:41:15 +00001451 do_cache = res->fi && !itag;
1452 if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001453 (IN_DEV_SHARED_MEDIA(out_dev) ||
Julian Anastasove81da0e2012-10-08 11:41:15 +00001454 inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res)))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001455 flags |= RTCF_DOREDIRECT;
Julian Anastasove81da0e2012-10-08 11:41:15 +00001456 do_cache = false;
1457 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001458
1459 if (skb->protocol != htons(ETH_P_IP)) {
1460 /* Not IP (i.e. ARP). Do not create a route if it is
1461  * invalid for proxy arp. DNAT routes are always valid.
Jesper Dangaard Brouer65324142010-01-05 05:50:47 +00001462 *
1463  * The proxy ARP feature has been extended to allow ARP
1464  * replies back on the same interface, to support
1465  * private VLAN switch technologies. See arp.c.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001466 */
Jesper Dangaard Brouer65324142010-01-05 05:50:47 +00001467 if (out_dev == in_dev &&
1468 IN_DEV_PROXY_ARP_PVLAN(in_dev) == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001469 err = -EINVAL;
1470 goto cleanup;
1471 }
1472 }
1473
Julian Anastasove81da0e2012-10-08 11:41:15 +00001474 if (do_cache) {
1475 rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
1476 if (rt_cache_valid(rth)) {
1477 skb_dst_set_noref(skb, &rth->dst);
1478 goto out;
David S. Millerd2d68ba92012-07-17 12:58:50 -07001479 }
1480 }
David S. Millerf2bb4be2012-07-17 12:20:47 -07001481
David S. Miller5c1e6aa2011-04-28 14:13:38 -07001482 rth = rt_dst_alloc(out_dev->dev,
1483 IN_DEV_CONF_GET(in_dev, NOPOLICY),
David S. Millerd2d68ba92012-07-17 12:58:50 -07001484 IN_DEV_CONF_GET(out_dev, NOXFRM), do_cache);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001485 if (!rth) {
1486 err = -ENOBUFS;
1487 goto cleanup;
1488 }
1489
David S. Millercf911662011-04-28 14:31:47 -07001490 rth->rt_genid = rt_genid(dev_net(rth->dst.dev));
1491 rth->rt_flags = flags;
1492 rth->rt_type = res->type;
David S. Miller9917e1e82012-07-17 14:44:26 -07001493 rth->rt_is_input = 1;
David S. Miller13378ca2012-07-23 13:57:45 -07001494 rth->rt_iif = 0;
David S. Miller59436342012-07-10 06:58:42 -07001495 rth->rt_pmtu = 0;
David S. Millerf8126f12012-07-13 05:03:45 -07001496 rth->rt_gateway = 0;
Julian Anastasov155e8332012-10-08 11:41:18 +00001497 rth->rt_uses_gateway = 0;
David S. Millercaacf052012-07-31 15:06:50 -07001498 INIT_LIST_HEAD(&rth->rt_uncached);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001499
Changli Gaod8d1f302010-06-10 23:31:35 -07001500 rth->dst.input = ip_forward;
1501 rth->dst.output = ip_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001502
David S. Millerd2d68ba92012-07-17 12:58:50 -07001503 rt_set_nexthop(rth, daddr, res, NULL, res->fi, res->type, itag);
David S. Millerc6cffba2012-07-26 11:14:38 +00001504 skb_dst_set(skb, &rth->dst);
David S. Millerd2d68ba92012-07-17 12:58:50 -07001505out:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001506 err = 0;
1507 cleanup:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001508 return err;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001509}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001510
Stephen Hemminger5969f712008-04-10 01:52:09 -07001511static int ip_mkroute_input(struct sk_buff *skb,
1512 struct fib_result *res,
David S. Miller68a5e3d2011-03-11 20:07:33 -05001513 const struct flowi4 *fl4,
Stephen Hemminger5969f712008-04-10 01:52:09 -07001514 struct in_device *in_dev,
1515 __be32 daddr, __be32 saddr, u32 tos)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001516{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001517#ifdef CONFIG_IP_ROUTE_MULTIPATH
David S. Millerff3fccb2011-03-10 16:23:24 -08001518 if (res->fi && res->fi->fib_nhs > 1)
David S. Miller1b7fe5932011-03-10 17:01:16 -08001519 fib_select_multipath(res);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001520#endif
1521
1522 /* create a routing cache entry */
David S. Millerc6cffba2012-07-26 11:14:38 +00001523 return __mkroute_input(skb, res, in_dev, daddr, saddr, tos);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001524}
1525
Linus Torvalds1da177e2005-04-16 15:20:36 -07001526/*
1527  * NOTE. We drop all the packets that have local source
1528  * addresses, because every properly looped-back packet
1529  * must have the correct destination already attached by the output routine.
1530  *
1531  * This approach solves two big problems:
1532  * 1. Non-simplex devices are handled properly.
1533  * 2. IP spoofing attempts are filtered with a 100% guarantee.
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001534 * called with rcu_read_lock()
Linus Torvalds1da177e2005-04-16 15:20:36 -07001535 */
1536
Al Viro9e12bb22006-09-26 21:25:20 -07001537static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
David S. Millerc10237e2012-06-27 17:05:06 -07001538 u8 tos, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001539{
1540 struct fib_result res;
Eric Dumazet96d36222010-06-02 19:21:31 +00001541 struct in_device *in_dev = __in_dev_get_rcu(dev);
David S. Miller68a5e3d2011-03-11 20:07:33 -05001542 struct flowi4 fl4;
Eric Dumazet95c96172012-04-15 05:58:06 +00001543 unsigned int flags = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001544 u32 itag = 0;
Eric Dumazet95c96172012-04-15 05:58:06 +00001545 struct rtable *rth;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001546 int err = -EINVAL;
Daniel Baluta5e73ea12012-04-15 01:34:41 +00001547 struct net *net = dev_net(dev);
David S. Millerd2d68ba92012-07-17 12:58:50 -07001548 bool do_cache;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001549
1550 /* IP on this device is disabled. */
1551
1552 if (!in_dev)
1553 goto out;
1554
1555 /* Check for the most weird martians, which cannot be detected
1556  by fib_lookup.
1557 */
1558
Thomas Grafd0daebc32012-06-12 00:44:01 +00001559 if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001560 goto martian_source;
1561
David S. Millerd2d68ba92012-07-17 12:58:50 -07001562 res.fi = NULL;
Andy Walls27a954b2010-10-17 15:11:22 +00001563 if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001564 goto brd_input;
1565
1566 /* Accept zero addresses only for limited broadcast;
1567  * I do not even know whether to fix it or not. Waiting for complaints :-)
1568 */
Joe Perchesf97c1e02007-12-16 13:45:43 -08001569 if (ipv4_is_zeronet(saddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001570 goto martian_source;
1571
Thomas Grafd0daebc32012-06-12 00:44:01 +00001572 if (ipv4_is_zeronet(daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001573 goto martian_destination;
1574
Eric Dumazet9eb43e72012-08-03 21:27:25 +00001575 /* The following code tries to avoid calling IN_DEV_NET_ROUTE_LOCALNET(),
1576  * calling it at most once when daddr and/or saddr are loopback addresses
1577 */
1578 if (ipv4_is_loopback(daddr)) {
1579 if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
Thomas Grafd0daebc32012-06-12 00:44:01 +00001580 goto martian_destination;
Eric Dumazet9eb43e72012-08-03 21:27:25 +00001581 } else if (ipv4_is_loopback(saddr)) {
1582 if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
Thomas Grafd0daebc32012-06-12 00:44:01 +00001583 goto martian_source;
1584 }
1585
Linus Torvalds1da177e2005-04-16 15:20:36 -07001586 /*
1587 * Now we are ready to route packet.
1588 */
David S. Miller68a5e3d2011-03-11 20:07:33 -05001589 fl4.flowi4_oif = 0;
1590 fl4.flowi4_iif = dev->ifindex;
1591 fl4.flowi4_mark = skb->mark;
1592 fl4.flowi4_tos = tos;
1593 fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
1594 fl4.daddr = daddr;
1595 fl4.saddr = saddr;
1596 err = fib_lookup(net, &fl4, &res);
David S. Miller251da412012-06-26 16:27:09 -07001597 if (err != 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001598 goto no_route;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001599
1600 RT_CACHE_STAT_INC(in_slow_tot);
1601
1602 if (res.type == RTN_BROADCAST)
1603 goto brd_input;
1604
1605 if (res.type == RTN_LOCAL) {
Michael Smith5c04c812011-04-07 04:51:50 +00001606 err = fib_validate_source(skb, saddr, daddr, tos,
Pavel Emelyanov1fb94892012-08-08 21:53:36 +00001607 LOOPBACK_IFINDEX,
David S. Miller9e56e382012-06-28 18:54:02 -07001608 dev, in_dev, &itag);
Eric Dumazetb5f7e752010-06-02 12:05:27 +00001609 if (err < 0)
1610 goto martian_source_keep_err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001611 goto local_input;
1612 }
1613
1614 if (!IN_DEV_FORWARD(in_dev))
David S. Miller251da412012-06-26 16:27:09 -07001615 goto no_route;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001616 if (res.type != RTN_UNICAST)
1617 goto martian_destination;
1618
David S. Miller68a5e3d2011-03-11 20:07:33 -05001619 err = ip_mkroute_input(skb, &res, &fl4, in_dev, daddr, saddr, tos);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001620out: return err;
1621
1622brd_input:
1623 if (skb->protocol != htons(ETH_P_IP))
1624 goto e_inval;
1625
David S. Miller41347dc2012-06-28 04:05:27 -07001626 if (!ipv4_is_zeronet(saddr)) {
David S. Miller9e56e382012-06-28 18:54:02 -07001627 err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
1628 in_dev, &itag);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001629 if (err < 0)
Eric Dumazetb5f7e752010-06-02 12:05:27 +00001630 goto martian_source_keep_err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001631 }
1632 flags |= RTCF_BROADCAST;
1633 res.type = RTN_BROADCAST;
1634 RT_CACHE_STAT_INC(in_brd);
1635
1636local_input:
David S. Millerd2d68ba92012-07-17 12:58:50 -07001637 do_cache = false;
1638 if (res.fi) {
David S. Millerfe3edf42012-07-23 13:22:20 -07001639 if (!itag) {
Eric Dumazet54764bb2012-07-31 01:08:23 +00001640 rth = rcu_dereference(FIB_RES_NH(res).nh_rth_input);
David S. Millerd2d68ba92012-07-17 12:58:50 -07001641 if (rt_cache_valid(rth)) {
David S. Millerc6cffba2012-07-26 11:14:38 +00001642 skb_dst_set_noref(skb, &rth->dst);
1643 err = 0;
1644 goto out;
David S. Millerd2d68ba92012-07-17 12:58:50 -07001645 }
1646 do_cache = true;
1647 }
1648 }
1649
David S. Miller5c1e6aa2011-04-28 14:13:38 -07001650 rth = rt_dst_alloc(net->loopback_dev,
David S. Millerd2d68ba92012-07-17 12:58:50 -07001651 IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001652 if (!rth)
1653 goto e_nobufs;
1654
David S. Millercf911662011-04-28 14:31:47 -07001655 rth->dst.input= ip_local_deliver;
Changli Gaod8d1f302010-06-10 23:31:35 -07001656 rth->dst.output= ip_rt_bug;
David S. Millercf911662011-04-28 14:31:47 -07001657#ifdef CONFIG_IP_ROUTE_CLASSID
1658 rth->dst.tclassid = itag;
1659#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001660
David S. Millercf911662011-04-28 14:31:47 -07001661 rth->rt_genid = rt_genid(net);
1662 rth->rt_flags = flags|RTCF_LOCAL;
1663 rth->rt_type = res.type;
David S. Miller9917e1e82012-07-17 14:44:26 -07001664 rth->rt_is_input = 1;
David S. Miller13378ca2012-07-23 13:57:45 -07001665 rth->rt_iif = 0;
David S. Miller59436342012-07-10 06:58:42 -07001666 rth->rt_pmtu = 0;
David S. Millerf8126f12012-07-13 05:03:45 -07001667 rth->rt_gateway = 0;
Julian Anastasov155e8332012-10-08 11:41:18 +00001668 rth->rt_uses_gateway = 0;
David S. Millercaacf052012-07-31 15:06:50 -07001669 INIT_LIST_HEAD(&rth->rt_uncached);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001670 if (res.type == RTN_UNREACHABLE) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001671 rth->dst.input= ip_error;
1672 rth->dst.error= -err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001673 rth->rt_flags &= ~RTCF_LOCAL;
1674 }
David S. Millerd2d68ba92012-07-17 12:58:50 -07001675 if (do_cache)
1676 rt_cache_route(&FIB_RES_NH(res), rth);
David S. Miller89aef892012-07-17 11:00:09 -07001677 skb_dst_set(skb, &rth->dst);
David S. Millerb23dd4f2011-03-02 14:31:35 -08001678 err = 0;
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001679 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001680
1681no_route:
1682 RT_CACHE_STAT_INC(in_no_route);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001683 res.type = RTN_UNREACHABLE;
Mitsuru Chinen7f538782007-12-07 01:07:24 -08001684 if (err == -ESRCH)
1685 err = -ENETUNREACH;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001686 goto local_input;
1687
1688 /*
1689 * Do not cache martian addresses: they should be logged (RFC1812)
1690 */
1691martian_destination:
1692 RT_CACHE_STAT_INC(in_martian_dst);
1693#ifdef CONFIG_IP_ROUTE_VERBOSE
Joe Perchese87cc472012-05-13 21:56:26 +00001694 if (IN_DEV_LOG_MARTIANS(in_dev))
1695 net_warn_ratelimited("martian destination %pI4 from %pI4, dev %s\n",
1696 &daddr, &saddr, dev->name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001697#endif
Dietmar Eggemann2c2910a2005-06-28 13:06:23 -07001698
Linus Torvalds1da177e2005-04-16 15:20:36 -07001699e_inval:
1700 err = -EINVAL;
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001701 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001702
1703e_nobufs:
1704 err = -ENOBUFS;
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001705 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001706
1707martian_source:
Eric Dumazetb5f7e752010-06-02 12:05:27 +00001708 err = -EINVAL;
1709martian_source_keep_err:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001710 ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001711 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001712}
1713
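/* Entry point for input route lookups.  The "noref" suffix refers to the
 * fact that a cached dst may be attached to the skb without taking a
 * reference; callers that need to keep the dst must take their own.
 */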
David S. Millerc6cffba2012-07-26 11:14:38 +00001714int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1715 u8 tos, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001716{
Eric Dumazet96d36222010-06-02 19:21:31 +00001717 int res;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001718
Eric Dumazet96d36222010-06-02 19:21:31 +00001719 rcu_read_lock();
1720
Linus Torvalds1da177e2005-04-16 15:20:36 -07001721 /* Multicast recognition logic is moved from route cache to here.
1722 The problem was that too many Ethernet cards have broken/missing
1723 hardware multicast filters :-( As a result, a host on a multicast
1724 network acquires a lot of useless route cache entries, e.g. from
1725 SDR messages from all over the world. Now we try to get rid of them.
1726 Really, provided the software IP multicast filter is organized
1727 reasonably (at least, hashed), it does not result in a slowdown
1728 compared with route cache reject entries.
1729 Note that multicast routers are not affected, because
1730 a route cache entry is created eventually.
1731 */
Joe Perchesf97c1e02007-12-16 13:45:43 -08001732 if (ipv4_is_multicast(daddr)) {
Eric Dumazet96d36222010-06-02 19:21:31 +00001733 struct in_device *in_dev = __in_dev_get_rcu(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001734
Eric Dumazet96d36222010-06-02 19:21:31 +00001735 if (in_dev) {
David S. Millerdbdd9a52011-03-10 16:34:38 -08001736 int our = ip_check_mc_rcu(in_dev, daddr, saddr,
1737 ip_hdr(skb)->protocol);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001738 if (our
1739#ifdef CONFIG_IP_MROUTE
Joe Perches9d4fb272009-11-23 10:41:23 -08001740 ||
1741 (!ipv4_is_local_multicast(daddr) &&
1742 IN_DEV_MFORWARD(in_dev))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001743#endif
Joe Perches9d4fb272009-11-23 10:41:23 -08001744 ) {
Eric Dumazet96d36222010-06-02 19:21:31 +00001745 int res = ip_route_input_mc(skb, daddr, saddr,
1746 tos, dev, our);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001747 rcu_read_unlock();
Eric Dumazet96d36222010-06-02 19:21:31 +00001748 return res;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001749 }
1750 }
1751 rcu_read_unlock();
1752 return -EINVAL;
1753 }
David S. Millerc10237e2012-06-27 17:05:06 -07001754 res = ip_route_input_slow(skb, daddr, saddr, tos, dev);
Eric Dumazet96d36222010-06-02 19:21:31 +00001755 rcu_read_unlock();
1756 return res;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001757}
David S. Millerc6cffba2012-07-26 11:14:38 +00001758EXPORT_SYMBOL(ip_route_input_noref);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001759
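/* Build the dst for an output route chosen by fib_lookup().  A valid
 * per-nexthop (or nexthop-exception) cached route is reused when possible;
 * otherwise a new rtable is allocated and, when permitted, cached for
 * later lookups.
 */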
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001760/* called with rcu_read_lock() */
David S. Miller982721f2011-02-16 21:44:24 -08001761static struct rtable *__mkroute_output(const struct fib_result *res,
David Miller1a00fee2012-07-01 02:02:56 +00001762 const struct flowi4 *fl4, int orig_oif,
Julian Anastasovf61759e2011-12-02 11:39:42 +00001763 struct net_device *dev_out,
David S. Miller5ada5522011-02-17 15:29:00 -08001764 unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001765{
David S. Miller982721f2011-02-16 21:44:24 -08001766 struct fib_info *fi = res->fi;
David S. Millerf2bb4be2012-07-17 12:20:47 -07001767 struct fib_nh_exception *fnhe;
David S. Miller5ada5522011-02-17 15:29:00 -08001768 struct in_device *in_dev;
David S. Miller982721f2011-02-16 21:44:24 -08001769 u16 type = res->type;
David S. Miller5ada5522011-02-17 15:29:00 -08001770 struct rtable *rth;
Julian Anastasovc92b9652012-10-08 11:41:19 +00001771 bool do_cache;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001772
Thomas Grafd0daebc32012-06-12 00:44:01 +00001773 in_dev = __in_dev_get_rcu(dev_out);
1774 if (!in_dev)
David S. Miller5ada5522011-02-17 15:29:00 -08001775 return ERR_PTR(-EINVAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001776
Thomas Grafd0daebc32012-06-12 00:44:01 +00001777 if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
1778 if (ipv4_is_loopback(fl4->saddr) && !(dev_out->flags & IFF_LOOPBACK))
1779 return ERR_PTR(-EINVAL);
1780
David S. Miller68a5e3d2011-03-11 20:07:33 -05001781 if (ipv4_is_lbcast(fl4->daddr))
David S. Miller982721f2011-02-16 21:44:24 -08001782 type = RTN_BROADCAST;
David S. Miller68a5e3d2011-03-11 20:07:33 -05001783 else if (ipv4_is_multicast(fl4->daddr))
David S. Miller982721f2011-02-16 21:44:24 -08001784 type = RTN_MULTICAST;
David S. Miller68a5e3d2011-03-11 20:07:33 -05001785 else if (ipv4_is_zeronet(fl4->daddr))
David S. Miller5ada5522011-02-17 15:29:00 -08001786 return ERR_PTR(-EINVAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001787
1788 if (dev_out->flags & IFF_LOOPBACK)
1789 flags |= RTCF_LOCAL;
1790
Julian Anastasov63617422012-11-22 23:04:14 +02001791 do_cache = true;
David S. Miller982721f2011-02-16 21:44:24 -08001792 if (type == RTN_BROADCAST) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001793 flags |= RTCF_BROADCAST | RTCF_LOCAL;
David S. Miller982721f2011-02-16 21:44:24 -08001794 fi = NULL;
1795 } else if (type == RTN_MULTICAST) {
Eric Dumazetdd28d1a2010-09-29 11:53:50 +00001796 flags |= RTCF_MULTICAST | RTCF_LOCAL;
David S. Miller813b3b52011-04-28 14:48:42 -07001797 if (!ip_check_mc_rcu(in_dev, fl4->daddr, fl4->saddr,
1798 fl4->flowi4_proto))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001799 flags &= ~RTCF_LOCAL;
Julian Anastasov63617422012-11-22 23:04:14 +02001800 else
1801 do_cache = false;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001802 /* If a multicast route does not exist, use
Eric Dumazetdd28d1a2010-09-29 11:53:50 +00001803 * the default one, but do not gateway in this case.
1804  * Yes, it is a hack.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001805 */
David S. Miller982721f2011-02-16 21:44:24 -08001806 if (fi && res->prefixlen < 4)
1807 fi = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001808 }
1809
David S. Millerf2bb4be2012-07-17 12:20:47 -07001810 fnhe = NULL;
Julian Anastasov63617422012-11-22 23:04:14 +02001811 do_cache &= fi != NULL;
1812 if (do_cache) {
David S. Millerc5038a82012-07-31 15:02:02 -07001813 struct rtable __rcu **prth;
Julian Anastasovc92b9652012-10-08 11:41:19 +00001814 struct fib_nh *nh = &FIB_RES_NH(*res);
Eric Dumazetd26b3a72012-07-31 05:45:30 +00001815
Julian Anastasovc92b9652012-10-08 11:41:19 +00001816 fnhe = find_exception(nh, fl4->daddr);
David S. Millerc5038a82012-07-31 15:02:02 -07001817 if (fnhe)
1818 prth = &fnhe->fnhe_rth;
Julian Anastasovc92b9652012-10-08 11:41:19 +00001819 else {
1820 if (unlikely(fl4->flowi4_flags &
1821 FLOWI_FLAG_KNOWN_NH &&
1822 !(nh->nh_gw &&
1823 nh->nh_scope == RT_SCOPE_LINK))) {
1824 do_cache = false;
1825 goto add;
1826 }
1827 prth = __this_cpu_ptr(nh->nh_pcpu_rth_output);
1828 }
David S. Millerc5038a82012-07-31 15:02:02 -07001829 rth = rcu_dereference(*prth);
1830 if (rt_cache_valid(rth)) {
1831 dst_hold(&rth->dst);
1832 return rth;
David S. Millerf2bb4be2012-07-17 12:20:47 -07001833 }
1834 }
Julian Anastasovc92b9652012-10-08 11:41:19 +00001835
1836add:
David S. Miller5c1e6aa2011-04-28 14:13:38 -07001837 rth = rt_dst_alloc(dev_out,
1838 IN_DEV_CONF_GET(in_dev, NOPOLICY),
David S. Millerf2bb4be2012-07-17 12:20:47 -07001839 IN_DEV_CONF_GET(in_dev, NOXFRM),
Julian Anastasovc92b9652012-10-08 11:41:19 +00001840 do_cache);
Dimitris Michailidis8391d072010-10-07 14:48:38 +00001841 if (!rth)
David S. Miller5ada5522011-02-17 15:29:00 -08001842 return ERR_PTR(-ENOBUFS);
Dimitris Michailidis8391d072010-10-07 14:48:38 +00001843
David S. Millercf911662011-04-28 14:31:47 -07001844 rth->dst.output = ip_output;
1845
David S. Millercf911662011-04-28 14:31:47 -07001846 rth->rt_genid = rt_genid(dev_net(dev_out));
1847 rth->rt_flags = flags;
1848 rth->rt_type = type;
David S. Miller9917e1e82012-07-17 14:44:26 -07001849 rth->rt_is_input = 0;
David S. Miller13378ca2012-07-23 13:57:45 -07001850 rth->rt_iif = orig_oif ? : 0;
David S. Miller59436342012-07-10 06:58:42 -07001851 rth->rt_pmtu = 0;
David S. Millerf8126f12012-07-13 05:03:45 -07001852 rth->rt_gateway = 0;
Julian Anastasov155e8332012-10-08 11:41:18 +00001853 rth->rt_uses_gateway = 0;
David S. Millercaacf052012-07-31 15:06:50 -07001854 INIT_LIST_HEAD(&rth->rt_uncached);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001855
1856 RT_CACHE_STAT_INC(out_slow_tot);
1857
David S. Miller41347dc2012-06-28 04:05:27 -07001858 if (flags & RTCF_LOCAL)
Changli Gaod8d1f302010-06-10 23:31:35 -07001859 rth->dst.input = ip_local_deliver;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001860 if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001861 if (flags & RTCF_LOCAL &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001862 !(dev_out->flags & IFF_LOOPBACK)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001863 rth->dst.output = ip_mc_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001864 RT_CACHE_STAT_INC(out_slow_mc);
1865 }
1866#ifdef CONFIG_IP_MROUTE
David S. Miller982721f2011-02-16 21:44:24 -08001867 if (type == RTN_MULTICAST) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001868 if (IN_DEV_MFORWARD(in_dev) &&
David S. Miller813b3b52011-04-28 14:48:42 -07001869 !ipv4_is_local_multicast(fl4->daddr)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001870 rth->dst.input = ip_mr_input;
1871 rth->dst.output = ip_mc_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001872 }
1873 }
1874#endif
1875 }
1876
David S. Millerf2bb4be2012-07-17 12:20:47 -07001877 rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001878
David S. Miller5ada5522011-02-17 15:29:00 -08001879 return rth;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001880}
1881
Linus Torvalds1da177e2005-04-16 15:20:36 -07001882/*
1883 * Major route resolver routine.
1884 */
1885
David S. Miller89aef892012-07-17 11:00:09 -07001886struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001887{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001888 struct net_device *dev_out = NULL;
Julian Anastasovf61759e2011-12-02 11:39:42 +00001889 __u8 tos = RT_FL_TOS(fl4);
David S. Miller813b3b52011-04-28 14:48:42 -07001890 unsigned int flags = 0;
1891 struct fib_result res;
David S. Miller5ada5522011-02-17 15:29:00 -08001892 struct rtable *rth;
David S. Miller813b3b52011-04-28 14:48:42 -07001893 int orig_oif;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001894
David S. Miller85b91b02012-07-13 08:21:29 -07001895 res.tclassid = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001896 res.fi = NULL;
David S. Miller8b96d222012-06-11 02:01:56 -07001897 res.table = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001898
David S. Miller813b3b52011-04-28 14:48:42 -07001899 orig_oif = fl4->flowi4_oif;
1900
Pavel Emelyanov1fb94892012-08-08 21:53:36 +00001901 fl4->flowi4_iif = LOOPBACK_IFINDEX;
David S. Miller813b3b52011-04-28 14:48:42 -07001902 fl4->flowi4_tos = tos & IPTOS_RT_MASK;
1903 fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
1904 RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
David S. Miller44713b62011-03-04 21:24:47 -08001905
David S. Miller010c2702011-02-17 15:37:09 -08001906 rcu_read_lock();
David S. Miller813b3b52011-04-28 14:48:42 -07001907 if (fl4->saddr) {
David S. Millerb23dd4f2011-03-02 14:31:35 -08001908 rth = ERR_PTR(-EINVAL);
David S. Miller813b3b52011-04-28 14:48:42 -07001909 if (ipv4_is_multicast(fl4->saddr) ||
1910 ipv4_is_lbcast(fl4->saddr) ||
1911 ipv4_is_zeronet(fl4->saddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001912 goto out;
1913
Linus Torvalds1da177e2005-04-16 15:20:36 -07001914 /* I removed the check for oif == dev_out->oif here.
1915  It was wrong for two reasons:
Denis V. Lunev1ab35272008-01-22 22:04:30 -08001916 1. ip_dev_find(net, saddr) can return the wrong iface if saddr
1917  is assigned to multiple interfaces.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001918 2. Moreover, we are allowed to send packets with the saddr
1919  of another iface. --ANK
1920 */
1921
David S. Miller813b3b52011-04-28 14:48:42 -07001922 if (fl4->flowi4_oif == 0 &&
1923 (ipv4_is_multicast(fl4->daddr) ||
1924 ipv4_is_lbcast(fl4->daddr))) {
Julian Anastasova210d012008-10-01 07:28:28 -07001925 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
David S. Miller813b3b52011-04-28 14:48:42 -07001926 dev_out = __ip_dev_find(net, fl4->saddr, false);
Julian Anastasova210d012008-10-01 07:28:28 -07001927 if (dev_out == NULL)
1928 goto out;
1929
Linus Torvalds1da177e2005-04-16 15:20:36 -07001930 /* Special hack: the user can direct multicasts
1931  and limited broadcast via the necessary interface
1932  without fiddling with IP_MULTICAST_IF or IP_PKTINFO.
1933  This hack is not just for fun, it allows
1934  vic, vat and friends to work.
1935  They bind the socket to loopback, set ttl to zero
1936  and expect that it will work.
1937  From the viewpoint of the routing cache they are broken,
1938  because we are not allowed to build a multicast path
1939  with a loopback source addr (look, the routing cache
1940  cannot know that ttl is zero, so the packet
1941  will not leave this host and the route is valid).
1942  Luckily, this hack is a good workaround.
1943 */
1944
David S. Miller813b3b52011-04-28 14:48:42 -07001945 fl4->flowi4_oif = dev_out->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001946 goto make_route;
1947 }
Julian Anastasova210d012008-10-01 07:28:28 -07001948
David S. Miller813b3b52011-04-28 14:48:42 -07001949 if (!(fl4->flowi4_flags & FLOWI_FLAG_ANYSRC)) {
Julian Anastasova210d012008-10-01 07:28:28 -07001950 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
David S. Miller813b3b52011-04-28 14:48:42 -07001951 if (!__ip_dev_find(net, fl4->saddr, false))
Julian Anastasova210d012008-10-01 07:28:28 -07001952 goto out;
Julian Anastasova210d012008-10-01 07:28:28 -07001953 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001954 }
1955
1956
David S. Miller813b3b52011-04-28 14:48:42 -07001957 if (fl4->flowi4_oif) {
1958 dev_out = dev_get_by_index_rcu(net, fl4->flowi4_oif);
David S. Millerb23dd4f2011-03-02 14:31:35 -08001959 rth = ERR_PTR(-ENODEV);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001960 if (dev_out == NULL)
1961 goto out;
Herbert Xue5ed6392005-10-03 14:35:55 -07001962
1963 /* RACE: Check return value of inet_select_addr instead. */
Eric Dumazetfc75fc82010-12-22 04:39:39 +00001964 if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) {
David S. Millerb23dd4f2011-03-02 14:31:35 -08001965 rth = ERR_PTR(-ENETUNREACH);
Eric Dumazetfc75fc82010-12-22 04:39:39 +00001966 goto out;
1967 }
David S. Miller813b3b52011-04-28 14:48:42 -07001968 if (ipv4_is_local_multicast(fl4->daddr) ||
1969 ipv4_is_lbcast(fl4->daddr)) {
1970 if (!fl4->saddr)
1971 fl4->saddr = inet_select_addr(dev_out, 0,
1972 RT_SCOPE_LINK);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001973 goto make_route;
1974 }
David S. Miller813b3b52011-04-28 14:48:42 -07001975 if (fl4->saddr) {
1976 if (ipv4_is_multicast(fl4->daddr))
1977 fl4->saddr = inet_select_addr(dev_out, 0,
1978 fl4->flowi4_scope);
1979 else if (!fl4->daddr)
1980 fl4->saddr = inet_select_addr(dev_out, 0,
1981 RT_SCOPE_HOST);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001982 }
1983 }
1984
David S. Miller813b3b52011-04-28 14:48:42 -07001985 if (!fl4->daddr) {
1986 fl4->daddr = fl4->saddr;
1987 if (!fl4->daddr)
1988 fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK);
Denis V. Lunevb40afd02008-01-22 22:06:19 -08001989 dev_out = net->loopback_dev;
Pavel Emelyanov1fb94892012-08-08 21:53:36 +00001990 fl4->flowi4_oif = LOOPBACK_IFINDEX;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001991 res.type = RTN_LOCAL;
1992 flags |= RTCF_LOCAL;
1993 goto make_route;
1994 }
1995
David S. Miller813b3b52011-04-28 14:48:42 -07001996 if (fib_lookup(net, fl4, &res)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001997 res.fi = NULL;
David S. Miller8b96d222012-06-11 02:01:56 -07001998 res.table = NULL;
David S. Miller813b3b52011-04-28 14:48:42 -07001999 if (fl4->flowi4_oif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002000 /* Apparently, the routing tables are wrong. Assume
2001  that the destination is on link.
2002 
2003  WHY? DW.
2004  Because we are allowed to send to an iface
2005  even if it has NO routes and NO assigned
2006  addresses. When oif is specified, routing
2007  tables are looked up with only one purpose:
2008  to check whether the destination is gatewayed, rather than
2009  direct. Moreover, if MSG_DONTROUTE is set,
2010  we send the packet, ignoring both routing tables
2011  and ifaddr state. --ANK
2011 and ifaddr state. --ANK
2012
2013
2014 We could make it even if oif is unknown,
2015 likely IPv6, but we do not.
2016 */
2017
David S. Miller813b3b52011-04-28 14:48:42 -07002018 if (fl4->saddr == 0)
2019 fl4->saddr = inet_select_addr(dev_out, 0,
2020 RT_SCOPE_LINK);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002021 res.type = RTN_UNICAST;
2022 goto make_route;
2023 }
David S. Millerb23dd4f2011-03-02 14:31:35 -08002024 rth = ERR_PTR(-ENETUNREACH);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002025 goto out;
2026 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002027
2028 if (res.type == RTN_LOCAL) {
David S. Miller813b3b52011-04-28 14:48:42 -07002029 if (!fl4->saddr) {
Joel Sing9fc3bbb2011-01-03 20:24:20 +00002030 if (res.fi->fib_prefsrc)
David S. Miller813b3b52011-04-28 14:48:42 -07002031 fl4->saddr = res.fi->fib_prefsrc;
Joel Sing9fc3bbb2011-01-03 20:24:20 +00002032 else
David S. Miller813b3b52011-04-28 14:48:42 -07002033 fl4->saddr = fl4->daddr;
Joel Sing9fc3bbb2011-01-03 20:24:20 +00002034 }
Denis V. Lunevb40afd02008-01-22 22:06:19 -08002035 dev_out = net->loopback_dev;
David S. Miller813b3b52011-04-28 14:48:42 -07002036 fl4->flowi4_oif = dev_out->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002037 flags |= RTCF_LOCAL;
2038 goto make_route;
2039 }
2040
2041#ifdef CONFIG_IP_ROUTE_MULTIPATH
David S. Miller813b3b52011-04-28 14:48:42 -07002042 if (res.fi->fib_nhs > 1 && fl4->flowi4_oif == 0)
David S. Miller1b7fe5932011-03-10 17:01:16 -08002043 fib_select_multipath(&res);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002044 else
2045#endif
David S. Miller21d8c492011-04-14 14:49:37 -07002046 if (!res.prefixlen &&
2047 res.table->tb_num_default > 1 &&
David S. Miller813b3b52011-04-28 14:48:42 -07002048 res.type == RTN_UNICAST && !fl4->flowi4_oif)
David S. Miller0c838ff2011-01-31 16:16:50 -08002049 fib_select_default(&res);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002050
David S. Miller813b3b52011-04-28 14:48:42 -07002051 if (!fl4->saddr)
2052 fl4->saddr = FIB_RES_PREFSRC(net, res);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002053
Linus Torvalds1da177e2005-04-16 15:20:36 -07002054 dev_out = FIB_RES_DEV(res);
David S. Miller813b3b52011-04-28 14:48:42 -07002055 fl4->flowi4_oif = dev_out->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002056
2057
2058make_route:
David Miller1a00fee2012-07-01 02:02:56 +00002059 rth = __mkroute_output(&res, fl4, orig_oif, dev_out, flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002060
David S. Miller010c2702011-02-17 15:37:09 -08002061out:
2062 rcu_read_unlock();
David S. Millerb23dd4f2011-03-02 14:31:35 -08002063 return rth;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002064}
Arnaldo Carvalho de Melod8c97a92005-08-09 20:12:12 -07002065EXPORT_SYMBOL_GPL(__ip_route_output_key);
2066
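/* The dst_ops below back ipv4_blackhole_route(): a copy of a real route
 * whose input/output handlers simply discard packets and whose metrics can
 * no longer change (PMTU updates, redirects and metric writes are no-ops).
 */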
Jianzhao Wangae2688d2010-09-08 14:35:43 -07002067static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie)
2068{
2069 return NULL;
2070}
2071
Steffen Klassertebb762f2011-11-23 02:12:51 +00002072static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst)
Roland Dreierec831ea2011-01-31 13:16:00 -08002073{
Steffen Klassert618f9bc2011-11-23 02:13:31 +00002074 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
2075
2076 return mtu ? : dst->dev->mtu;
Roland Dreierec831ea2011-01-31 13:16:00 -08002077}
2078
David S. Miller6700c272012-07-17 03:29:28 -07002079static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
2080 struct sk_buff *skb, u32 mtu)
David S. Miller14e50e52007-05-24 18:17:54 -07002081{
2082}
2083
David S. Miller6700c272012-07-17 03:29:28 -07002084static void ipv4_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
2085 struct sk_buff *skb)
David S. Millerb587ee32012-07-12 00:39:24 -07002086{
2087}
2088
Held Bernhard0972ddb2011-04-24 22:07:32 +00002089static u32 *ipv4_rt_blackhole_cow_metrics(struct dst_entry *dst,
2090 unsigned long old)
2091{
2092 return NULL;
2093}
2094
David S. Miller14e50e52007-05-24 18:17:54 -07002095static struct dst_ops ipv4_dst_blackhole_ops = {
2096 .family = AF_INET,
Harvey Harrison09640e632009-02-01 00:45:17 -08002097 .protocol = cpu_to_be16(ETH_P_IP),
Jianzhao Wangae2688d2010-09-08 14:35:43 -07002098 .check = ipv4_blackhole_dst_check,
Steffen Klassertebb762f2011-11-23 02:12:51 +00002099 .mtu = ipv4_blackhole_mtu,
Eric Dumazet214f45c2011-02-18 11:39:01 -08002100 .default_advmss = ipv4_default_advmss,
David S. Miller14e50e52007-05-24 18:17:54 -07002101 .update_pmtu = ipv4_rt_blackhole_update_pmtu,
David S. Millerb587ee32012-07-12 00:39:24 -07002102 .redirect = ipv4_rt_blackhole_redirect,
Held Bernhard0972ddb2011-04-24 22:07:32 +00002103 .cow_metrics = ipv4_rt_blackhole_cow_metrics,
David S. Millerd3aaeb32011-07-18 00:40:17 -07002104 .neigh_lookup = ipv4_neigh_lookup,
David S. Miller14e50e52007-05-24 18:17:54 -07002105};
2106
David S. Miller2774c132011-03-01 14:59:04 -08002107struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig)
David S. Miller14e50e52007-05-24 18:17:54 -07002108{
David S. Miller2774c132011-03-01 14:59:04 -08002109 struct rtable *ort = (struct rtable *) dst_orig;
David S. Millerf5b0a872012-07-19 12:31:33 -07002110 struct rtable *rt;
David S. Miller14e50e52007-05-24 18:17:54 -07002111
David S. Millerf5b0a872012-07-19 12:31:33 -07002112 rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_NONE, 0);
David S. Miller14e50e52007-05-24 18:17:54 -07002113 if (rt) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002114 struct dst_entry *new = &rt->dst;
David S. Miller14e50e52007-05-24 18:17:54 -07002115
David S. Miller14e50e52007-05-24 18:17:54 -07002116 new->__use = 1;
Herbert Xu352e5122007-11-13 21:34:06 -08002117 new->input = dst_discard;
2118 new->output = dst_discard;
David S. Miller14e50e52007-05-24 18:17:54 -07002119
Changli Gaod8d1f302010-06-10 23:31:35 -07002120 new->dev = ort->dst.dev;
David S. Miller14e50e52007-05-24 18:17:54 -07002121 if (new->dev)
2122 dev_hold(new->dev);
2123
David S. Miller9917e1e82012-07-17 14:44:26 -07002124 rt->rt_is_input = ort->rt_is_input;
David S. Miller5e2b61f2011-03-04 21:47:09 -08002125 rt->rt_iif = ort->rt_iif;
David S. Miller59436342012-07-10 06:58:42 -07002126 rt->rt_pmtu = ort->rt_pmtu;
David S. Miller14e50e52007-05-24 18:17:54 -07002127
Denis V. Luneve84f84f2008-07-05 19:04:32 -07002128 rt->rt_genid = rt_genid(net);
David S. Miller14e50e52007-05-24 18:17:54 -07002129 rt->rt_flags = ort->rt_flags;
2130 rt->rt_type = ort->rt_type;
David S. Miller14e50e52007-05-24 18:17:54 -07002131 rt->rt_gateway = ort->rt_gateway;
Julian Anastasov155e8332012-10-08 11:41:18 +00002132 rt->rt_uses_gateway = ort->rt_uses_gateway;
David S. Miller14e50e52007-05-24 18:17:54 -07002133
David S. Millercaacf052012-07-31 15:06:50 -07002134 INIT_LIST_HEAD(&rt->rt_uncached);
2135
David S. Miller14e50e52007-05-24 18:17:54 -07002136 dst_free(new);
2137 }
2138
David S. Miller2774c132011-03-01 14:59:04 -08002139 dst_release(dst_orig);
2140
2141 return rt ? &rt->dst : ERR_PTR(-ENOMEM);
David S. Miller14e50e52007-05-24 18:17:54 -07002142}
2143
David S. Miller9d6ec932011-03-12 01:12:47 -05002144struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
David S. Millerb23dd4f2011-03-02 14:31:35 -08002145 struct sock *sk)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002146{
David S. Miller9d6ec932011-03-12 01:12:47 -05002147 struct rtable *rt = __ip_route_output_key(net, flp4);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002148
David S. Millerb23dd4f2011-03-02 14:31:35 -08002149 if (IS_ERR(rt))
2150 return rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002151
David S. Miller56157872011-05-02 14:37:45 -07002152 if (flp4->flowi4_proto)
David S. Miller9d6ec932011-03-12 01:12:47 -05002153 rt = (struct rtable *) xfrm_lookup(net, &rt->dst,
2154 flowi4_to_flowi(flp4),
2155 sk, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002156
David S. Millerb23dd4f2011-03-02 14:31:35 -08002157 return rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002158}
Arnaldo Carvalho de Melod8c97a92005-08-09 20:12:12 -07002159EXPORT_SYMBOL_GPL(ip_route_output_flow);
2160
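/* Translate an rtable into an RTM_NEWROUTE netlink message: route type and
 * flags, addresses, oif/iif, gateway, metrics (including a still-valid
 * cached PMTU) and, for input routes, any multicast forwarding state.
 */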
David S. Millerf1ce3062012-07-12 10:10:17 -07002161static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
Eric W. Biederman15e47302012-09-07 20:12:54 +00002162 struct flowi4 *fl4, struct sk_buff *skb, u32 portid,
David S. Millerf1ce3062012-07-12 10:10:17 -07002163 u32 seq, int event, int nowait, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002164{
Eric Dumazet511c3f92009-06-02 05:14:27 +00002165 struct rtable *rt = skb_rtable(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002166 struct rtmsg *r;
Thomas Grafbe403ea2006-08-17 18:15:17 -07002167 struct nlmsghdr *nlh;
Steffen Klassert2bc8ca42011-10-11 01:12:02 +00002168 unsigned long expires = 0;
David S. Millerf1850712012-07-10 07:26:01 -07002169 u32 error;
Julian Anastasov521f5492012-07-20 12:02:08 +03002170 u32 metrics[RTAX_MAX];
Thomas Grafbe403ea2006-08-17 18:15:17 -07002171
Eric W. Biederman15e47302012-09-07 20:12:54 +00002172 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*r), flags);
Thomas Grafbe403ea2006-08-17 18:15:17 -07002173 if (nlh == NULL)
Patrick McHardy26932562007-01-31 23:16:40 -08002174 return -EMSGSIZE;
Thomas Grafbe403ea2006-08-17 18:15:17 -07002175
2176 r = nlmsg_data(nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002177 r->rtm_family = AF_INET;
2178 r->rtm_dst_len = 32;
2179 r->rtm_src_len = 0;
David Millerd6c0a4f2012-07-01 02:02:59 +00002180 r->rtm_tos = fl4->flowi4_tos;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002181 r->rtm_table = RT_TABLE_MAIN;
David S. Millerf3756b72012-04-01 20:39:02 -04002182 if (nla_put_u32(skb, RTA_TABLE, RT_TABLE_MAIN))
2183 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002184 r->rtm_type = rt->rt_type;
2185 r->rtm_scope = RT_SCOPE_UNIVERSE;
2186 r->rtm_protocol = RTPROT_UNSPEC;
2187 r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
2188 if (rt->rt_flags & RTCF_NOTIFY)
2189 r->rtm_flags |= RTM_F_NOTIFY;
Thomas Grafbe403ea2006-08-17 18:15:17 -07002190
David S. Millerf1ce3062012-07-12 10:10:17 -07002191 if (nla_put_be32(skb, RTA_DST, dst))
David S. Millerf3756b72012-04-01 20:39:02 -04002192 goto nla_put_failure;
David Miller1a00fee2012-07-01 02:02:56 +00002193 if (src) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002194 r->rtm_src_len = 32;
David Miller1a00fee2012-07-01 02:02:56 +00002195 if (nla_put_be32(skb, RTA_SRC, src))
David S. Millerf3756b72012-04-01 20:39:02 -04002196 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002197 }
David S. Millerf3756b72012-04-01 20:39:02 -04002198 if (rt->dst.dev &&
2199 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2200 goto nla_put_failure;
Patrick McHardyc7066f72011-01-14 13:36:42 +01002201#ifdef CONFIG_IP_ROUTE_CLASSID
David S. Millerf3756b72012-04-01 20:39:02 -04002202 if (rt->dst.tclassid &&
2203 nla_put_u32(skb, RTA_FLOW, rt->dst.tclassid))
2204 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002205#endif
David S. Miller41347dc2012-06-28 04:05:27 -07002206 if (!rt_is_input_route(rt) &&
David Millerd6c0a4f2012-07-01 02:02:59 +00002207 fl4->saddr != src) {
2208 if (nla_put_be32(skb, RTA_PREFSRC, fl4->saddr))
David S. Millerf3756b72012-04-01 20:39:02 -04002209 goto nla_put_failure;
2210 }
Julian Anastasov155e8332012-10-08 11:41:18 +00002211 if (rt->rt_uses_gateway &&
David S. Millerf3756b72012-04-01 20:39:02 -04002212 nla_put_be32(skb, RTA_GATEWAY, rt->rt_gateway))
2213 goto nla_put_failure;
Thomas Grafbe403ea2006-08-17 18:15:17 -07002214
Steffen Klassertee9a8f72012-10-08 00:56:54 +00002215 expires = rt->dst.expires;
2216 if (expires) {
2217 unsigned long now = jiffies;
2218
2219 if (time_before(now, expires))
2220 expires -= now;
2221 else
2222 expires = 0;
2223 }
2224
Julian Anastasov521f5492012-07-20 12:02:08 +03002225 memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
Steffen Klassertee9a8f72012-10-08 00:56:54 +00002226 if (rt->rt_pmtu && expires)
Julian Anastasov521f5492012-07-20 12:02:08 +03002227 metrics[RTAX_MTU - 1] = rt->rt_pmtu;
2228 if (rtnetlink_put_metrics(skb, metrics) < 0)
Thomas Grafbe403ea2006-08-17 18:15:17 -07002229 goto nla_put_failure;
2230
David Millerb4869882012-07-01 02:03:01 +00002231 if (fl4->flowi4_mark &&
stephen hemminger68aaed52012-10-10 08:27:25 +00002232 nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark))
David S. Millerf3756b72012-04-01 20:39:02 -04002233 goto nla_put_failure;
Eric Dumazet963bfee2010-07-20 22:03:14 +00002234
Changli Gaod8d1f302010-06-10 23:31:35 -07002235 error = rt->dst.error;
Thomas Grafbe403ea2006-08-17 18:15:17 -07002236
David S. Millerc7537962010-11-11 17:07:48 -08002237 if (rt_is_input_route(rt)) {
Nicolas Dichtel8caaf7b2012-12-04 01:03:07 +00002238#ifdef CONFIG_IP_MROUTE
2239 if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) &&
2240 IPV4_DEVCONF_ALL(net, MC_FORWARDING)) {
2241 int err = ipmr_get_route(net, skb,
2242 fl4->saddr, fl4->daddr,
2243 r, nowait);
2244 if (err <= 0) {
2245 if (!nowait) {
2246 if (err == 0)
2247 return 0;
2248 goto nla_put_failure;
2249 } else {
2250 if (err == -EMSGSIZE)
2251 goto nla_put_failure;
2252 error = err;
2253 }
2254 }
2255 } else
2256#endif
2257 if (nla_put_u32(skb, RTA_IIF, rt->rt_iif))
2258 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002259 }
2260
David S. Millerf1850712012-07-10 07:26:01 -07002261 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, error) < 0)
Thomas Grafe3703b32006-11-27 09:27:07 -08002262 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002263
Thomas Grafbe403ea2006-08-17 18:15:17 -07002264 return nlmsg_end(skb, nlh);
2265
2266nla_put_failure:
Patrick McHardy26932562007-01-31 23:16:40 -08002267 nlmsg_cancel(skb, nlh);
2268 return -EMSGSIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002269}
2270
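/* RTM_GETROUTE handler: build a dummy skb, resolve the requested route via
 * ip_route_input() when RTA_IIF is supplied or ip_route_output_key()
 * otherwise, and report the result back through rt_fill_info().
 */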
Daniel Baluta5e73ea12012-04-15 01:34:41 +00002271static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002272{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09002273 struct net *net = sock_net(in_skb->sk);
Thomas Grafd889ce32006-08-17 18:15:44 -07002274 struct rtmsg *rtm;
2275 struct nlattr *tb[RTA_MAX+1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07002276 struct rtable *rt = NULL;
David Millerd6c0a4f2012-07-01 02:02:59 +00002277 struct flowi4 fl4;
Al Viro9e12bb22006-09-26 21:25:20 -07002278 __be32 dst = 0;
2279 __be32 src = 0;
2280 u32 iif;
Thomas Grafd889ce32006-08-17 18:15:44 -07002281 int err;
Eric Dumazet963bfee2010-07-20 22:03:14 +00002282 int mark;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002283 struct sk_buff *skb;
2284
Thomas Grafd889ce32006-08-17 18:15:44 -07002285 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy);
2286 if (err < 0)
2287 goto errout;
2288
2289 rtm = nlmsg_data(nlh);
2290
Linus Torvalds1da177e2005-04-16 15:20:36 -07002291 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
Thomas Grafd889ce32006-08-17 18:15:44 -07002292 if (skb == NULL) {
2293 err = -ENOBUFS;
2294 goto errout;
2295 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002296
2297 /* Reserve room for dummy headers, this skb can pass
2298 through good chunk of routing engine.
2299 */
Arnaldo Carvalho de Melo459a98e2007-03-19 15:30:44 -07002300 skb_reset_mac_header(skb);
Arnaldo Carvalho de Meloc1d2bbe2007-04-10 20:45:18 -07002301 skb_reset_network_header(skb);
Stephen Hemmingerd2c962b2006-04-17 17:27:11 -07002302
2303 /* Bugfix: need to give ip_route_input enough of an IP header to not gag. */
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07002304 ip_hdr(skb)->protocol = IPPROTO_ICMP;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002305 skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));
2306
Al Viro17fb2c62006-09-26 22:15:25 -07002307 src = tb[RTA_SRC] ? nla_get_be32(tb[RTA_SRC]) : 0;
2308 dst = tb[RTA_DST] ? nla_get_be32(tb[RTA_DST]) : 0;
Thomas Grafd889ce32006-08-17 18:15:44 -07002309 iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
Eric Dumazet963bfee2010-07-20 22:03:14 +00002310 mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002311
David Millerd6c0a4f2012-07-01 02:02:59 +00002312 memset(&fl4, 0, sizeof(fl4));
2313 fl4.daddr = dst;
2314 fl4.saddr = src;
2315 fl4.flowi4_tos = rtm->rtm_tos;
2316 fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0;
2317 fl4.flowi4_mark = mark;
2318
Linus Torvalds1da177e2005-04-16 15:20:36 -07002319 if (iif) {
Thomas Grafd889ce32006-08-17 18:15:44 -07002320 struct net_device *dev;
2321
Denis V. Lunev19375042008-02-28 20:52:04 -08002322 dev = __dev_get_by_index(net, iif);
Thomas Grafd889ce32006-08-17 18:15:44 -07002323 if (dev == NULL) {
2324 err = -ENODEV;
2325 goto errout_free;
2326 }
2327
Linus Torvalds1da177e2005-04-16 15:20:36 -07002328 skb->protocol = htons(ETH_P_IP);
2329 skb->dev = dev;
Eric Dumazet963bfee2010-07-20 22:03:14 +00002330 skb->mark = mark;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002331 local_bh_disable();
2332 err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev);
2333 local_bh_enable();
Thomas Grafd889ce32006-08-17 18:15:44 -07002334
Eric Dumazet511c3f92009-06-02 05:14:27 +00002335 rt = skb_rtable(skb);
Changli Gaod8d1f302010-06-10 23:31:35 -07002336 if (err == 0 && rt->dst.error)
2337 err = -rt->dst.error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002338 } else {
David S. Miller9d6ec932011-03-12 01:12:47 -05002339 rt = ip_route_output_key(net, &fl4);
David S. Millerb23dd4f2011-03-02 14:31:35 -08002340
2341 err = 0;
2342 if (IS_ERR(rt))
2343 err = PTR_ERR(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002344 }
Thomas Grafd889ce32006-08-17 18:15:44 -07002345
Linus Torvalds1da177e2005-04-16 15:20:36 -07002346 if (err)
Thomas Grafd889ce32006-08-17 18:15:44 -07002347 goto errout_free;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002348
Changli Gaod8d1f302010-06-10 23:31:35 -07002349 skb_dst_set(skb, &rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002350 if (rtm->rtm_flags & RTM_F_NOTIFY)
2351 rt->rt_flags |= RTCF_NOTIFY;
2352
David S. Millerf1ce3062012-07-12 10:10:17 -07002353 err = rt_fill_info(net, dst, src, &fl4, skb,
Eric W. Biederman15e47302012-09-07 20:12:54 +00002354 NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
Denis V. Lunev19375042008-02-28 20:52:04 -08002355 RTM_NEWROUTE, 0, 0);
Thomas Grafd889ce32006-08-17 18:15:44 -07002356 if (err <= 0)
2357 goto errout_free;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002358
Eric W. Biederman15e47302012-09-07 20:12:54 +00002359 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
Thomas Grafd889ce32006-08-17 18:15:44 -07002360errout:
Thomas Graf2942e902006-08-15 00:30:25 -07002361 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002362
Thomas Grafd889ce32006-08-17 18:15:44 -07002363errout_free:
Linus Torvalds1da177e2005-04-16 15:20:36 -07002364 kfree_skb(skb);
Thomas Grafd889ce32006-08-17 18:15:44 -07002365 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002366}
2367
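/* RTM_GETROUTE dump stub: there is nothing to walk here, so returning
 * skb->len simply reports an empty dump.  Note that the rtnl_register()
 * call in ip_rt_init() below passes NULL as the dump callback.
 */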
2368int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb)
2369{
Linus Torvalds1da177e2005-04-16 15:20:36 -07002370 return skb->len;
2371}
2372
2373void ip_rt_multicast_event(struct in_device *in_dev)
2374{
Nicolas Dichtel4ccfe6d2012-09-07 00:45:29 +00002375 rt_cache_flush(dev_net(in_dev->dev));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002376}
2377
2378#ifdef CONFIG_SYSCTL
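/* Handler for the per-namespace "flush" sysctl: any write flushes the
 * routing cache of the namespace stored in ->extra1, while reads are
 * rejected with -EINVAL (the entry is write-only, mode 0200).
 */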
Denis V. Lunev81c684d2008-07-08 03:05:28 -07002379static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write,
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07002380 void __user *buffer,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002381 size_t *lenp, loff_t *ppos)
2382{
2383 if (write) {
Nicolas Dichtel4ccfe6d2012-09-07 00:45:29 +00002384 rt_cache_flush((struct net *)__ctl->extra1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002385 return 0;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002386 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002387
2388 return -EINVAL;
2389}
2390
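/* Global IPv4 routing tunables, exported under /proc/sys/net/ipv4/route/
 * and registered from ip_static_sysctl_init() below.
 */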
Al Viroeeb61f72008-07-27 08:59:33 +01002391static ctl_table ipv4_route_table[] = {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002392 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002393 .procname = "gc_thresh",
2394 .data = &ipv4_dst_ops.gc_thresh,
2395 .maxlen = sizeof(int),
2396 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002397 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002398 },
2399 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002400 .procname = "max_size",
2401 .data = &ip_rt_max_size,
2402 .maxlen = sizeof(int),
2403 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002404 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002405 },
2406 {
2407 /* Deprecated. Use gc_min_interval_ms */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002408
Linus Torvalds1da177e2005-04-16 15:20:36 -07002409 .procname = "gc_min_interval",
2410 .data = &ip_rt_gc_min_interval,
2411 .maxlen = sizeof(int),
2412 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002413 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002414 },
2415 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002416 .procname = "gc_min_interval_ms",
2417 .data = &ip_rt_gc_min_interval,
2418 .maxlen = sizeof(int),
2419 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002420 .proc_handler = proc_dointvec_ms_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002421 },
2422 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002423 .procname = "gc_timeout",
2424 .data = &ip_rt_gc_timeout,
2425 .maxlen = sizeof(int),
2426 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002427 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002428 },
2429 {
Eric Dumazet9f28a2f2011-12-21 15:47:16 -05002430 .procname = "gc_interval",
2431 .data = &ip_rt_gc_interval,
2432 .maxlen = sizeof(int),
2433 .mode = 0644,
2434 .proc_handler = proc_dointvec_jiffies,
2435 },
2436 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002437 .procname = "redirect_load",
2438 .data = &ip_rt_redirect_load,
2439 .maxlen = sizeof(int),
2440 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002441 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002442 },
2443 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002444 .procname = "redirect_number",
2445 .data = &ip_rt_redirect_number,
2446 .maxlen = sizeof(int),
2447 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002448 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002449 },
2450 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002451 .procname = "redirect_silence",
2452 .data = &ip_rt_redirect_silence,
2453 .maxlen = sizeof(int),
2454 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002455 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002456 },
2457 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002458 .procname = "error_cost",
2459 .data = &ip_rt_error_cost,
2460 .maxlen = sizeof(int),
2461 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002462 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002463 },
2464 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002465 .procname = "error_burst",
2466 .data = &ip_rt_error_burst,
2467 .maxlen = sizeof(int),
2468 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002469 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002470 },
2471 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002472 .procname = "gc_elasticity",
2473 .data = &ip_rt_gc_elasticity,
2474 .maxlen = sizeof(int),
2475 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002476 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002477 },
2478 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002479 .procname = "mtu_expires",
2480 .data = &ip_rt_mtu_expires,
2481 .maxlen = sizeof(int),
2482 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002483 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002484 },
2485 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002486 .procname = "min_pmtu",
2487 .data = &ip_rt_min_pmtu,
2488 .maxlen = sizeof(int),
2489 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002490 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002491 },
2492 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002493 .procname = "min_adv_mss",
2494 .data = &ip_rt_min_advmss,
2495 .maxlen = sizeof(int),
2496 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002497 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002498 },
Eric W. Biedermanf8572d82009-11-05 13:32:03 -08002499 { }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002500};
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002501
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002502static struct ctl_table ipv4_route_flush_table[] = {
2503 {
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002504 .procname = "flush",
2505 .maxlen = sizeof(int),
2506 .mode = 0200,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002507 .proc_handler = ipv4_sysctl_rtcache_flush,
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002508 },
Eric W. Biedermanf8572d82009-11-05 13:32:03 -08002509 { },
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002510};
2511
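/* Register the "flush" sysctl for a namespace.  For namespaces other
 * than init_net the table is duplicated so ->extra1 can point at the
 * right struct net, and the entry is hidden from namespaces owned by an
 * unprivileged user namespace.
 */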
2512static __net_init int sysctl_route_net_init(struct net *net)
2513{
2514 struct ctl_table *tbl;
2515
2516 tbl = ipv4_route_flush_table;
Octavian Purdila09ad9bc2009-11-25 15:14:13 -08002517 if (!net_eq(net, &init_net)) {
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002518 tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL);
2519 if (tbl == NULL)
2520 goto err_dup;
Eric W. Biederman464dc802012-11-16 03:02:59 +00002521
2522 /* Don't export sysctls to unprivileged users */
2523 if (net->user_ns != &init_user_ns)
2524 tbl[0].procname = NULL;
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002525 }
2526 tbl[0].extra1 = net;
2527
Eric W. Biedermanec8f23c2012-04-19 13:44:49 +00002528 net->ipv4.route_hdr = register_net_sysctl(net, "net/ipv4/route", tbl);
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002529 if (net->ipv4.route_hdr == NULL)
2530 goto err_reg;
2531 return 0;
2532
2533err_reg:
2534 if (tbl != ipv4_route_flush_table)
2535 kfree(tbl);
2536err_dup:
2537 return -ENOMEM;
2538}
2539
2540static __net_exit void sysctl_route_net_exit(struct net *net)
2541{
2542 struct ctl_table *tbl;
2543
2544 tbl = net->ipv4.route_hdr->ctl_table_arg;
2545 unregister_net_sysctl_table(net->ipv4.route_hdr);
2546 BUG_ON(tbl == ipv4_route_flush_table);
2547 kfree(tbl);
2548}
2549
2550static __net_initdata struct pernet_operations sysctl_route_ops = {
2551 .init = sysctl_route_net_init,
2552 .exit = sysctl_route_net_exit,
2553};
Linus Torvalds1da177e2005-04-16 15:20:36 -07002554#endif
2555
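/* Per-namespace route generation state: rt_genid starts at zero and is
 * bumped by rt_cache_flush() to invalidate cached routes, while
 * dev_addr_genid is seeded with random bytes.
 */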
Neil Horman3ee94372010-05-08 01:57:52 -07002556static __net_init int rt_genid_init(struct net *net)
Denis V. Lunev9f5e97e2008-07-05 19:02:59 -07002557{
Nicolas Dichtelb42664f2012-09-10 22:09:44 +00002558 atomic_set(&net->rt_genid, 0);
David S. Miller436c3b62011-03-24 17:42:21 -07002559 get_random_bytes(&net->ipv4.dev_addr_genid,
2560 sizeof(net->ipv4.dev_addr_genid));
Denis V. Lunev9f5e97e2008-07-05 19:02:59 -07002561 return 0;
2562}
2563
Neil Horman3ee94372010-05-08 01:57:52 -07002564static __net_initdata struct pernet_operations rt_genid_ops = {
2565 .init = rt_genid_init,
Denis V. Lunev9f5e97e2008-07-05 19:02:59 -07002566};
2567
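/* Each namespace gets its own inet_peer_base for per-destination peer
 * state; on namespace exit the peer tree is invalidated and freed.
 */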
David S. Millerc3426b42012-06-09 16:27:05 -07002568static int __net_init ipv4_inetpeer_init(struct net *net)
2569{
2570 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
2571
2572 if (!bp)
2573 return -ENOMEM;
2574 inet_peer_base_init(bp);
2575 net->ipv4.peers = bp;
2576 return 0;
2577}
2578
2579static void __net_exit ipv4_inetpeer_exit(struct net *net)
2580{
2581 struct inet_peer_base *bp = net->ipv4.peers;
2582
2583 net->ipv4.peers = NULL;
David S. Miller56a6b242012-06-09 16:32:41 -07002584 inetpeer_invalidate_tree(bp);
David S. Millerc3426b42012-06-09 16:27:05 -07002585 kfree(bp);
2586}
2587
2588static __net_initdata struct pernet_operations ipv4_inetpeer_ops = {
2589 .init = ipv4_inetpeer_init,
2590 .exit = ipv4_inetpeer_exit,
2591};
Denis V. Lunev9f5e97e2008-07-05 19:02:59 -07002592
Patrick McHardyc7066f72011-01-14 13:36:42 +01002593#ifdef CONFIG_IP_ROUTE_CLASSID
Tejun Heo7d720c32010-02-16 15:20:26 +00002594struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
Patrick McHardyc7066f72011-01-14 13:36:42 +01002595#endif /* CONFIG_IP_ROUTE_CLASSID */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002596
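/* Boot-time initialization of the IPv4 routing layer: allocate the
 * optional per-cpu classid accounting, create the dst slab cache and
 * entry counters, register the /proc files, XFRM hooks, the
 * RTM_GETROUTE handler and the per-namespace subsystems defined above.
 */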
Linus Torvalds1da177e2005-04-16 15:20:36 -07002597int __init ip_rt_init(void)
2598{
Eric Dumazet424c4b72005-07-05 14:58:19 -07002599 int rc = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002600
Patrick McHardyc7066f72011-01-14 13:36:42 +01002601#ifdef CONFIG_IP_ROUTE_CLASSID
Ingo Molnar0dcec8c2009-02-25 14:07:33 +01002602 ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002603 if (!ip_rt_acct)
2604 panic("IP: failed to allocate ip_rt_acct\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002605#endif
2606
Alexey Dobriyane5d679f332006-08-26 19:25:52 -07002607 ipv4_dst_ops.kmem_cachep =
2608 kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0,
Paul Mundt20c2df82007-07-20 10:11:58 +09002609 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002610
David S. Miller14e50e52007-05-24 18:17:54 -07002611 ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep;
2612
Eric Dumazetfc66f952010-10-08 06:37:34 +00002613 if (dst_entries_init(&ipv4_dst_ops) < 0)
2614 panic("IP: failed to allocate ipv4_dst_ops counter\n");
2615
2616 if (dst_entries_init(&ipv4_dst_blackhole_ops) < 0)
2617 panic("IP: failed to allocate ipv4_dst_blackhole_ops counter\n");
2618
David S. Miller89aef892012-07-17 11:00:09 -07002619 ipv4_dst_ops.gc_thresh = ~0;
2620 ip_rt_max_size = INT_MAX;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002621
Linus Torvalds1da177e2005-04-16 15:20:36 -07002622 devinet_init();
2623 ip_fib_init();
2624
Denis V. Lunev73b38712008-02-28 20:51:18 -08002625 if (ip_rt_proc_init())
Joe Perches058bd4d2012-03-11 18:36:11 +00002626 pr_err("Unable to create route proc files\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002627#ifdef CONFIG_XFRM
2628 xfrm_init();
Steffen Klassert703fb942012-11-13 08:52:24 +01002629 xfrm4_init();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002630#endif
Greg Rosec7ac8672011-06-10 01:27:09 +00002631 rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL, NULL);
Thomas Graf63f34442007-03-22 11:55:17 -07002632
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002633#ifdef CONFIG_SYSCTL
2634 register_pernet_subsys(&sysctl_route_ops);
2635#endif
Neil Horman3ee94372010-05-08 01:57:52 -07002636 register_pernet_subsys(&rt_genid_ops);
David S. Millerc3426b42012-06-09 16:27:05 -07002637 register_pernet_subsys(&ipv4_inetpeer_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002638 return rc;
2639}
2640
Al Viroa1bc6eb2008-07-30 06:32:52 -04002641#ifdef CONFIG_SYSCTL
Al Viroeeb61f72008-07-27 08:59:33 +01002642/*
2643 * We really need to sanitize the damn ipv4 init order, then all
2644 * this nonsense will go away.
2645 */
2646void __init ip_static_sysctl_init(void)
2647{
Eric W. Biederman4e5ca782012-04-19 13:32:39 +00002648 register_net_sysctl(&init_net, "net/ipv4/route", ipv4_route_table);
Al Viroeeb61f72008-07-27 08:59:33 +01002649}
Al Viroa1bc6eb2008-07-30 06:32:52 -04002650#endif