blob: fd9af60397b590dd817e02a52a33fdf99bb24c8e [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * ROUTE - implementation of the IP router.
7 *
Jesper Juhl02c30a82005-05-05 16:16:16 -07008 * Authors: Ross Biro
Linus Torvalds1da177e2005-04-16 15:20:36 -07009 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
10 * Alan Cox, <gw4pts@gw4pts.ampr.org>
11 * Linus Torvalds, <Linus.Torvalds@helsinki.fi>
12 * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13 *
14 * Fixes:
15 * Alan Cox : Verify area fixes.
16 * Alan Cox : cli() protects routing changes
17 * Rui Oliveira : ICMP routing table updates
18 * (rco@di.uminho.pt) Routing table insertion and update
19 * Linus Torvalds : Rewrote bits to be sensible
20 * Alan Cox : Added BSD route gw semantics
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090021 * Alan Cox : Super /proc >4K
Linus Torvalds1da177e2005-04-16 15:20:36 -070022 * Alan Cox : MTU in route table
23 * Alan Cox : MSS actually. Also added the window
24 * clamper.
25 * Sam Lantinga : Fixed route matching in rt_del()
26 * Alan Cox : Routing cache support.
27 * Alan Cox : Removed compatibility cruft.
28 * Alan Cox : RTF_REJECT support.
29 * Alan Cox : TCP irtt support.
30 * Jonathan Naylor : Added Metric support.
31 * Miquel van Smoorenburg : BSD API fixes.
32 * Miquel van Smoorenburg : Metrics.
33 * Alan Cox : Use __u32 properly
34 * Alan Cox : Aligned routing errors more closely with BSD
35 * our system is still very different.
36 * Alan Cox : Faster /proc handling
37 * Alexey Kuznetsov : Massive rework to support tree based routing,
38 * routing caches and better behaviour.
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090039 *
Linus Torvalds1da177e2005-04-16 15:20:36 -070040 * Olaf Erb : irtt wasn't being copied right.
41 * Bjorn Ekwall : Kerneld route support.
42 * Alan Cox : Multicast fixed (I hope)
43 * Pavel Krauz : Limited broadcast fixed
44 * Mike McLagan : Routing by source
45 * Alexey Kuznetsov : End of old history. Split to fib.c and
46 * route.c and rewritten from scratch.
47 * Andi Kleen : Load-limit warning messages.
48 * Vitaly E. Lavrov : Transparent proxy revived after year coma.
49 * Vitaly E. Lavrov : Race condition in ip_route_input_slow.
50 * Tobias Ringstrom : Uninitialized res.type in ip_route_output_slow.
51 * Vladimir V. Ivanov : IP rule info (flowid) is really useful.
52 * Marc Boucher : routing by fwmark
53 * Robert Olsson : Added rt_cache statistics
54 * Arnaldo C. Melo : Convert proc stuff to seq_file
Eric Dumazetbb1d23b2005-07-05 15:00:32 -070055 * Eric Dumazet : hashed spinlocks and rt_check_expire() fixes.
Ilia Sotnikovcef26852006-03-25 01:38:55 -080056 * Ilia Sotnikov : Ignore TOS on PMTUD and Redirect
57 * Ilia Sotnikov : Removed TOS from hash calculations
Linus Torvalds1da177e2005-04-16 15:20:36 -070058 *
59 * This program is free software; you can redistribute it and/or
60 * modify it under the terms of the GNU General Public License
61 * as published by the Free Software Foundation; either version
62 * 2 of the License, or (at your option) any later version.
63 */
64
Joe Perchesafd465032012-03-12 07:03:32 +000065#define pr_fmt(fmt) "IPv4: " fmt
66
Linus Torvalds1da177e2005-04-16 15:20:36 -070067#include <linux/module.h>
68#include <asm/uaccess.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070069#include <linux/bitops.h>
70#include <linux/types.h>
71#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070072#include <linux/mm.h>
73#include <linux/string.h>
74#include <linux/socket.h>
75#include <linux/sockios.h>
76#include <linux/errno.h>
77#include <linux/in.h>
78#include <linux/inet.h>
79#include <linux/netdevice.h>
80#include <linux/proc_fs.h>
81#include <linux/init.h>
82#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070083#include <linux/inetdevice.h>
84#include <linux/igmp.h>
85#include <linux/pkt_sched.h>
86#include <linux/mroute.h>
87#include <linux/netfilter_ipv4.h>
88#include <linux/random.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070089#include <linux/rcupdate.h>
90#include <linux/times.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090091#include <linux/slab.h>
Herbert Xu352e5122007-11-13 21:34:06 -080092#include <net/dst.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020093#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070094#include <net/protocol.h>
95#include <net/ip.h>
96#include <net/route.h>
97#include <net/inetpeer.h>
98#include <net/sock.h>
99#include <net/ip_fib.h>
100#include <net/arp.h>
101#include <net/tcp.h>
102#include <net/icmp.h>
103#include <net/xfrm.h>
Tom Tucker8d717402006-07-30 20:43:36 -0700104#include <net/netevent.h>
Thomas Graf63f34442007-03-22 11:55:17 -0700105#include <net/rtnetlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700106#ifdef CONFIG_SYSCTL
107#include <linux/sysctl.h>
Shan Wei7426a562012-04-18 18:05:46 +0000108#include <linux/kmemleak.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700109#endif
David S. Miller6e5714e2011-08-03 20:50:44 -0700110#include <net/secure_seq.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700111
/* Mask a flow key's TOS field down to IPTOS_RT_MASK plus the RTO_ONLINK bit. */
#define RT_FL_TOS(oldflp4) \
	((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))

#define IP_MAX_MTU	0xFFF0

/* Expressed in jiffies (scaled by HZ). */
#define RT_GC_TIMEOUT	(300*HZ)
118
Linus Torvalds1da177e2005-04-16 15:20:36 -0700119static int ip_rt_max_size;
Stephen Hemminger817bc4d2008-03-22 17:43:59 -0700120static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT;
Eric Dumazet9f28a2f2011-12-21 15:47:16 -0500121static int ip_rt_gc_interval __read_mostly = 60 * HZ;
Stephen Hemminger817bc4d2008-03-22 17:43:59 -0700122static int ip_rt_gc_min_interval __read_mostly = HZ / 2;
123static int ip_rt_redirect_number __read_mostly = 9;
124static int ip_rt_redirect_load __read_mostly = HZ / 50;
125static int ip_rt_redirect_silence __read_mostly = ((HZ / 50) << (9 + 1));
126static int ip_rt_error_cost __read_mostly = HZ;
127static int ip_rt_error_burst __read_mostly = 5 * HZ;
128static int ip_rt_gc_elasticity __read_mostly = 8;
129static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ;
130static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20;
131static int ip_rt_min_advmss __read_mostly = 256;
Eric Dumazet9f28a2f2011-12-21 15:47:16 -0500132
Linus Torvalds1da177e2005-04-16 15:20:36 -0700133/*
134 * Interface to generic destination cache.
135 */
136
137static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
David S. Miller0dbaee32010-12-13 12:52:14 -0800138static unsigned int ipv4_default_advmss(const struct dst_entry *dst);
Steffen Klassertebb762f2011-11-23 02:12:51 +0000139static unsigned int ipv4_mtu(const struct dst_entry *dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700140static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
141static void ipv4_link_failure(struct sk_buff *skb);
David S. Miller6700c272012-07-17 03:29:28 -0700142static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
143 struct sk_buff *skb, u32 mtu);
144static void ip_do_redirect(struct dst_entry *dst, struct sock *sk,
145 struct sk_buff *skb);
David S. Millercaacf052012-07-31 15:06:50 -0700146static void ipv4_dst_destroy(struct dst_entry *dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700147
/* dst_ops->ifdown hook for IPv4 routes: deliberately does nothing. */
static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
			    int how)
{
	/* No per-route work on interface down. */
}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700152
David S. Miller62fa8a82011-01-26 20:51:05 -0800153static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
154{
David S. Miller31248732012-07-10 07:08:18 -0700155 WARN_ON(1);
156 return NULL;
David S. Miller62fa8a82011-01-26 20:51:05 -0800157}
158
David S. Millerf894cbf2012-07-02 21:52:24 -0700159static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
160 struct sk_buff *skb,
161 const void *daddr);
David S. Millerd3aaeb32011-07-18 00:40:17 -0700162
Linus Torvalds1da177e2005-04-16 15:20:36 -0700163static struct dst_ops ipv4_dst_ops = {
164 .family = AF_INET,
Harvey Harrison09640e632009-02-01 00:45:17 -0800165 .protocol = cpu_to_be16(ETH_P_IP),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700166 .check = ipv4_dst_check,
David S. Miller0dbaee32010-12-13 12:52:14 -0800167 .default_advmss = ipv4_default_advmss,
Steffen Klassertebb762f2011-11-23 02:12:51 +0000168 .mtu = ipv4_mtu,
David S. Miller62fa8a82011-01-26 20:51:05 -0800169 .cow_metrics = ipv4_cow_metrics,
David S. Millercaacf052012-07-31 15:06:50 -0700170 .destroy = ipv4_dst_destroy,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700171 .ifdown = ipv4_dst_ifdown,
172 .negative_advice = ipv4_negative_advice,
173 .link_failure = ipv4_link_failure,
174 .update_pmtu = ip_rt_update_pmtu,
David S. Millere47a1852012-07-11 20:55:47 -0700175 .redirect = ip_do_redirect,
Herbert Xu1ac06e02008-05-20 14:32:14 -0700176 .local_out = __ip_local_out,
David S. Millerd3aaeb32011-07-18 00:40:17 -0700177 .neigh_lookup = ipv4_neigh_lookup,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700178};
179
180#define ECN_OR_COST(class) TC_PRIO_##class
181
Philippe De Muyter4839c522007-07-09 15:32:57 -0700182const __u8 ip_tos2prio[16] = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700183 TC_PRIO_BESTEFFORT,
Dan Siemon4a2b9c32011-03-15 13:56:07 +0000184 ECN_OR_COST(BESTEFFORT),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700185 TC_PRIO_BESTEFFORT,
186 ECN_OR_COST(BESTEFFORT),
187 TC_PRIO_BULK,
188 ECN_OR_COST(BULK),
189 TC_PRIO_BULK,
190 ECN_OR_COST(BULK),
191 TC_PRIO_INTERACTIVE,
192 ECN_OR_COST(INTERACTIVE),
193 TC_PRIO_INTERACTIVE,
194 ECN_OR_COST(INTERACTIVE),
195 TC_PRIO_INTERACTIVE_BULK,
196 ECN_OR_COST(INTERACTIVE_BULK),
197 TC_PRIO_INTERACTIVE_BULK,
198 ECN_OR_COST(INTERACTIVE_BULK)
199};
Amir Vadaid4a96862012-04-04 21:33:28 +0000200EXPORT_SYMBOL(ip_tos2prio);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700201
Eric Dumazet2f970d82006-01-17 02:54:36 -0800202static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
Eric Dumazet27f39c73e2010-05-19 22:07:23 +0000203#define RT_CACHE_STAT_INC(field) __this_cpu_inc(rt_cache_stat.field)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700204
Linus Torvalds1da177e2005-04-16 15:20:36 -0700205#ifdef CONFIG_PROC_FS
Linus Torvalds1da177e2005-04-16 15:20:36 -0700206static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
207{
Eric Dumazet29e75252008-01-31 17:05:09 -0800208 if (*pos)
David S. Miller89aef892012-07-17 11:00:09 -0700209 return NULL;
Eric Dumazet29e75252008-01-31 17:05:09 -0800210 return SEQ_START_TOKEN;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700211}
212
213static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos)
214{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700215 ++*pos;
David S. Miller89aef892012-07-17 11:00:09 -0700216 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700217}
218
/* seq_file ->stop: nothing was acquired in ->start, so nothing to release. */
static void rt_cache_seq_stop(struct seq_file *seq, void *v)
{
}
222
223static int rt_cache_seq_show(struct seq_file *seq, void *v)
224{
225 if (v == SEQ_START_TOKEN)
226 seq_printf(seq, "%-127s\n",
227 "Iface\tDestination\tGateway \tFlags\t\tRefCnt\tUse\t"
228 "Metric\tSource\t\tMTU\tWindow\tIRTT\tTOS\tHHRef\t"
229 "HHUptod\tSpecDst");
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900230 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700231}
232
Stephen Hemmingerf6908082007-03-12 14:34:29 -0700233static const struct seq_operations rt_cache_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700234 .start = rt_cache_seq_start,
235 .next = rt_cache_seq_next,
236 .stop = rt_cache_seq_stop,
237 .show = rt_cache_seq_show,
238};
239
240static int rt_cache_seq_open(struct inode *inode, struct file *file)
241{
David S. Miller89aef892012-07-17 11:00:09 -0700242 return seq_open(file, &rt_cache_seq_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700243}
244
Arjan van de Ven9a321442007-02-12 00:55:35 -0800245static const struct file_operations rt_cache_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700246 .owner = THIS_MODULE,
247 .open = rt_cache_seq_open,
248 .read = seq_read,
249 .llseek = seq_lseek,
David S. Miller89aef892012-07-17 11:00:09 -0700250 .release = seq_release,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700251};
252
253
254static void *rt_cpu_seq_start(struct seq_file *seq, loff_t *pos)
255{
256 int cpu;
257
258 if (*pos == 0)
259 return SEQ_START_TOKEN;
260
Rusty Russell0f23174a2008-12-29 12:23:42 +0000261 for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700262 if (!cpu_possible(cpu))
263 continue;
264 *pos = cpu+1;
Eric Dumazet2f970d82006-01-17 02:54:36 -0800265 return &per_cpu(rt_cache_stat, cpu);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700266 }
267 return NULL;
268}
269
270static void *rt_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
271{
272 int cpu;
273
Rusty Russell0f23174a2008-12-29 12:23:42 +0000274 for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700275 if (!cpu_possible(cpu))
276 continue;
277 *pos = cpu+1;
Eric Dumazet2f970d82006-01-17 02:54:36 -0800278 return &per_cpu(rt_cache_stat, cpu);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700279 }
280 return NULL;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900281
Linus Torvalds1da177e2005-04-16 15:20:36 -0700282}
283
/* seq_file ->stop: the iterator holds no resources, so this is empty. */
static void rt_cpu_seq_stop(struct seq_file *seq, void *v)
{
}
288
289static int rt_cpu_seq_show(struct seq_file *seq, void *v)
290{
291 struct rt_cache_stat *st = v;
292
293 if (v == SEQ_START_TOKEN) {
Olaf Rempel5bec0032005-04-28 12:16:08 -0700294 seq_printf(seq, "entries in_hit in_slow_tot in_slow_mc in_no_route in_brd in_martian_dst in_martian_src out_hit out_slow_tot out_slow_mc gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700295 return 0;
296 }
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900297
Linus Torvalds1da177e2005-04-16 15:20:36 -0700298 seq_printf(seq,"%08x %08x %08x %08x %08x %08x %08x %08x "
299 " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n",
Eric Dumazetfc66f952010-10-08 06:37:34 +0000300 dst_entries_get_slow(&ipv4_dst_ops),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700301 st->in_hit,
302 st->in_slow_tot,
303 st->in_slow_mc,
304 st->in_no_route,
305 st->in_brd,
306 st->in_martian_dst,
307 st->in_martian_src,
308
309 st->out_hit,
310 st->out_slow_tot,
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900311 st->out_slow_mc,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700312
313 st->gc_total,
314 st->gc_ignored,
315 st->gc_goal_miss,
316 st->gc_dst_overflow,
317 st->in_hlist_search,
318 st->out_hlist_search
319 );
320 return 0;
321}
322
Stephen Hemmingerf6908082007-03-12 14:34:29 -0700323static const struct seq_operations rt_cpu_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700324 .start = rt_cpu_seq_start,
325 .next = rt_cpu_seq_next,
326 .stop = rt_cpu_seq_stop,
327 .show = rt_cpu_seq_show,
328};
329
330
331static int rt_cpu_seq_open(struct inode *inode, struct file *file)
332{
333 return seq_open(file, &rt_cpu_seq_ops);
334}
335
Arjan van de Ven9a321442007-02-12 00:55:35 -0800336static const struct file_operations rt_cpu_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700337 .owner = THIS_MODULE,
338 .open = rt_cpu_seq_open,
339 .read = seq_read,
340 .llseek = seq_lseek,
341 .release = seq_release,
342};
343
Patrick McHardyc7066f72011-01-14 13:36:42 +0100344#ifdef CONFIG_IP_ROUTE_CLASSID
Alexey Dobriyana661c412009-11-25 15:40:35 -0800345static int rt_acct_proc_show(struct seq_file *m, void *v)
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800346{
Alexey Dobriyana661c412009-11-25 15:40:35 -0800347 struct ip_rt_acct *dst, *src;
348 unsigned int i, j;
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800349
Alexey Dobriyana661c412009-11-25 15:40:35 -0800350 dst = kcalloc(256, sizeof(struct ip_rt_acct), GFP_KERNEL);
351 if (!dst)
352 return -ENOMEM;
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800353
Alexey Dobriyana661c412009-11-25 15:40:35 -0800354 for_each_possible_cpu(i) {
355 src = (struct ip_rt_acct *)per_cpu_ptr(ip_rt_acct, i);
356 for (j = 0; j < 256; j++) {
357 dst[j].o_bytes += src[j].o_bytes;
358 dst[j].o_packets += src[j].o_packets;
359 dst[j].i_bytes += src[j].i_bytes;
360 dst[j].i_packets += src[j].i_packets;
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800361 }
362 }
Alexey Dobriyana661c412009-11-25 15:40:35 -0800363
364 seq_write(m, dst, 256 * sizeof(struct ip_rt_acct));
365 kfree(dst);
366 return 0;
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800367}
Alexey Dobriyana661c412009-11-25 15:40:35 -0800368
369static int rt_acct_proc_open(struct inode *inode, struct file *file)
370{
371 return single_open(file, rt_acct_proc_show, NULL);
372}
373
374static const struct file_operations rt_acct_proc_fops = {
375 .owner = THIS_MODULE,
376 .open = rt_acct_proc_open,
377 .read = seq_read,
378 .llseek = seq_lseek,
379 .release = single_release,
380};
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800381#endif
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800382
Denis V. Lunev73b38712008-02-28 20:51:18 -0800383static int __net_init ip_rt_do_proc_init(struct net *net)
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800384{
385 struct proc_dir_entry *pde;
386
387 pde = proc_net_fops_create(net, "rt_cache", S_IRUGO,
388 &rt_cache_seq_fops);
389 if (!pde)
390 goto err1;
391
Wang Chen77020722008-02-28 14:14:25 -0800392 pde = proc_create("rt_cache", S_IRUGO,
393 net->proc_net_stat, &rt_cpu_seq_fops);
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800394 if (!pde)
395 goto err2;
396
Patrick McHardyc7066f72011-01-14 13:36:42 +0100397#ifdef CONFIG_IP_ROUTE_CLASSID
Alexey Dobriyana661c412009-11-25 15:40:35 -0800398 pde = proc_create("rt_acct", 0, net->proc_net, &rt_acct_proc_fops);
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800399 if (!pde)
400 goto err3;
401#endif
402 return 0;
403
Patrick McHardyc7066f72011-01-14 13:36:42 +0100404#ifdef CONFIG_IP_ROUTE_CLASSID
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800405err3:
406 remove_proc_entry("rt_cache", net->proc_net_stat);
407#endif
408err2:
409 remove_proc_entry("rt_cache", net->proc_net);
410err1:
411 return -ENOMEM;
412}
Denis V. Lunev73b38712008-02-28 20:51:18 -0800413
414static void __net_exit ip_rt_do_proc_exit(struct net *net)
415{
416 remove_proc_entry("rt_cache", net->proc_net_stat);
417 remove_proc_entry("rt_cache", net->proc_net);
Patrick McHardyc7066f72011-01-14 13:36:42 +0100418#ifdef CONFIG_IP_ROUTE_CLASSID
Denis V. Lunev73b38712008-02-28 20:51:18 -0800419 remove_proc_entry("rt_acct", net->proc_net);
Alexey Dobriyan0a931ac2010-01-17 03:32:50 +0000420#endif
Denis V. Lunev73b38712008-02-28 20:51:18 -0800421}
422
423static struct pernet_operations ip_rt_proc_ops __net_initdata = {
424 .init = ip_rt_do_proc_init,
425 .exit = ip_rt_do_proc_exit,
426};
427
428static int __init ip_rt_proc_init(void)
429{
430 return register_pernet_subsys(&ip_rt_proc_ops);
431}
432
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800433#else
/* Without CONFIG_PROC_FS there is nothing to register: report success. */
static inline int ip_rt_proc_init(void)
{
	return 0;
}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700438#endif /* CONFIG_PROC_FS */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900439
Eric Dumazet4331deb2012-07-25 05:11:23 +0000440static inline bool rt_is_expired(const struct rtable *rth)
Denis V. Luneve84f84f2008-07-05 19:04:32 -0700441{
Changli Gaod8d1f302010-06-10 23:31:35 -0700442 return rth->rt_genid != rt_genid(dev_net(rth->dst.dev));
Denis V. Luneve84f84f2008-07-05 19:04:32 -0700443}
444
/*
 * Bump @net's route generation id.  rt_is_expired() compares against
 * that id, so routes recorded under the previous value then test as
 * expired.
 */
void rt_cache_flush(struct net *net)
{
	rt_genid_bump(net);
}
449
David S. Millerf894cbf2012-07-02 21:52:24 -0700450static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
451 struct sk_buff *skb,
452 const void *daddr)
David Miller3769cff2011-07-11 22:44:24 +0000453{
David S. Millerd3aaeb32011-07-18 00:40:17 -0700454 struct net_device *dev = dst->dev;
455 const __be32 *pkey = daddr;
David S. Miller39232972012-01-26 15:22:32 -0500456 const struct rtable *rt;
David Miller3769cff2011-07-11 22:44:24 +0000457 struct neighbour *n;
458
David S. Miller39232972012-01-26 15:22:32 -0500459 rt = (const struct rtable *) dst;
David S. Millera263b302012-07-02 02:02:15 -0700460 if (rt->rt_gateway)
David S. Miller39232972012-01-26 15:22:32 -0500461 pkey = (const __be32 *) &rt->rt_gateway;
David S. Millerf894cbf2012-07-02 21:52:24 -0700462 else if (skb)
463 pkey = &ip_hdr(skb)->daddr;
David S. Millerd3aaeb32011-07-18 00:40:17 -0700464
David S. Miller80703d22012-02-15 17:48:35 -0500465 n = __ipv4_neigh_lookup(dev, *(__force u32 *)pkey);
David S. Millerd3aaeb32011-07-18 00:40:17 -0700466 if (n)
467 return n;
David Miller32092ec2011-07-25 00:01:41 +0000468 return neigh_create(&arp_tbl, pkey, dev);
David S. Millerd3aaeb32011-07-18 00:40:17 -0700469}
470
Linus Torvalds1da177e2005-04-16 15:20:36 -0700471/*
472 * Peer allocation may fail only in serious out-of-memory conditions. However
473 * we still can generate some output.
474 * Random ID selection looks a bit dangerous because we have no chances to
475 * select ID being unique in a reasonable period of time.
476 * But broken packet identifier may be better than no packet at all.
477 */
478static void ip_select_fb_ident(struct iphdr *iph)
479{
480 static DEFINE_SPINLOCK(ip_fb_id_lock);
481 static u32 ip_fallback_id;
482 u32 salt;
483
484 spin_lock_bh(&ip_fb_id_lock);
Al Viroe4485152006-09-26 22:15:01 -0700485 salt = secure_ip_id((__force __be32)ip_fallback_id ^ iph->daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700486 iph->id = htons(salt & 0xFFFF);
487 ip_fallback_id = salt;
488 spin_unlock_bh(&ip_fb_id_lock);
489}
490
491void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
492{
David S. Miller1d861aa2012-07-10 03:58:16 -0700493 struct net *net = dev_net(dst->dev);
494 struct inet_peer *peer;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700495
David S. Miller1d861aa2012-07-10 03:58:16 -0700496 peer = inet_getpeer_v4(net->ipv4.peers, iph->daddr, 1);
497 if (peer) {
498 iph->id = htons(inet_getid(peer, more));
499 inet_putpeer(peer);
500 return;
501 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700502
503 ip_select_fb_ident(iph);
504}
Eric Dumazet4bc2f182010-07-09 21:22:10 +0000505EXPORT_SYMBOL(__ip_select_ident);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700506
Eric Dumazet5abf7f72012-07-17 22:42:13 +0200507static void __build_flow_key(struct flowi4 *fl4, const struct sock *sk,
David S. Miller4895c772012-07-17 04:19:00 -0700508 const struct iphdr *iph,
509 int oif, u8 tos,
510 u8 prot, u32 mark, int flow_flags)
511{
512 if (sk) {
513 const struct inet_sock *inet = inet_sk(sk);
514
515 oif = sk->sk_bound_dev_if;
516 mark = sk->sk_mark;
517 tos = RT_CONN_FLAGS(sk);
518 prot = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol;
519 }
520 flowi4_init_output(fl4, oif, mark, tos,
521 RT_SCOPE_UNIVERSE, prot,
522 flow_flags,
523 iph->daddr, iph->saddr, 0, 0);
524}
525
Eric Dumazet5abf7f72012-07-17 22:42:13 +0200526static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb,
527 const struct sock *sk)
David S. Miller4895c772012-07-17 04:19:00 -0700528{
529 const struct iphdr *iph = ip_hdr(skb);
530 int oif = skb->dev->ifindex;
531 u8 tos = RT_TOS(iph->tos);
532 u8 prot = iph->protocol;
533 u32 mark = skb->mark;
534
535 __build_flow_key(fl4, sk, iph, oif, tos, prot, mark, 0);
536}
537
Eric Dumazet5abf7f72012-07-17 22:42:13 +0200538static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
David S. Miller4895c772012-07-17 04:19:00 -0700539{
540 const struct inet_sock *inet = inet_sk(sk);
Eric Dumazet5abf7f72012-07-17 22:42:13 +0200541 const struct ip_options_rcu *inet_opt;
David S. Miller4895c772012-07-17 04:19:00 -0700542 __be32 daddr = inet->inet_daddr;
543
544 rcu_read_lock();
545 inet_opt = rcu_dereference(inet->inet_opt);
546 if (inet_opt && inet_opt->opt.srr)
547 daddr = inet_opt->opt.faddr;
548 flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
549 RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
550 inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
551 inet_sk_flowi_flags(sk),
552 daddr, inet->inet_saddr, 0, 0);
553 rcu_read_unlock();
554}
555
/*
 * Build a flow key from @skb when there is one, else from @sk alone.
 * With no @skb, @sk is dereferenced and so must not be NULL.
 */
static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk,
				 const struct sk_buff *skb)
{
	if (!skb) {
		build_sk_flow_key(fl4, sk);
		return;
	}

	build_skb_flow_key(fl4, skb, sk);
}
564
David S. Millerc5038a82012-07-31 15:02:02 -0700565static inline void rt_free(struct rtable *rt)
566{
567 call_rcu(&rt->dst.rcu_head, dst_rcu_free);
568}
569
570static DEFINE_SPINLOCK(fnhe_lock);
David S. Miller4895c772012-07-17 04:19:00 -0700571
Julian Anastasovaee06da2012-07-18 10:15:35 +0000572static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
David S. Miller4895c772012-07-17 04:19:00 -0700573{
574 struct fib_nh_exception *fnhe, *oldest;
David S. Millerc5038a82012-07-31 15:02:02 -0700575 struct rtable *orig;
David S. Miller4895c772012-07-17 04:19:00 -0700576
577 oldest = rcu_dereference(hash->chain);
578 for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe;
579 fnhe = rcu_dereference(fnhe->fnhe_next)) {
580 if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp))
581 oldest = fnhe;
582 }
David S. Millerc5038a82012-07-31 15:02:02 -0700583 orig = rcu_dereference(oldest->fnhe_rth);
584 if (orig) {
585 RCU_INIT_POINTER(oldest->fnhe_rth, NULL);
586 rt_free(orig);
587 }
David S. Miller4895c772012-07-17 04:19:00 -0700588 return oldest;
589}
590
David S. Millerd3a25c92012-07-17 13:23:08 -0700591static inline u32 fnhe_hashfun(__be32 daddr)
592{
593 u32 hval;
594
595 hval = (__force u32) daddr;
596 hval ^= (hval >> 11) ^ (hval >> 22);
597
598 return hval & (FNHE_HASH_SIZE - 1);
599}
600
Julian Anastasovaee06da2012-07-18 10:15:35 +0000601static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
602 u32 pmtu, unsigned long expires)
David S. Miller4895c772012-07-17 04:19:00 -0700603{
Julian Anastasovaee06da2012-07-18 10:15:35 +0000604 struct fnhe_hash_bucket *hash;
David S. Miller4895c772012-07-17 04:19:00 -0700605 struct fib_nh_exception *fnhe;
606 int depth;
Julian Anastasovaee06da2012-07-18 10:15:35 +0000607 u32 hval = fnhe_hashfun(daddr);
David S. Miller4895c772012-07-17 04:19:00 -0700608
David S. Millerc5038a82012-07-31 15:02:02 -0700609 spin_lock_bh(&fnhe_lock);
Julian Anastasovaee06da2012-07-18 10:15:35 +0000610
611 hash = nh->nh_exceptions;
David S. Miller4895c772012-07-17 04:19:00 -0700612 if (!hash) {
Julian Anastasovaee06da2012-07-18 10:15:35 +0000613 hash = kzalloc(FNHE_HASH_SIZE * sizeof(*hash), GFP_ATOMIC);
David S. Miller4895c772012-07-17 04:19:00 -0700614 if (!hash)
Julian Anastasovaee06da2012-07-18 10:15:35 +0000615 goto out_unlock;
616 nh->nh_exceptions = hash;
David S. Miller4895c772012-07-17 04:19:00 -0700617 }
618
David S. Miller4895c772012-07-17 04:19:00 -0700619 hash += hval;
620
621 depth = 0;
622 for (fnhe = rcu_dereference(hash->chain); fnhe;
623 fnhe = rcu_dereference(fnhe->fnhe_next)) {
624 if (fnhe->fnhe_daddr == daddr)
Julian Anastasovaee06da2012-07-18 10:15:35 +0000625 break;
David S. Miller4895c772012-07-17 04:19:00 -0700626 depth++;
627 }
628
Julian Anastasovaee06da2012-07-18 10:15:35 +0000629 if (fnhe) {
630 if (gw)
631 fnhe->fnhe_gw = gw;
632 if (pmtu) {
633 fnhe->fnhe_pmtu = pmtu;
634 fnhe->fnhe_expires = expires;
635 }
636 } else {
637 if (depth > FNHE_RECLAIM_DEPTH)
638 fnhe = fnhe_oldest(hash);
639 else {
640 fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC);
641 if (!fnhe)
642 goto out_unlock;
643
644 fnhe->fnhe_next = hash->chain;
645 rcu_assign_pointer(hash->chain, fnhe);
646 }
647 fnhe->fnhe_daddr = daddr;
648 fnhe->fnhe_gw = gw;
649 fnhe->fnhe_pmtu = pmtu;
650 fnhe->fnhe_expires = expires;
David S. Miller4895c772012-07-17 04:19:00 -0700651 }
David S. Miller4895c772012-07-17 04:19:00 -0700652
David S. Miller4895c772012-07-17 04:19:00 -0700653 fnhe->fnhe_stamp = jiffies;
Julian Anastasovaee06da2012-07-18 10:15:35 +0000654
655out_unlock:
David S. Millerc5038a82012-07-31 15:02:02 -0700656 spin_unlock_bh(&fnhe_lock);
Julian Anastasovaee06da2012-07-18 10:15:35 +0000657 return;
David S. Miller4895c772012-07-17 04:19:00 -0700658}
659
David S. Millerceb33202012-07-17 11:31:28 -0700660static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4,
661 bool kill_route)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700662{
David S. Millere47a1852012-07-11 20:55:47 -0700663 __be32 new_gw = icmp_hdr(skb)->un.gateway;
David S. Miller94206122012-07-11 20:38:08 -0700664 __be32 old_gw = ip_hdr(skb)->saddr;
David S. Millere47a1852012-07-11 20:55:47 -0700665 struct net_device *dev = skb->dev;
David S. Millere47a1852012-07-11 20:55:47 -0700666 struct in_device *in_dev;
David S. Miller4895c772012-07-17 04:19:00 -0700667 struct fib_result res;
David S. Millere47a1852012-07-11 20:55:47 -0700668 struct neighbour *n;
Denis V. Lunev317805b2008-02-28 20:50:06 -0800669 struct net *net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700670
David S. Miller94206122012-07-11 20:38:08 -0700671 switch (icmp_hdr(skb)->code & 7) {
672 case ICMP_REDIR_NET:
673 case ICMP_REDIR_NETTOS:
674 case ICMP_REDIR_HOST:
675 case ICMP_REDIR_HOSTTOS:
676 break;
677
678 default:
679 return;
680 }
681
David S. Millere47a1852012-07-11 20:55:47 -0700682 if (rt->rt_gateway != old_gw)
683 return;
684
685 in_dev = __in_dev_get_rcu(dev);
686 if (!in_dev)
687 return;
688
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900689 net = dev_net(dev);
Joe Perches9d4fb272009-11-23 10:41:23 -0800690 if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) ||
691 ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw) ||
692 ipv4_is_zeronet(new_gw))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700693 goto reject_redirect;
694
695 if (!IN_DEV_SHARED_MEDIA(in_dev)) {
696 if (!inet_addr_onlink(in_dev, new_gw, old_gw))
697 goto reject_redirect;
698 if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev))
699 goto reject_redirect;
700 } else {
Denis V. Lunev317805b2008-02-28 20:50:06 -0800701 if (inet_addr_type(net, new_gw) != RTN_UNICAST)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700702 goto reject_redirect;
703 }
704
David S. Miller4895c772012-07-17 04:19:00 -0700705 n = ipv4_neigh_lookup(&rt->dst, NULL, &new_gw);
David S. Millere47a1852012-07-11 20:55:47 -0700706 if (n) {
707 if (!(n->nud_state & NUD_VALID)) {
708 neigh_event_send(n, NULL);
709 } else {
David S. Miller4895c772012-07-17 04:19:00 -0700710 if (fib_lookup(net, fl4, &res) == 0) {
711 struct fib_nh *nh = &FIB_RES_NH(res);
David S. Miller4895c772012-07-17 04:19:00 -0700712
Julian Anastasovaee06da2012-07-18 10:15:35 +0000713 update_or_create_fnhe(nh, fl4->daddr, new_gw,
714 0, 0);
David S. Miller4895c772012-07-17 04:19:00 -0700715 }
David S. Millerceb33202012-07-17 11:31:28 -0700716 if (kill_route)
717 rt->dst.obsolete = DST_OBSOLETE_KILL;
David S. Millere47a1852012-07-11 20:55:47 -0700718 call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n);
719 }
720 neigh_release(n);
721 }
722 return;
723
724reject_redirect:
725#ifdef CONFIG_IP_ROUTE_VERBOSE
David S. Miller99ee0382012-07-12 07:40:05 -0700726 if (IN_DEV_LOG_MARTIANS(in_dev)) {
727 const struct iphdr *iph = (const struct iphdr *) skb->data;
728 __be32 daddr = iph->daddr;
729 __be32 saddr = iph->saddr;
730
David S. Millere47a1852012-07-11 20:55:47 -0700731 net_info_ratelimited("Redirect from %pI4 on %s about %pI4 ignored\n"
732 " Advised path = %pI4 -> %pI4\n",
733 &old_gw, dev->name, &new_gw,
734 &saddr, &daddr);
David S. Miller99ee0382012-07-12 07:40:05 -0700735 }
David S. Millere47a1852012-07-11 20:55:47 -0700736#endif
737 ;
738}
739
David S. Miller4895c772012-07-17 04:19:00 -0700740static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
741{
742 struct rtable *rt;
743 struct flowi4 fl4;
744
745 rt = (struct rtable *) dst;
746
747 ip_rt_build_flow_key(&fl4, sk, skb);
David S. Millerceb33202012-07-17 11:31:28 -0700748 __ip_do_redirect(rt, skb, &fl4, true);
David S. Miller4895c772012-07-17 04:19:00 -0700749}
750
Linus Torvalds1da177e2005-04-16 15:20:36 -0700751static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
752{
Eric Dumazetee6b9672008-03-05 18:30:47 -0800753 struct rtable *rt = (struct rtable *)dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700754 struct dst_entry *ret = dst;
755
756 if (rt) {
Timo Teräsd11a4dc2010-03-18 23:20:20 +0000757 if (dst->obsolete > 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700758 ip_rt_put(rt);
759 ret = NULL;
David S. Miller59436342012-07-10 06:58:42 -0700760 } else if ((rt->rt_flags & RTCF_REDIRECTED) ||
761 rt->dst.expires) {
David S. Miller89aef892012-07-17 11:00:09 -0700762 ip_rt_put(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700763 ret = NULL;
764 }
765 }
766 return ret;
767}
768
769/*
770 * Algorithm:
771 * 1. The first ip_rt_redirect_number redirects are sent
772 * with exponential backoff, then we stop sending them at all,
773 * assuming that the host ignores our redirects.
774 * 2. If we did not see packets requiring redirects
775 * during ip_rt_redirect_silence, we assume that the host
776 * forgot redirected route and start to send redirects again.
777 *
778 * This algorithm is much cheaper and more intelligent than dumb load limiting
779 * in icmp.c.
780 *
781 * NOTE. Do not forget to inhibit load limiting for redirects (redundant)
782 * and "frag. need" (breaks PMTU discovery) in icmp.c.
783 */
784
785void ip_rt_send_redirect(struct sk_buff *skb)
786{
Eric Dumazet511c3f92009-06-02 05:14:27 +0000787 struct rtable *rt = skb_rtable(skb);
Eric Dumazet30038fc2009-08-28 23:52:01 -0700788 struct in_device *in_dev;
David S. Miller92d86822011-02-04 15:55:25 -0800789 struct inet_peer *peer;
David S. Miller1d861aa2012-07-10 03:58:16 -0700790 struct net *net;
Eric Dumazet30038fc2009-08-28 23:52:01 -0700791 int log_martians;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700792
Eric Dumazet30038fc2009-08-28 23:52:01 -0700793 rcu_read_lock();
Changli Gaod8d1f302010-06-10 23:31:35 -0700794 in_dev = __in_dev_get_rcu(rt->dst.dev);
Eric Dumazet30038fc2009-08-28 23:52:01 -0700795 if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) {
796 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700797 return;
Eric Dumazet30038fc2009-08-28 23:52:01 -0700798 }
799 log_martians = IN_DEV_LOG_MARTIANS(in_dev);
800 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700801
David S. Miller1d861aa2012-07-10 03:58:16 -0700802 net = dev_net(rt->dst.dev);
803 peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, 1);
David S. Miller92d86822011-02-04 15:55:25 -0800804 if (!peer) {
805 icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
806 return;
807 }
808
Linus Torvalds1da177e2005-04-16 15:20:36 -0700809 /* No redirected packets during ip_rt_redirect_silence;
810 * reset the algorithm.
811 */
David S. Miller92d86822011-02-04 15:55:25 -0800812 if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence))
813 peer->rate_tokens = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700814
815 /* Too many ignored redirects; do not send anything
Changli Gaod8d1f302010-06-10 23:31:35 -0700816 * set dst.rate_last to the last seen redirected packet.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700817 */
David S. Miller92d86822011-02-04 15:55:25 -0800818 if (peer->rate_tokens >= ip_rt_redirect_number) {
819 peer->rate_last = jiffies;
David S. Miller1d861aa2012-07-10 03:58:16 -0700820 goto out_put_peer;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700821 }
822
823 /* Check for load limit; set rate_last to the latest sent
824 * redirect.
825 */
David S. Miller92d86822011-02-04 15:55:25 -0800826 if (peer->rate_tokens == 0 ||
Li Yewang14fb8a72006-12-18 00:26:35 -0800827 time_after(jiffies,
David S. Miller92d86822011-02-04 15:55:25 -0800828 (peer->rate_last +
829 (ip_rt_redirect_load << peer->rate_tokens)))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700830 icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
David S. Miller92d86822011-02-04 15:55:25 -0800831 peer->rate_last = jiffies;
832 ++peer->rate_tokens;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700833#ifdef CONFIG_IP_ROUTE_VERBOSE
Eric Dumazet30038fc2009-08-28 23:52:01 -0700834 if (log_martians &&
Joe Perchese87cc472012-05-13 21:56:26 +0000835 peer->rate_tokens == ip_rt_redirect_number)
836 net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n",
David S. Miller92101b32012-07-23 16:29:00 -0700837 &ip_hdr(skb)->saddr, inet_iif(skb),
David S. Millerf1ce3062012-07-12 10:10:17 -0700838 &ip_hdr(skb)->daddr, &rt->rt_gateway);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700839#endif
840 }
David S. Miller1d861aa2012-07-10 03:58:16 -0700841out_put_peer:
842 inet_putpeer(peer);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700843}
844
845static int ip_error(struct sk_buff *skb)
846{
David S. Miller251da412012-06-26 16:27:09 -0700847 struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
Eric Dumazet511c3f92009-06-02 05:14:27 +0000848 struct rtable *rt = skb_rtable(skb);
David S. Miller92d86822011-02-04 15:55:25 -0800849 struct inet_peer *peer;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700850 unsigned long now;
David S. Miller251da412012-06-26 16:27:09 -0700851 struct net *net;
David S. Miller92d86822011-02-04 15:55:25 -0800852 bool send;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700853 int code;
854
David S. Miller251da412012-06-26 16:27:09 -0700855 net = dev_net(rt->dst.dev);
856 if (!IN_DEV_FORWARD(in_dev)) {
857 switch (rt->dst.error) {
858 case EHOSTUNREACH:
859 IP_INC_STATS_BH(net, IPSTATS_MIB_INADDRERRORS);
860 break;
861
862 case ENETUNREACH:
863 IP_INC_STATS_BH(net, IPSTATS_MIB_INNOROUTES);
864 break;
865 }
866 goto out;
867 }
868
Changli Gaod8d1f302010-06-10 23:31:35 -0700869 switch (rt->dst.error) {
Joe Perches4500ebf2011-07-01 09:43:07 +0000870 case EINVAL:
871 default:
872 goto out;
873 case EHOSTUNREACH:
874 code = ICMP_HOST_UNREACH;
875 break;
876 case ENETUNREACH:
877 code = ICMP_NET_UNREACH;
David S. Miller251da412012-06-26 16:27:09 -0700878 IP_INC_STATS_BH(net, IPSTATS_MIB_INNOROUTES);
Joe Perches4500ebf2011-07-01 09:43:07 +0000879 break;
880 case EACCES:
881 code = ICMP_PKT_FILTERED;
882 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700883 }
884
David S. Miller1d861aa2012-07-10 03:58:16 -0700885 peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, 1);
David S. Miller92d86822011-02-04 15:55:25 -0800886
887 send = true;
888 if (peer) {
889 now = jiffies;
890 peer->rate_tokens += now - peer->rate_last;
891 if (peer->rate_tokens > ip_rt_error_burst)
892 peer->rate_tokens = ip_rt_error_burst;
893 peer->rate_last = now;
894 if (peer->rate_tokens >= ip_rt_error_cost)
895 peer->rate_tokens -= ip_rt_error_cost;
896 else
897 send = false;
David S. Miller1d861aa2012-07-10 03:58:16 -0700898 inet_putpeer(peer);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700899 }
David S. Miller92d86822011-02-04 15:55:25 -0800900 if (send)
901 icmp_send(skb, ICMP_DEST_UNREACH, code, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700902
903out: kfree_skb(skb);
904 return 0;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900905}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700906
David S. Millerceb33202012-07-17 11:31:28 -0700907static u32 __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700908{
David S. Miller4895c772012-07-17 04:19:00 -0700909 struct fib_result res;
David S. Miller2c8cec52011-02-09 20:42:07 -0800910
David S. Miller59436342012-07-10 06:58:42 -0700911 if (mtu < ip_rt_min_pmtu)
912 mtu = ip_rt_min_pmtu;
Eric Dumazetfe6fe792011-06-08 06:07:07 +0000913
Eric Dumazetc5ae7d42012-08-28 12:33:07 +0000914 rcu_read_lock();
David S. Miller4895c772012-07-17 04:19:00 -0700915 if (fib_lookup(dev_net(rt->dst.dev), fl4, &res) == 0) {
916 struct fib_nh *nh = &FIB_RES_NH(res);
David S. Miller4895c772012-07-17 04:19:00 -0700917
Julian Anastasovaee06da2012-07-18 10:15:35 +0000918 update_or_create_fnhe(nh, fl4->daddr, 0, mtu,
919 jiffies + ip_rt_mtu_expires);
David S. Miller4895c772012-07-17 04:19:00 -0700920 }
Eric Dumazetc5ae7d42012-08-28 12:33:07 +0000921 rcu_read_unlock();
David S. Millerceb33202012-07-17 11:31:28 -0700922 return mtu;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700923}
924
David S. Miller4895c772012-07-17 04:19:00 -0700925static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
926 struct sk_buff *skb, u32 mtu)
927{
928 struct rtable *rt = (struct rtable *) dst;
929 struct flowi4 fl4;
930
931 ip_rt_build_flow_key(&fl4, sk, skb);
David S. Millerceb33202012-07-17 11:31:28 -0700932 mtu = __ip_rt_update_pmtu(rt, &fl4, mtu);
933
934 if (!rt->rt_pmtu) {
935 dst->obsolete = DST_OBSOLETE_KILL;
936 } else {
937 rt->rt_pmtu = mtu;
Eric Dumazet9b04f352012-08-21 20:48:29 +0000938 rt->dst.expires = max(1UL, jiffies + ip_rt_mtu_expires);
David S. Millerceb33202012-07-17 11:31:28 -0700939 }
David S. Miller4895c772012-07-17 04:19:00 -0700940}
941
David S. Miller36393392012-06-14 22:21:46 -0700942void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
943 int oif, u32 mark, u8 protocol, int flow_flags)
944{
David S. Miller4895c772012-07-17 04:19:00 -0700945 const struct iphdr *iph = (const struct iphdr *) skb->data;
David S. Miller36393392012-06-14 22:21:46 -0700946 struct flowi4 fl4;
947 struct rtable *rt;
948
David S. Miller4895c772012-07-17 04:19:00 -0700949 __build_flow_key(&fl4, NULL, iph, oif,
950 RT_TOS(iph->tos), protocol, mark, flow_flags);
David S. Miller36393392012-06-14 22:21:46 -0700951 rt = __ip_route_output_key(net, &fl4);
952 if (!IS_ERR(rt)) {
David S. Miller4895c772012-07-17 04:19:00 -0700953 __ip_rt_update_pmtu(rt, &fl4, mtu);
David S. Miller36393392012-06-14 22:21:46 -0700954 ip_rt_put(rt);
955 }
956}
957EXPORT_SYMBOL_GPL(ipv4_update_pmtu);
958
959void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
960{
David S. Miller4895c772012-07-17 04:19:00 -0700961 const struct iphdr *iph = (const struct iphdr *) skb->data;
962 struct flowi4 fl4;
963 struct rtable *rt;
David S. Miller36393392012-06-14 22:21:46 -0700964
David S. Miller4895c772012-07-17 04:19:00 -0700965 __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
966 rt = __ip_route_output_key(sock_net(sk), &fl4);
967 if (!IS_ERR(rt)) {
968 __ip_rt_update_pmtu(rt, &fl4, mtu);
969 ip_rt_put(rt);
970 }
David S. Miller36393392012-06-14 22:21:46 -0700971}
972EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu);
David S. Millerf39925d2011-02-09 22:00:16 -0800973
David S. Millerb42597e2012-07-11 21:25:45 -0700974void ipv4_redirect(struct sk_buff *skb, struct net *net,
975 int oif, u32 mark, u8 protocol, int flow_flags)
976{
David S. Miller4895c772012-07-17 04:19:00 -0700977 const struct iphdr *iph = (const struct iphdr *) skb->data;
David S. Millerb42597e2012-07-11 21:25:45 -0700978 struct flowi4 fl4;
979 struct rtable *rt;
980
David S. Miller4895c772012-07-17 04:19:00 -0700981 __build_flow_key(&fl4, NULL, iph, oif,
982 RT_TOS(iph->tos), protocol, mark, flow_flags);
David S. Millerb42597e2012-07-11 21:25:45 -0700983 rt = __ip_route_output_key(net, &fl4);
984 if (!IS_ERR(rt)) {
David S. Millerceb33202012-07-17 11:31:28 -0700985 __ip_do_redirect(rt, skb, &fl4, false);
David S. Millerb42597e2012-07-11 21:25:45 -0700986 ip_rt_put(rt);
987 }
988}
989EXPORT_SYMBOL_GPL(ipv4_redirect);
990
991void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk)
992{
David S. Miller4895c772012-07-17 04:19:00 -0700993 const struct iphdr *iph = (const struct iphdr *) skb->data;
994 struct flowi4 fl4;
995 struct rtable *rt;
David S. Millerb42597e2012-07-11 21:25:45 -0700996
David S. Miller4895c772012-07-17 04:19:00 -0700997 __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
998 rt = __ip_route_output_key(sock_net(sk), &fl4);
999 if (!IS_ERR(rt)) {
David S. Millerceb33202012-07-17 11:31:28 -07001000 __ip_do_redirect(rt, skb, &fl4, false);
David S. Miller4895c772012-07-17 04:19:00 -07001001 ip_rt_put(rt);
1002 }
David S. Millerb42597e2012-07-11 21:25:45 -07001003}
1004EXPORT_SYMBOL_GPL(ipv4_sk_redirect);
1005
David S. Millerefbc368d2011-12-01 13:38:59 -05001006static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
1007{
1008 struct rtable *rt = (struct rtable *) dst;
1009
David S. Millerceb33202012-07-17 11:31:28 -07001010 /* All IPV4 dsts are created with ->obsolete set to the value
1011 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1012 * into this function always.
1013 *
1014 * When a PMTU/redirect information update invalidates a
1015 * route, this is indicated by setting obsolete to
1016 * DST_OBSOLETE_KILL.
1017 */
1018 if (dst->obsolete == DST_OBSOLETE_KILL || rt_is_expired(rt))
David S. Millerefbc368d2011-12-01 13:38:59 -05001019 return NULL;
Timo Teräsd11a4dc2010-03-18 23:20:20 +00001020 return dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001021}
1022
Linus Torvalds1da177e2005-04-16 15:20:36 -07001023static void ipv4_link_failure(struct sk_buff *skb)
1024{
1025 struct rtable *rt;
1026
1027 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
1028
Eric Dumazet511c3f92009-06-02 05:14:27 +00001029 rt = skb_rtable(skb);
David S. Miller59436342012-07-10 06:58:42 -07001030 if (rt)
1031 dst_set_expires(&rt->dst, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001032}
1033
1034static int ip_rt_bug(struct sk_buff *skb)
1035{
Joe Perches91df42b2012-05-15 14:11:54 +00001036 pr_debug("%s: %pI4 -> %pI4, %s\n",
1037 __func__, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr,
1038 skb->dev ? skb->dev->name : "?");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001039 kfree_skb(skb);
Dave Jonesc378a9c2011-05-21 07:16:42 +00001040 WARN_ON(1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001041 return 0;
1042}
1043
1044/*
1045 We do not cache source address of outgoing interface,
1046 because it is used only by IP RR, TS and SRR options,
1047 so that it out of fast path.
1048
1049 BTW remember: "addr" is allowed to be not aligned
1050 in IP options!
1051 */
1052
David S. Miller8e363602011-05-13 17:29:41 -04001053void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001054{
Al Viroa61ced52006-09-26 21:27:54 -07001055 __be32 src;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001056
David S. Millerc7537962010-11-11 17:07:48 -08001057 if (rt_is_output_route(rt))
David S. Millerc5be24f2011-05-13 18:01:21 -04001058 src = ip_hdr(skb)->saddr;
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001059 else {
David S. Miller8e363602011-05-13 17:29:41 -04001060 struct fib_result res;
1061 struct flowi4 fl4;
1062 struct iphdr *iph;
1063
1064 iph = ip_hdr(skb);
1065
1066 memset(&fl4, 0, sizeof(fl4));
1067 fl4.daddr = iph->daddr;
1068 fl4.saddr = iph->saddr;
Julian Anastasovb0fe4a32011-07-23 02:00:41 +00001069 fl4.flowi4_tos = RT_TOS(iph->tos);
David S. Miller8e363602011-05-13 17:29:41 -04001070 fl4.flowi4_oif = rt->dst.dev->ifindex;
1071 fl4.flowi4_iif = skb->dev->ifindex;
1072 fl4.flowi4_mark = skb->mark;
David S. Miller5e2b61f2011-03-04 21:47:09 -08001073
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001074 rcu_read_lock();
David S. Miller68a5e3d2011-03-11 20:07:33 -05001075 if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res) == 0)
David S. Miller436c3b62011-03-24 17:42:21 -07001076 src = FIB_RES_PREFSRC(dev_net(rt->dst.dev), res);
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001077 else
David S. Millerf8126f12012-07-13 05:03:45 -07001078 src = inet_select_addr(rt->dst.dev,
1079 rt_nexthop(rt, iph->daddr),
1080 RT_SCOPE_UNIVERSE);
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001081 rcu_read_unlock();
1082 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001083 memcpy(addr, &src, 4);
1084}
1085
#ifdef CONFIG_IP_ROUTE_CLASSID
/*
 * Merge @tag into the route's tclassid one 16-bit half at a time: a half
 * of tclassid that is already non-zero is left alone, an empty half takes
 * the corresponding bits of @tag.
 */
static void set_class_tag(struct rtable *rt, u32 tag)
{
	const u32 low_half = 0xFFFF;
	const u32 high_half = 0xFFFF0000;

	if ((rt->dst.tclassid & low_half) == 0)
		rt->dst.tclassid |= tag & low_half;
	if ((rt->dst.tclassid & high_half) == 0)
		rt->dst.tclassid |= tag & high_half;
}
#endif
1095
David S. Miller0dbaee32010-12-13 12:52:14 -08001096static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
1097{
1098 unsigned int advmss = dst_metric_raw(dst, RTAX_ADVMSS);
1099
1100 if (advmss == 0) {
1101 advmss = max_t(unsigned int, dst->dev->mtu - 40,
1102 ip_rt_min_advmss);
1103 if (advmss > 65535 - 40)
1104 advmss = 65535 - 40;
1105 }
1106 return advmss;
1107}
1108
Steffen Klassertebb762f2011-11-23 02:12:51 +00001109static unsigned int ipv4_mtu(const struct dst_entry *dst)
David S. Millerd33e4552010-12-14 13:01:14 -08001110{
Steffen Klassert261663b2011-11-23 02:14:50 +00001111 const struct rtable *rt = (const struct rtable *) dst;
David S. Miller59436342012-07-10 06:58:42 -07001112 unsigned int mtu = rt->rt_pmtu;
1113
1114 if (mtu && time_after_eq(jiffies, rt->dst.expires))
1115 mtu = 0;
1116
1117 if (!mtu)
1118 mtu = dst_metric_raw(dst, RTAX_MTU);
Steffen Klassert618f9bc2011-11-23 02:13:31 +00001119
Steffen Klassert261663b2011-11-23 02:14:50 +00001120 if (mtu && rt_is_output_route(rt))
Steffen Klassert618f9bc2011-11-23 02:13:31 +00001121 return mtu;
1122
1123 mtu = dst->dev->mtu;
David S. Millerd33e4552010-12-14 13:01:14 -08001124
1125 if (unlikely(dst_metric_locked(dst, RTAX_MTU))) {
David S. Millerf8126f12012-07-13 05:03:45 -07001126 if (rt->rt_gateway && mtu > 576)
David S. Millerd33e4552010-12-14 13:01:14 -08001127 mtu = 576;
1128 }
1129
1130 if (mtu > IP_MAX_MTU)
1131 mtu = IP_MAX_MTU;
1132
1133 return mtu;
1134}
1135
David S. Millerf2bb4be2012-07-17 12:20:47 -07001136static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
David S. Miller4895c772012-07-17 04:19:00 -07001137{
1138 struct fnhe_hash_bucket *hash = nh->nh_exceptions;
1139 struct fib_nh_exception *fnhe;
1140 u32 hval;
1141
David S. Millerf2bb4be2012-07-17 12:20:47 -07001142 if (!hash)
1143 return NULL;
1144
David S. Millerd3a25c92012-07-17 13:23:08 -07001145 hval = fnhe_hashfun(daddr);
David S. Miller4895c772012-07-17 04:19:00 -07001146
1147 for (fnhe = rcu_dereference(hash[hval].chain); fnhe;
1148 fnhe = rcu_dereference(fnhe->fnhe_next)) {
David S. Millerf2bb4be2012-07-17 12:20:47 -07001149 if (fnhe->fnhe_daddr == daddr)
1150 return fnhe;
1151 }
1152 return NULL;
1153}
David S. Miller4895c772012-07-17 04:19:00 -07001154
David S. Millercaacf052012-07-31 15:06:50 -07001155static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
David S. Millerf2bb4be2012-07-17 12:20:47 -07001156 __be32 daddr)
1157{
David S. Millercaacf052012-07-31 15:06:50 -07001158 bool ret = false;
1159
David S. Millerc5038a82012-07-31 15:02:02 -07001160 spin_lock_bh(&fnhe_lock);
Julian Anastasovaee06da2012-07-18 10:15:35 +00001161
David S. Millerc5038a82012-07-31 15:02:02 -07001162 if (daddr == fnhe->fnhe_daddr) {
1163 struct rtable *orig;
David S. Millerf2bb4be2012-07-17 12:20:47 -07001164
David S. Millerc5038a82012-07-31 15:02:02 -07001165 if (fnhe->fnhe_pmtu) {
1166 unsigned long expires = fnhe->fnhe_expires;
1167 unsigned long diff = expires - jiffies;
David S. Millerf2bb4be2012-07-17 12:20:47 -07001168
David S. Millerc5038a82012-07-31 15:02:02 -07001169 if (time_before(jiffies, expires)) {
1170 rt->rt_pmtu = fnhe->fnhe_pmtu;
1171 dst_set_expires(&rt->dst, diff);
1172 }
David S. Miller4895c772012-07-17 04:19:00 -07001173 }
David S. Millerc5038a82012-07-31 15:02:02 -07001174 if (fnhe->fnhe_gw) {
1175 rt->rt_flags |= RTCF_REDIRECTED;
1176 rt->rt_gateway = fnhe->fnhe_gw;
1177 }
David S. Millerf2bb4be2012-07-17 12:20:47 -07001178
David S. Millerc5038a82012-07-31 15:02:02 -07001179 orig = rcu_dereference(fnhe->fnhe_rth);
1180 rcu_assign_pointer(fnhe->fnhe_rth, rt);
1181 if (orig)
1182 rt_free(orig);
1183
1184 fnhe->fnhe_stamp = jiffies;
David S. Millercaacf052012-07-31 15:06:50 -07001185 ret = true;
David S. Millerc5038a82012-07-31 15:02:02 -07001186 } else {
1187 /* Routes we intend to cache in nexthop exception have
1188 * the DST_NOCACHE bit clear. However, if we are
1189 * unsuccessful at storing this route into the cache
1190 * we really need to set it.
1191 */
1192 rt->dst.flags |= DST_NOCACHE;
1193 }
1194 spin_unlock_bh(&fnhe_lock);
David S. Millercaacf052012-07-31 15:06:50 -07001195
1196 return ret;
Eric Dumazet54764bb2012-07-31 01:08:23 +00001197}
1198
David S. Millercaacf052012-07-31 15:06:50 -07001199static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt)
David S. Millerf2bb4be2012-07-17 12:20:47 -07001200{
Eric Dumazetd26b3a72012-07-31 05:45:30 +00001201 struct rtable *orig, *prev, **p;
David S. Millercaacf052012-07-31 15:06:50 -07001202 bool ret = true;
David S. Millerf2bb4be2012-07-17 12:20:47 -07001203
Eric Dumazetd26b3a72012-07-31 05:45:30 +00001204 if (rt_is_input_route(rt)) {
Eric Dumazet54764bb2012-07-31 01:08:23 +00001205 p = (struct rtable **)&nh->nh_rth_input;
Eric Dumazetd26b3a72012-07-31 05:45:30 +00001206 } else {
1207 if (!nh->nh_pcpu_rth_output)
1208 goto nocache;
1209 p = (struct rtable **)__this_cpu_ptr(nh->nh_pcpu_rth_output);
1210 }
David S. Millerf2bb4be2012-07-17 12:20:47 -07001211 orig = *p;
1212
1213 prev = cmpxchg(p, orig, rt);
1214 if (prev == orig) {
David S. Millerf2bb4be2012-07-17 12:20:47 -07001215 if (orig)
Eric Dumazet54764bb2012-07-31 01:08:23 +00001216 rt_free(orig);
David S. Millerc6cffba2012-07-26 11:14:38 +00001217 } else {
Eric Dumazet54764bb2012-07-31 01:08:23 +00001218 /* Routes we intend to cache in the FIB nexthop have
1219 * the DST_NOCACHE bit clear. However, if we are
1220 * unsuccessful at storing this route into the cache
1221 * we really need to set it.
1222 */
Eric Dumazetd26b3a72012-07-31 05:45:30 +00001223nocache:
Eric Dumazet54764bb2012-07-31 01:08:23 +00001224 rt->dst.flags |= DST_NOCACHE;
David S. Millercaacf052012-07-31 15:06:50 -07001225 ret = false;
1226 }
1227
1228 return ret;
1229}
1230
1231static DEFINE_SPINLOCK(rt_uncached_lock);
1232static LIST_HEAD(rt_uncached_list);
1233
1234static void rt_add_uncached_list(struct rtable *rt)
1235{
1236 spin_lock_bh(&rt_uncached_lock);
1237 list_add_tail(&rt->rt_uncached, &rt_uncached_list);
1238 spin_unlock_bh(&rt_uncached_lock);
1239}
1240
1241static void ipv4_dst_destroy(struct dst_entry *dst)
1242{
1243 struct rtable *rt = (struct rtable *) dst;
1244
Eric Dumazet78df76a2012-08-24 05:40:47 +00001245 if (!list_empty(&rt->rt_uncached)) {
David S. Millercaacf052012-07-31 15:06:50 -07001246 spin_lock_bh(&rt_uncached_lock);
1247 list_del(&rt->rt_uncached);
1248 spin_unlock_bh(&rt_uncached_lock);
1249 }
1250}
1251
1252void rt_flush_dev(struct net_device *dev)
1253{
1254 if (!list_empty(&rt_uncached_list)) {
1255 struct net *net = dev_net(dev);
1256 struct rtable *rt;
1257
1258 spin_lock_bh(&rt_uncached_lock);
1259 list_for_each_entry(rt, &rt_uncached_list, rt_uncached) {
1260 if (rt->dst.dev != dev)
1261 continue;
1262 rt->dst.dev = net->loopback_dev;
1263 dev_hold(rt->dst.dev);
1264 dev_put(dev);
1265 }
1266 spin_unlock_bh(&rt_uncached_lock);
David S. Miller4895c772012-07-17 04:19:00 -07001267 }
1268}
1269
Eric Dumazet4331deb2012-07-25 05:11:23 +00001270static bool rt_cache_valid(const struct rtable *rt)
David S. Millerd2d68ba92012-07-17 12:58:50 -07001271{
Eric Dumazet4331deb2012-07-25 05:11:23 +00001272 return rt &&
1273 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1274 !rt_is_expired(rt);
David S. Millerd2d68ba92012-07-17 12:58:50 -07001275}
1276
David S. Millerf2bb4be2012-07-17 12:20:47 -07001277static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
David S. Miller5e2b61f2011-03-04 21:47:09 -08001278 const struct fib_result *res,
David S. Millerf2bb4be2012-07-17 12:20:47 -07001279 struct fib_nh_exception *fnhe,
David S. Miller982721f2011-02-16 21:44:24 -08001280 struct fib_info *fi, u16 type, u32 itag)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001281{
David S. Millercaacf052012-07-31 15:06:50 -07001282 bool cached = false;
1283
Linus Torvalds1da177e2005-04-16 15:20:36 -07001284 if (fi) {
David S. Miller4895c772012-07-17 04:19:00 -07001285 struct fib_nh *nh = &FIB_RES_NH(*res);
1286
1287 if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK)
1288 rt->rt_gateway = nh->nh_gw;
David S. Miller28605832012-07-17 14:55:59 -07001289 dst_init_metrics(&rt->dst, fi->fib_metrics, true);
Patrick McHardyc7066f72011-01-14 13:36:42 +01001290#ifdef CONFIG_IP_ROUTE_CLASSID
David S. Millerf2bb4be2012-07-17 12:20:47 -07001291 rt->dst.tclassid = nh->nh_tclassid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001292#endif
David S. Millerc5038a82012-07-31 15:02:02 -07001293 if (unlikely(fnhe))
David S. Millercaacf052012-07-31 15:06:50 -07001294 cached = rt_bind_exception(rt, fnhe, daddr);
David S. Millerc5038a82012-07-31 15:02:02 -07001295 else if (!(rt->dst.flags & DST_NOCACHE))
David S. Millercaacf052012-07-31 15:06:50 -07001296 cached = rt_cache_route(nh, rt);
David S. Millerd33e4552010-12-14 13:01:14 -08001297 }
David S. Millercaacf052012-07-31 15:06:50 -07001298 if (unlikely(!cached))
1299 rt_add_uncached_list(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001300
Patrick McHardyc7066f72011-01-14 13:36:42 +01001301#ifdef CONFIG_IP_ROUTE_CLASSID
Linus Torvalds1da177e2005-04-16 15:20:36 -07001302#ifdef CONFIG_IP_MULTIPLE_TABLES
David S. Miller85b91b02012-07-13 08:21:29 -07001303 set_class_tag(rt, res->tclassid);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001304#endif
1305 set_class_tag(rt, itag);
1306#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001307}
1308
David S. Miller5c1e6aa2011-04-28 14:13:38 -07001309static struct rtable *rt_dst_alloc(struct net_device *dev,
David S. Millerf2bb4be2012-07-17 12:20:47 -07001310 bool nopolicy, bool noxfrm, bool will_cache)
David S. Miller0c4dcd52011-02-17 15:42:37 -08001311{
David S. Millerf5b0a872012-07-19 12:31:33 -07001312 return dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
David S. Millerc6cffba2012-07-26 11:14:38 +00001313 (will_cache ? 0 : (DST_HOST | DST_NOCACHE)) |
David S. Miller5c1e6aa2011-04-28 14:13:38 -07001314 (nopolicy ? DST_NOPOLICY : 0) |
1315 (noxfrm ? DST_NOXFRM : 0));
David S. Miller0c4dcd52011-02-17 15:42:37 -08001316}
1317
Eric Dumazet96d36222010-06-02 19:21:31 +00001318/* called in rcu_read_lock() section */
Al Viro9e12bb22006-09-26 21:25:20 -07001319static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001320 u8 tos, struct net_device *dev, int our)
1321{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001322 struct rtable *rth;
Eric Dumazet96d36222010-06-02 19:21:31 +00001323 struct in_device *in_dev = __in_dev_get_rcu(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001324 u32 itag = 0;
Eric Dumazetb5f7e752010-06-02 12:05:27 +00001325 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001326
1327 /* Primary sanity checks. */
1328
1329 if (in_dev == NULL)
1330 return -EINVAL;
1331
Jan Engelhardt1e637c72008-01-21 03:18:08 -08001332 if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
Thomas Grafd0daebc32012-06-12 00:44:01 +00001333 skb->protocol != htons(ETH_P_IP))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001334 goto e_inval;
1335
Thomas Grafd0daebc32012-06-12 00:44:01 +00001336 if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
1337 if (ipv4_is_loopback(saddr))
1338 goto e_inval;
1339
Joe Perchesf97c1e02007-12-16 13:45:43 -08001340 if (ipv4_is_zeronet(saddr)) {
1341 if (!ipv4_is_local_multicast(daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001342 goto e_inval;
Eric Dumazetb5f7e752010-06-02 12:05:27 +00001343 } else {
David S. Miller9e56e382012-06-28 18:54:02 -07001344 err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
1345 in_dev, &itag);
Eric Dumazetb5f7e752010-06-02 12:05:27 +00001346 if (err < 0)
1347 goto e_err;
1348 }
Benjamin LaHaise4e7b2f12012-03-27 15:55:32 +00001349 rth = rt_dst_alloc(dev_net(dev)->loopback_dev,
David S. Millerf2bb4be2012-07-17 12:20:47 -07001350 IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001351 if (!rth)
1352 goto e_nobufs;
1353
Patrick McHardyc7066f72011-01-14 13:36:42 +01001354#ifdef CONFIG_IP_ROUTE_CLASSID
Changli Gaod8d1f302010-06-10 23:31:35 -07001355 rth->dst.tclassid = itag;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001356#endif
David S. Millercf911662011-04-28 14:31:47 -07001357 rth->dst.output = ip_rt_bug;
1358
Denis V. Luneve84f84f2008-07-05 19:04:32 -07001359 rth->rt_genid = rt_genid(dev_net(dev));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001360 rth->rt_flags = RTCF_MULTICAST;
Eric Dumazet29e75252008-01-31 17:05:09 -08001361 rth->rt_type = RTN_MULTICAST;
David S. Miller9917e1e82012-07-17 14:44:26 -07001362 rth->rt_is_input= 1;
David S. Miller13378ca2012-07-23 13:57:45 -07001363 rth->rt_iif = 0;
David S. Miller59436342012-07-10 06:58:42 -07001364 rth->rt_pmtu = 0;
David S. Millerf8126f12012-07-13 05:03:45 -07001365 rth->rt_gateway = 0;
David S. Millercaacf052012-07-31 15:06:50 -07001366 INIT_LIST_HEAD(&rth->rt_uncached);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001367 if (our) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001368 rth->dst.input= ip_local_deliver;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001369 rth->rt_flags |= RTCF_LOCAL;
1370 }
1371
1372#ifdef CONFIG_IP_MROUTE
Joe Perchesf97c1e02007-12-16 13:45:43 -08001373 if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev))
Changli Gaod8d1f302010-06-10 23:31:35 -07001374 rth->dst.input = ip_mr_input;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001375#endif
1376 RT_CACHE_STAT_INC(in_slow_mc);
1377
David S. Miller89aef892012-07-17 11:00:09 -07001378 skb_dst_set(skb, &rth->dst);
1379 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001380
1381e_nobufs:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001382 return -ENOBUFS;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001383e_inval:
Eric Dumazet96d36222010-06-02 19:21:31 +00001384 return -EINVAL;
Eric Dumazetb5f7e752010-06-02 12:05:27 +00001385e_err:
Eric Dumazetb5f7e752010-06-02 12:05:27 +00001386 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001387}
1388
1389
1390static void ip_handle_martian_source(struct net_device *dev,
1391 struct in_device *in_dev,
1392 struct sk_buff *skb,
Al Viro9e12bb22006-09-26 21:25:20 -07001393 __be32 daddr,
1394 __be32 saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001395{
1396 RT_CACHE_STAT_INC(in_martian_src);
1397#ifdef CONFIG_IP_ROUTE_VERBOSE
1398 if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) {
1399 /*
1400 * RFC1812 recommendation, if source is martian,
1401 * the only hint is MAC header.
1402 */
Joe Perches058bd4d2012-03-11 18:36:11 +00001403 pr_warn("martian source %pI4 from %pI4, on dev %s\n",
Harvey Harrison673d57e2008-10-31 00:53:57 -07001404 &daddr, &saddr, dev->name);
Arnaldo Carvalho de Melo98e399f2007-03-19 15:33:04 -07001405 if (dev->hard_header_len && skb_mac_header_was_set(skb)) {
Joe Perches058bd4d2012-03-11 18:36:11 +00001406 print_hex_dump(KERN_WARNING, "ll header: ",
1407 DUMP_PREFIX_OFFSET, 16, 1,
1408 skb_mac_header(skb),
1409 dev->hard_header_len, true);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001410 }
1411 }
1412#endif
1413}
1414
Eric Dumazet47360222010-06-03 04:13:21 +00001415/* called in rcu_read_lock() section */
Stephen Hemminger5969f712008-04-10 01:52:09 -07001416static int __mkroute_input(struct sk_buff *skb,
David S. Miller982721f2011-02-16 21:44:24 -08001417 const struct fib_result *res,
Stephen Hemminger5969f712008-04-10 01:52:09 -07001418 struct in_device *in_dev,
David S. Millerc6cffba2012-07-26 11:14:38 +00001419 __be32 daddr, __be32 saddr, u32 tos)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001420{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001421 struct rtable *rth;
1422 int err;
1423 struct in_device *out_dev;
Eric Dumazet47360222010-06-03 04:13:21 +00001424 unsigned int flags = 0;
David S. Millerd2d68ba92012-07-17 12:58:50 -07001425 bool do_cache;
Al Virod9c9df82006-09-26 21:28:14 -07001426 u32 itag;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001427
1428 /* get a working reference to the output device */
Eric Dumazet47360222010-06-03 04:13:21 +00001429 out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001430 if (out_dev == NULL) {
Joe Perchese87cc472012-05-13 21:56:26 +00001431 net_crit_ratelimited("Bug in ip_route_input_slow(). Please report.\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001432 return -EINVAL;
1433 }
1434
1435
Michael Smith5c04c812011-04-07 04:51:50 +00001436 err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res),
David S. Miller9e56e382012-06-28 18:54:02 -07001437 in_dev->dev, in_dev, &itag);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001438 if (err < 0) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001439 ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001440 saddr);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001441
Linus Torvalds1da177e2005-04-16 15:20:36 -07001442 goto cleanup;
1443 }
1444
Thomas Graf51b77ca2008-06-03 16:36:01 -07001445 if (out_dev == in_dev && err &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001446 (IN_DEV_SHARED_MEDIA(out_dev) ||
1447 inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res))))
1448 flags |= RTCF_DOREDIRECT;
1449
1450 if (skb->protocol != htons(ETH_P_IP)) {
1451 /* Not IP (i.e. ARP). Do not create route, if it is
1452 * invalid for proxy arp. DNAT routes are always valid.
Jesper Dangaard Brouer65324142010-01-05 05:50:47 +00001453 *
1454 * Proxy arp feature have been extended to allow, ARP
1455 * replies back to the same interface, to support
1456 * Private VLAN switch technologies. See arp.c.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001457 */
Jesper Dangaard Brouer65324142010-01-05 05:50:47 +00001458 if (out_dev == in_dev &&
1459 IN_DEV_PROXY_ARP_PVLAN(in_dev) == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001460 err = -EINVAL;
1461 goto cleanup;
1462 }
1463 }
1464
David S. Millerd2d68ba92012-07-17 12:58:50 -07001465 do_cache = false;
1466 if (res->fi) {
David S. Millerfe3edf42012-07-23 13:22:20 -07001467 if (!itag) {
Eric Dumazet54764bb2012-07-31 01:08:23 +00001468 rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
David S. Millerd2d68ba92012-07-17 12:58:50 -07001469 if (rt_cache_valid(rth)) {
David S. Millerc6cffba2012-07-26 11:14:38 +00001470 skb_dst_set_noref(skb, &rth->dst);
David S. Millerd2d68ba92012-07-17 12:58:50 -07001471 goto out;
1472 }
1473 do_cache = true;
1474 }
1475 }
David S. Millerf2bb4be2012-07-17 12:20:47 -07001476
David S. Miller5c1e6aa2011-04-28 14:13:38 -07001477 rth = rt_dst_alloc(out_dev->dev,
1478 IN_DEV_CONF_GET(in_dev, NOPOLICY),
David S. Millerd2d68ba92012-07-17 12:58:50 -07001479 IN_DEV_CONF_GET(out_dev, NOXFRM), do_cache);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001480 if (!rth) {
1481 err = -ENOBUFS;
1482 goto cleanup;
1483 }
1484
David S. Millercf911662011-04-28 14:31:47 -07001485 rth->rt_genid = rt_genid(dev_net(rth->dst.dev));
1486 rth->rt_flags = flags;
1487 rth->rt_type = res->type;
David S. Miller9917e1e82012-07-17 14:44:26 -07001488 rth->rt_is_input = 1;
David S. Miller13378ca2012-07-23 13:57:45 -07001489 rth->rt_iif = 0;
David S. Miller59436342012-07-10 06:58:42 -07001490 rth->rt_pmtu = 0;
David S. Millerf8126f12012-07-13 05:03:45 -07001491 rth->rt_gateway = 0;
David S. Millercaacf052012-07-31 15:06:50 -07001492 INIT_LIST_HEAD(&rth->rt_uncached);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001493
Changli Gaod8d1f302010-06-10 23:31:35 -07001494 rth->dst.input = ip_forward;
1495 rth->dst.output = ip_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001496
David S. Millerd2d68ba92012-07-17 12:58:50 -07001497 rt_set_nexthop(rth, daddr, res, NULL, res->fi, res->type, itag);
David S. Millerc6cffba2012-07-26 11:14:38 +00001498 skb_dst_set(skb, &rth->dst);
David S. Millerd2d68ba92012-07-17 12:58:50 -07001499out:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001500 err = 0;
1501 cleanup:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001502 return err;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001503}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001504
Stephen Hemminger5969f712008-04-10 01:52:09 -07001505static int ip_mkroute_input(struct sk_buff *skb,
1506 struct fib_result *res,
David S. Miller68a5e3d2011-03-11 20:07:33 -05001507 const struct flowi4 *fl4,
Stephen Hemminger5969f712008-04-10 01:52:09 -07001508 struct in_device *in_dev,
1509 __be32 daddr, __be32 saddr, u32 tos)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001510{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001511#ifdef CONFIG_IP_ROUTE_MULTIPATH
David S. Millerff3fccb2011-03-10 16:23:24 -08001512 if (res->fi && res->fi->fib_nhs > 1)
David S. Miller1b7fe5932011-03-10 17:01:16 -08001513 fib_select_multipath(res);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001514#endif
1515
1516 /* create a routing cache entry */
David S. Millerc6cffba2012-07-26 11:14:38 +00001517 return __mkroute_input(skb, res, in_dev, daddr, saddr, tos);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001518}
1519
Linus Torvalds1da177e2005-04-16 15:20:36 -07001520/*
1521 * NOTE. We drop all the packets that has local source
1522 * addresses, because every properly looped back packet
1523 * must have correct destination already attached by output routine.
1524 *
1525 * Such approach solves two big problems:
1526 * 1. Not simplex devices are handled properly.
1527 * 2. IP spoofing attempts are filtered with 100% of guarantee.
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001528 * called with rcu_read_lock()
Linus Torvalds1da177e2005-04-16 15:20:36 -07001529 */
1530
Al Viro9e12bb22006-09-26 21:25:20 -07001531static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
David S. Millerc10237e2012-06-27 17:05:06 -07001532 u8 tos, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001533{
1534 struct fib_result res;
Eric Dumazet96d36222010-06-02 19:21:31 +00001535 struct in_device *in_dev = __in_dev_get_rcu(dev);
David S. Miller68a5e3d2011-03-11 20:07:33 -05001536 struct flowi4 fl4;
Eric Dumazet95c96172012-04-15 05:58:06 +00001537 unsigned int flags = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001538 u32 itag = 0;
Eric Dumazet95c96172012-04-15 05:58:06 +00001539 struct rtable *rth;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001540 int err = -EINVAL;
Daniel Baluta5e73ea12012-04-15 01:34:41 +00001541 struct net *net = dev_net(dev);
David S. Millerd2d68ba92012-07-17 12:58:50 -07001542 bool do_cache;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001543
1544 /* IP on this device is disabled. */
1545
1546 if (!in_dev)
1547 goto out;
1548
1549 /* Check for the most weird martians, which can be not detected
1550 by fib_lookup.
1551 */
1552
Thomas Grafd0daebc32012-06-12 00:44:01 +00001553 if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001554 goto martian_source;
1555
David S. Millerd2d68ba92012-07-17 12:58:50 -07001556 res.fi = NULL;
Andy Walls27a954b2010-10-17 15:11:22 +00001557 if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001558 goto brd_input;
1559
1560 /* Accept zero addresses only to limited broadcast;
1561 * I even do not know to fix it or not. Waiting for complains :-)
1562 */
Joe Perchesf97c1e02007-12-16 13:45:43 -08001563 if (ipv4_is_zeronet(saddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001564 goto martian_source;
1565
Thomas Grafd0daebc32012-06-12 00:44:01 +00001566 if (ipv4_is_zeronet(daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001567 goto martian_destination;
1568
Thomas Grafd0daebc32012-06-12 00:44:01 +00001569 if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev))) {
1570 if (ipv4_is_loopback(daddr))
1571 goto martian_destination;
1572
1573 if (ipv4_is_loopback(saddr))
1574 goto martian_source;
1575 }
1576
Linus Torvalds1da177e2005-04-16 15:20:36 -07001577 /*
1578 * Now we are ready to route packet.
1579 */
David S. Miller68a5e3d2011-03-11 20:07:33 -05001580 fl4.flowi4_oif = 0;
1581 fl4.flowi4_iif = dev->ifindex;
1582 fl4.flowi4_mark = skb->mark;
1583 fl4.flowi4_tos = tos;
1584 fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
1585 fl4.daddr = daddr;
1586 fl4.saddr = saddr;
1587 err = fib_lookup(net, &fl4, &res);
David S. Miller251da412012-06-26 16:27:09 -07001588 if (err != 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001589 goto no_route;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001590
1591 RT_CACHE_STAT_INC(in_slow_tot);
1592
1593 if (res.type == RTN_BROADCAST)
1594 goto brd_input;
1595
1596 if (res.type == RTN_LOCAL) {
Michael Smith5c04c812011-04-07 04:51:50 +00001597 err = fib_validate_source(skb, saddr, daddr, tos,
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001598 net->loopback_dev->ifindex,
David S. Miller9e56e382012-06-28 18:54:02 -07001599 dev, in_dev, &itag);
Eric Dumazetb5f7e752010-06-02 12:05:27 +00001600 if (err < 0)
1601 goto martian_source_keep_err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001602 goto local_input;
1603 }
1604
1605 if (!IN_DEV_FORWARD(in_dev))
David S. Miller251da412012-06-26 16:27:09 -07001606 goto no_route;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001607 if (res.type != RTN_UNICAST)
1608 goto martian_destination;
1609
David S. Miller68a5e3d2011-03-11 20:07:33 -05001610 err = ip_mkroute_input(skb, &res, &fl4, in_dev, daddr, saddr, tos);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001611out: return err;
1612
1613brd_input:
1614 if (skb->protocol != htons(ETH_P_IP))
1615 goto e_inval;
1616
David S. Miller41347dc2012-06-28 04:05:27 -07001617 if (!ipv4_is_zeronet(saddr)) {
David S. Miller9e56e382012-06-28 18:54:02 -07001618 err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
1619 in_dev, &itag);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001620 if (err < 0)
Eric Dumazetb5f7e752010-06-02 12:05:27 +00001621 goto martian_source_keep_err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001622 }
1623 flags |= RTCF_BROADCAST;
1624 res.type = RTN_BROADCAST;
1625 RT_CACHE_STAT_INC(in_brd);
1626
1627local_input:
David S. Millerd2d68ba92012-07-17 12:58:50 -07001628 do_cache = false;
1629 if (res.fi) {
David S. Millerfe3edf42012-07-23 13:22:20 -07001630 if (!itag) {
Eric Dumazet54764bb2012-07-31 01:08:23 +00001631 rth = rcu_dereference(FIB_RES_NH(res).nh_rth_input);
David S. Millerd2d68ba92012-07-17 12:58:50 -07001632 if (rt_cache_valid(rth)) {
David S. Millerc6cffba2012-07-26 11:14:38 +00001633 skb_dst_set_noref(skb, &rth->dst);
1634 err = 0;
1635 goto out;
David S. Millerd2d68ba92012-07-17 12:58:50 -07001636 }
1637 do_cache = true;
1638 }
1639 }
1640
David S. Miller5c1e6aa2011-04-28 14:13:38 -07001641 rth = rt_dst_alloc(net->loopback_dev,
David S. Millerd2d68ba92012-07-17 12:58:50 -07001642 IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001643 if (!rth)
1644 goto e_nobufs;
1645
David S. Millercf911662011-04-28 14:31:47 -07001646 rth->dst.input= ip_local_deliver;
Changli Gaod8d1f302010-06-10 23:31:35 -07001647 rth->dst.output= ip_rt_bug;
David S. Millercf911662011-04-28 14:31:47 -07001648#ifdef CONFIG_IP_ROUTE_CLASSID
1649 rth->dst.tclassid = itag;
1650#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001651
David S. Millercf911662011-04-28 14:31:47 -07001652 rth->rt_genid = rt_genid(net);
1653 rth->rt_flags = flags|RTCF_LOCAL;
1654 rth->rt_type = res.type;
David S. Miller9917e1e82012-07-17 14:44:26 -07001655 rth->rt_is_input = 1;
David S. Miller13378ca2012-07-23 13:57:45 -07001656 rth->rt_iif = 0;
David S. Miller59436342012-07-10 06:58:42 -07001657 rth->rt_pmtu = 0;
David S. Millerf8126f12012-07-13 05:03:45 -07001658 rth->rt_gateway = 0;
David S. Millercaacf052012-07-31 15:06:50 -07001659 INIT_LIST_HEAD(&rth->rt_uncached);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001660 if (res.type == RTN_UNREACHABLE) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001661 rth->dst.input= ip_error;
1662 rth->dst.error= -err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001663 rth->rt_flags &= ~RTCF_LOCAL;
1664 }
David S. Millerd2d68ba92012-07-17 12:58:50 -07001665 if (do_cache)
1666 rt_cache_route(&FIB_RES_NH(res), rth);
David S. Miller89aef892012-07-17 11:00:09 -07001667 skb_dst_set(skb, &rth->dst);
David S. Millerb23dd4f2011-03-02 14:31:35 -08001668 err = 0;
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001669 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001670
1671no_route:
1672 RT_CACHE_STAT_INC(in_no_route);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001673 res.type = RTN_UNREACHABLE;
Mitsuru Chinen7f538782007-12-07 01:07:24 -08001674 if (err == -ESRCH)
1675 err = -ENETUNREACH;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001676 goto local_input;
1677
1678 /*
1679 * Do not cache martian addresses: they should be logged (RFC1812)
1680 */
1681martian_destination:
1682 RT_CACHE_STAT_INC(in_martian_dst);
1683#ifdef CONFIG_IP_ROUTE_VERBOSE
Joe Perchese87cc472012-05-13 21:56:26 +00001684 if (IN_DEV_LOG_MARTIANS(in_dev))
1685 net_warn_ratelimited("martian destination %pI4 from %pI4, dev %s\n",
1686 &daddr, &saddr, dev->name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001687#endif
Dietmar Eggemann2c2910a2005-06-28 13:06:23 -07001688
Linus Torvalds1da177e2005-04-16 15:20:36 -07001689e_inval:
1690 err = -EINVAL;
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001691 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001692
1693e_nobufs:
1694 err = -ENOBUFS;
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001695 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001696
1697martian_source:
Eric Dumazetb5f7e752010-06-02 12:05:27 +00001698 err = -EINVAL;
1699martian_source_keep_err:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001700 ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001701 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001702}
1703
David S. Millerc6cffba2012-07-26 11:14:38 +00001704int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1705 u8 tos, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001706{
Eric Dumazet96d36222010-06-02 19:21:31 +00001707 int res;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001708
Eric Dumazet96d36222010-06-02 19:21:31 +00001709 rcu_read_lock();
1710
Linus Torvalds1da177e2005-04-16 15:20:36 -07001711 /* Multicast recognition logic is moved from route cache to here.
1712 The problem was that too many Ethernet cards have broken/missing
1713 hardware multicast filters :-( As result the host on multicasting
1714 network acquires a lot of useless route cache entries, sort of
1715 SDR messages from all the world. Now we try to get rid of them.
1716 Really, provided software IP multicast filter is organized
1717 reasonably (at least, hashed), it does not result in a slowdown
1718 comparing with route cache reject entries.
1719 Note, that multicast routers are not affected, because
1720 route cache entry is created eventually.
1721 */
Joe Perchesf97c1e02007-12-16 13:45:43 -08001722 if (ipv4_is_multicast(daddr)) {
Eric Dumazet96d36222010-06-02 19:21:31 +00001723 struct in_device *in_dev = __in_dev_get_rcu(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001724
Eric Dumazet96d36222010-06-02 19:21:31 +00001725 if (in_dev) {
David S. Millerdbdd9a52011-03-10 16:34:38 -08001726 int our = ip_check_mc_rcu(in_dev, daddr, saddr,
1727 ip_hdr(skb)->protocol);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001728 if (our
1729#ifdef CONFIG_IP_MROUTE
Joe Perches9d4fb272009-11-23 10:41:23 -08001730 ||
1731 (!ipv4_is_local_multicast(daddr) &&
1732 IN_DEV_MFORWARD(in_dev))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001733#endif
Joe Perches9d4fb272009-11-23 10:41:23 -08001734 ) {
Eric Dumazet96d36222010-06-02 19:21:31 +00001735 int res = ip_route_input_mc(skb, daddr, saddr,
1736 tos, dev, our);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001737 rcu_read_unlock();
Eric Dumazet96d36222010-06-02 19:21:31 +00001738 return res;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001739 }
1740 }
1741 rcu_read_unlock();
1742 return -EINVAL;
1743 }
David S. Millerc10237e2012-06-27 17:05:06 -07001744 res = ip_route_input_slow(skb, daddr, saddr, tos, dev);
Eric Dumazet96d36222010-06-02 19:21:31 +00001745 rcu_read_unlock();
1746 return res;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001747}
David S. Millerc6cffba2012-07-26 11:14:38 +00001748EXPORT_SYMBOL(ip_route_input_noref);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001749
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001750/* called with rcu_read_lock() */
David S. Miller982721f2011-02-16 21:44:24 -08001751static struct rtable *__mkroute_output(const struct fib_result *res,
David Miller1a00fee2012-07-01 02:02:56 +00001752 const struct flowi4 *fl4, int orig_oif,
Julian Anastasovf61759e2011-12-02 11:39:42 +00001753 struct net_device *dev_out,
David S. Miller5ada5522011-02-17 15:29:00 -08001754 unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001755{
David S. Miller982721f2011-02-16 21:44:24 -08001756 struct fib_info *fi = res->fi;
David S. Millerf2bb4be2012-07-17 12:20:47 -07001757 struct fib_nh_exception *fnhe;
David S. Miller5ada5522011-02-17 15:29:00 -08001758 struct in_device *in_dev;
David S. Miller982721f2011-02-16 21:44:24 -08001759 u16 type = res->type;
David S. Miller5ada5522011-02-17 15:29:00 -08001760 struct rtable *rth;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001761
Thomas Grafd0daebc32012-06-12 00:44:01 +00001762 in_dev = __in_dev_get_rcu(dev_out);
1763 if (!in_dev)
David S. Miller5ada5522011-02-17 15:29:00 -08001764 return ERR_PTR(-EINVAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001765
Thomas Grafd0daebc32012-06-12 00:44:01 +00001766 if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
1767 if (ipv4_is_loopback(fl4->saddr) && !(dev_out->flags & IFF_LOOPBACK))
1768 return ERR_PTR(-EINVAL);
1769
David S. Miller68a5e3d2011-03-11 20:07:33 -05001770 if (ipv4_is_lbcast(fl4->daddr))
David S. Miller982721f2011-02-16 21:44:24 -08001771 type = RTN_BROADCAST;
David S. Miller68a5e3d2011-03-11 20:07:33 -05001772 else if (ipv4_is_multicast(fl4->daddr))
David S. Miller982721f2011-02-16 21:44:24 -08001773 type = RTN_MULTICAST;
David S. Miller68a5e3d2011-03-11 20:07:33 -05001774 else if (ipv4_is_zeronet(fl4->daddr))
David S. Miller5ada5522011-02-17 15:29:00 -08001775 return ERR_PTR(-EINVAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001776
1777 if (dev_out->flags & IFF_LOOPBACK)
1778 flags |= RTCF_LOCAL;
1779
David S. Miller982721f2011-02-16 21:44:24 -08001780 if (type == RTN_BROADCAST) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001781 flags |= RTCF_BROADCAST | RTCF_LOCAL;
David S. Miller982721f2011-02-16 21:44:24 -08001782 fi = NULL;
1783 } else if (type == RTN_MULTICAST) {
Eric Dumazetdd28d1a2010-09-29 11:53:50 +00001784 flags |= RTCF_MULTICAST | RTCF_LOCAL;
David S. Miller813b3b52011-04-28 14:48:42 -07001785 if (!ip_check_mc_rcu(in_dev, fl4->daddr, fl4->saddr,
1786 fl4->flowi4_proto))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001787 flags &= ~RTCF_LOCAL;
1788 /* If multicast route do not exist use
Eric Dumazetdd28d1a2010-09-29 11:53:50 +00001789 * default one, but do not gateway in this case.
1790 * Yes, it is hack.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001791 */
David S. Miller982721f2011-02-16 21:44:24 -08001792 if (fi && res->prefixlen < 4)
1793 fi = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001794 }
1795
David S. Millerf2bb4be2012-07-17 12:20:47 -07001796 fnhe = NULL;
1797 if (fi) {
David S. Millerc5038a82012-07-31 15:02:02 -07001798 struct rtable __rcu **prth;
Eric Dumazetd26b3a72012-07-31 05:45:30 +00001799
David S. Millerc5038a82012-07-31 15:02:02 -07001800 fnhe = find_exception(&FIB_RES_NH(*res), fl4->daddr);
1801 if (fnhe)
1802 prth = &fnhe->fnhe_rth;
1803 else
Eric Dumazetd26b3a72012-07-31 05:45:30 +00001804 prth = __this_cpu_ptr(FIB_RES_NH(*res).nh_pcpu_rth_output);
David S. Millerc5038a82012-07-31 15:02:02 -07001805 rth = rcu_dereference(*prth);
1806 if (rt_cache_valid(rth)) {
1807 dst_hold(&rth->dst);
1808 return rth;
David S. Millerf2bb4be2012-07-17 12:20:47 -07001809 }
1810 }
David S. Miller5c1e6aa2011-04-28 14:13:38 -07001811 rth = rt_dst_alloc(dev_out,
1812 IN_DEV_CONF_GET(in_dev, NOPOLICY),
David S. Millerf2bb4be2012-07-17 12:20:47 -07001813 IN_DEV_CONF_GET(in_dev, NOXFRM),
David S. Millerc5038a82012-07-31 15:02:02 -07001814 fi);
Dimitris Michailidis8391d072010-10-07 14:48:38 +00001815 if (!rth)
David S. Miller5ada5522011-02-17 15:29:00 -08001816 return ERR_PTR(-ENOBUFS);
Dimitris Michailidis8391d072010-10-07 14:48:38 +00001817
David S. Millercf911662011-04-28 14:31:47 -07001818 rth->dst.output = ip_output;
1819
David S. Millercf911662011-04-28 14:31:47 -07001820 rth->rt_genid = rt_genid(dev_net(dev_out));
1821 rth->rt_flags = flags;
1822 rth->rt_type = type;
David S. Miller9917e1e82012-07-17 14:44:26 -07001823 rth->rt_is_input = 0;
David S. Miller13378ca2012-07-23 13:57:45 -07001824 rth->rt_iif = orig_oif ? : 0;
David S. Miller59436342012-07-10 06:58:42 -07001825 rth->rt_pmtu = 0;
David S. Millerf8126f12012-07-13 05:03:45 -07001826 rth->rt_gateway = 0;
David S. Millercaacf052012-07-31 15:06:50 -07001827 INIT_LIST_HEAD(&rth->rt_uncached);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001828
1829 RT_CACHE_STAT_INC(out_slow_tot);
1830
David S. Miller41347dc2012-06-28 04:05:27 -07001831 if (flags & RTCF_LOCAL)
Changli Gaod8d1f302010-06-10 23:31:35 -07001832 rth->dst.input = ip_local_deliver;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001833 if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001834 if (flags & RTCF_LOCAL &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001835 !(dev_out->flags & IFF_LOOPBACK)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001836 rth->dst.output = ip_mc_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001837 RT_CACHE_STAT_INC(out_slow_mc);
1838 }
1839#ifdef CONFIG_IP_MROUTE
David S. Miller982721f2011-02-16 21:44:24 -08001840 if (type == RTN_MULTICAST) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001841 if (IN_DEV_MFORWARD(in_dev) &&
David S. Miller813b3b52011-04-28 14:48:42 -07001842 !ipv4_is_local_multicast(fl4->daddr)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001843 rth->dst.input = ip_mr_input;
1844 rth->dst.output = ip_mc_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001845 }
1846 }
1847#endif
1848 }
1849
David S. Millerf2bb4be2012-07-17 12:20:47 -07001850 rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001851
David S. Miller5ada5522011-02-17 15:29:00 -08001852 return rth;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001853}
1854
Linus Torvalds1da177e2005-04-16 15:20:36 -07001855/*
1856 * Major route resolver routine.
1857 */
1858
David S. Miller89aef892012-07-17 11:00:09 -07001859struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001860{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001861 struct net_device *dev_out = NULL;
Julian Anastasovf61759e2011-12-02 11:39:42 +00001862 __u8 tos = RT_FL_TOS(fl4);
David S. Miller813b3b52011-04-28 14:48:42 -07001863 unsigned int flags = 0;
1864 struct fib_result res;
David S. Miller5ada5522011-02-17 15:29:00 -08001865 struct rtable *rth;
David S. Miller813b3b52011-04-28 14:48:42 -07001866 int orig_oif;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001867
David S. Miller85b91b02012-07-13 08:21:29 -07001868 res.tclassid = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001869 res.fi = NULL;
David S. Miller8b96d222012-06-11 02:01:56 -07001870 res.table = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001871
David S. Miller813b3b52011-04-28 14:48:42 -07001872 orig_oif = fl4->flowi4_oif;
1873
1874 fl4->flowi4_iif = net->loopback_dev->ifindex;
1875 fl4->flowi4_tos = tos & IPTOS_RT_MASK;
1876 fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
1877 RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
David S. Miller44713b62011-03-04 21:24:47 -08001878
David S. Miller010c2702011-02-17 15:37:09 -08001879 rcu_read_lock();
David S. Miller813b3b52011-04-28 14:48:42 -07001880 if (fl4->saddr) {
David S. Millerb23dd4f2011-03-02 14:31:35 -08001881 rth = ERR_PTR(-EINVAL);
David S. Miller813b3b52011-04-28 14:48:42 -07001882 if (ipv4_is_multicast(fl4->saddr) ||
1883 ipv4_is_lbcast(fl4->saddr) ||
1884 ipv4_is_zeronet(fl4->saddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001885 goto out;
1886
Linus Torvalds1da177e2005-04-16 15:20:36 -07001887 /* I removed check for oif == dev_out->oif here.
1888 It was wrong for two reasons:
Denis V. Lunev1ab35272008-01-22 22:04:30 -08001889 1. ip_dev_find(net, saddr) can return wrong iface, if saddr
1890 is assigned to multiple interfaces.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001891 2. Moreover, we are allowed to send packets with saddr
1892 of another iface. --ANK
1893 */
1894
David S. Miller813b3b52011-04-28 14:48:42 -07001895 if (fl4->flowi4_oif == 0 &&
1896 (ipv4_is_multicast(fl4->daddr) ||
1897 ipv4_is_lbcast(fl4->daddr))) {
Julian Anastasova210d012008-10-01 07:28:28 -07001898 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
David S. Miller813b3b52011-04-28 14:48:42 -07001899 dev_out = __ip_dev_find(net, fl4->saddr, false);
Julian Anastasova210d012008-10-01 07:28:28 -07001900 if (dev_out == NULL)
1901 goto out;
1902
Linus Torvalds1da177e2005-04-16 15:20:36 -07001903 /* Special hack: user can direct multicasts
1904 and limited broadcast via necessary interface
1905 without fiddling with IP_MULTICAST_IF or IP_PKTINFO.
1906 This hack is not just for fun, it allows
1907 vic,vat and friends to work.
1908 They bind socket to loopback, set ttl to zero
1909 and expect that it will work.
1910 From the viewpoint of routing cache they are broken,
1911 because we are not allowed to build multicast path
1912 with loopback source addr (look, routing cache
1913 cannot know, that ttl is zero, so that packet
1914 will not leave this host and route is valid).
1915 Luckily, this hack is good workaround.
1916 */
1917
David S. Miller813b3b52011-04-28 14:48:42 -07001918 fl4->flowi4_oif = dev_out->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001919 goto make_route;
1920 }
Julian Anastasova210d012008-10-01 07:28:28 -07001921
David S. Miller813b3b52011-04-28 14:48:42 -07001922 if (!(fl4->flowi4_flags & FLOWI_FLAG_ANYSRC)) {
Julian Anastasova210d012008-10-01 07:28:28 -07001923 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
David S. Miller813b3b52011-04-28 14:48:42 -07001924 if (!__ip_dev_find(net, fl4->saddr, false))
Julian Anastasova210d012008-10-01 07:28:28 -07001925 goto out;
Julian Anastasova210d012008-10-01 07:28:28 -07001926 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001927 }
1928
1929
David S. Miller813b3b52011-04-28 14:48:42 -07001930 if (fl4->flowi4_oif) {
1931 dev_out = dev_get_by_index_rcu(net, fl4->flowi4_oif);
David S. Millerb23dd4f2011-03-02 14:31:35 -08001932 rth = ERR_PTR(-ENODEV);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001933 if (dev_out == NULL)
1934 goto out;
Herbert Xue5ed6392005-10-03 14:35:55 -07001935
1936 /* RACE: Check return value of inet_select_addr instead. */
Eric Dumazetfc75fc82010-12-22 04:39:39 +00001937 if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) {
David S. Millerb23dd4f2011-03-02 14:31:35 -08001938 rth = ERR_PTR(-ENETUNREACH);
Eric Dumazetfc75fc82010-12-22 04:39:39 +00001939 goto out;
1940 }
David S. Miller813b3b52011-04-28 14:48:42 -07001941 if (ipv4_is_local_multicast(fl4->daddr) ||
1942 ipv4_is_lbcast(fl4->daddr)) {
1943 if (!fl4->saddr)
1944 fl4->saddr = inet_select_addr(dev_out, 0,
1945 RT_SCOPE_LINK);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001946 goto make_route;
1947 }
David S. Miller813b3b52011-04-28 14:48:42 -07001948 if (fl4->saddr) {
1949 if (ipv4_is_multicast(fl4->daddr))
1950 fl4->saddr = inet_select_addr(dev_out, 0,
1951 fl4->flowi4_scope);
1952 else if (!fl4->daddr)
1953 fl4->saddr = inet_select_addr(dev_out, 0,
1954 RT_SCOPE_HOST);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001955 }
1956 }
1957
David S. Miller813b3b52011-04-28 14:48:42 -07001958 if (!fl4->daddr) {
1959 fl4->daddr = fl4->saddr;
1960 if (!fl4->daddr)
1961 fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK);
Denis V. Lunevb40afd02008-01-22 22:06:19 -08001962 dev_out = net->loopback_dev;
David S. Miller813b3b52011-04-28 14:48:42 -07001963 fl4->flowi4_oif = net->loopback_dev->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001964 res.type = RTN_LOCAL;
1965 flags |= RTCF_LOCAL;
1966 goto make_route;
1967 }
1968
David S. Miller813b3b52011-04-28 14:48:42 -07001969 if (fib_lookup(net, fl4, &res)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001970 res.fi = NULL;
David S. Miller8b96d222012-06-11 02:01:56 -07001971 res.table = NULL;
David S. Miller813b3b52011-04-28 14:48:42 -07001972 if (fl4->flowi4_oif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001973 /* Apparently, routing tables are wrong. Assume,
1974 that the destination is on link.
1975
1976 WHY? DW.
1977 Because we are allowed to send to iface
1978 even if it has NO routes and NO assigned
1979 addresses. When oif is specified, routing
1980 tables are looked up with only one purpose:
1981 to catch if destination is gatewayed, rather than
1982 direct. Moreover, if MSG_DONTROUTE is set,
1983 we send packet, ignoring both routing tables
1984 and ifaddr state. --ANK
1985
1986
1987 We could make it even if oif is unknown,
1988 likely IPv6, but we do not.
1989 */
1990
David S. Miller813b3b52011-04-28 14:48:42 -07001991 if (fl4->saddr == 0)
1992 fl4->saddr = inet_select_addr(dev_out, 0,
1993 RT_SCOPE_LINK);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001994 res.type = RTN_UNICAST;
1995 goto make_route;
1996 }
David S. Millerb23dd4f2011-03-02 14:31:35 -08001997 rth = ERR_PTR(-ENETUNREACH);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001998 goto out;
1999 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002000
2001 if (res.type == RTN_LOCAL) {
David S. Miller813b3b52011-04-28 14:48:42 -07002002 if (!fl4->saddr) {
Joel Sing9fc3bbb2011-01-03 20:24:20 +00002003 if (res.fi->fib_prefsrc)
David S. Miller813b3b52011-04-28 14:48:42 -07002004 fl4->saddr = res.fi->fib_prefsrc;
Joel Sing9fc3bbb2011-01-03 20:24:20 +00002005 else
David S. Miller813b3b52011-04-28 14:48:42 -07002006 fl4->saddr = fl4->daddr;
Joel Sing9fc3bbb2011-01-03 20:24:20 +00002007 }
Denis V. Lunevb40afd02008-01-22 22:06:19 -08002008 dev_out = net->loopback_dev;
David S. Miller813b3b52011-04-28 14:48:42 -07002009 fl4->flowi4_oif = dev_out->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002010 flags |= RTCF_LOCAL;
2011 goto make_route;
2012 }
2013
2014#ifdef CONFIG_IP_ROUTE_MULTIPATH
David S. Miller813b3b52011-04-28 14:48:42 -07002015 if (res.fi->fib_nhs > 1 && fl4->flowi4_oif == 0)
David S. Miller1b7fe5932011-03-10 17:01:16 -08002016 fib_select_multipath(&res);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002017 else
2018#endif
David S. Miller21d8c492011-04-14 14:49:37 -07002019 if (!res.prefixlen &&
2020 res.table->tb_num_default > 1 &&
David S. Miller813b3b52011-04-28 14:48:42 -07002021 res.type == RTN_UNICAST && !fl4->flowi4_oif)
David S. Miller0c838ff2011-01-31 16:16:50 -08002022 fib_select_default(&res);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002023
David S. Miller813b3b52011-04-28 14:48:42 -07002024 if (!fl4->saddr)
2025 fl4->saddr = FIB_RES_PREFSRC(net, res);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002026
Linus Torvalds1da177e2005-04-16 15:20:36 -07002027 dev_out = FIB_RES_DEV(res);
David S. Miller813b3b52011-04-28 14:48:42 -07002028 fl4->flowi4_oif = dev_out->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002029
2030
2031make_route:
David Miller1a00fee2012-07-01 02:02:56 +00002032 rth = __mkroute_output(&res, fl4, orig_oif, dev_out, flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002033
David S. Miller010c2702011-02-17 15:37:09 -08002034out:
2035 rcu_read_unlock();
David S. Millerb23dd4f2011-03-02 14:31:35 -08002036 return rth;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002037}
Arnaldo Carvalho de Melod8c97a92005-08-09 20:12:12 -07002038EXPORT_SYMBOL_GPL(__ip_route_output_key);
2039
Jianzhao Wangae2688d2010-09-08 14:35:43 -07002040static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie)
2041{
2042 return NULL;
2043}
2044
Steffen Klassertebb762f2011-11-23 02:12:51 +00002045static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst)
Roland Dreierec831ea2011-01-31 13:16:00 -08002046{
Steffen Klassert618f9bc2011-11-23 02:13:31 +00002047 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
2048
2049 return mtu ? : dst->dev->mtu;
Roland Dreierec831ea2011-01-31 13:16:00 -08002050}
2051
David S. Miller6700c272012-07-17 03:29:28 -07002052static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
2053 struct sk_buff *skb, u32 mtu)
David S. Miller14e50e52007-05-24 18:17:54 -07002054{
2055}
2056
/*
 * dst_ops->redirect hook for blackhole routes: intentionally a no-op,
 * all arguments are ignored.
 */
static void ipv4_rt_blackhole_redirect(struct dst_entry *dst,
				       struct sock *sk,
				       struct sk_buff *skb)
{
}
2061
Held Bernhard0972ddb2011-04-24 22:07:32 +00002062static u32 *ipv4_rt_blackhole_cow_metrics(struct dst_entry *dst,
2063 unsigned long old)
2064{
2065 return NULL;
2066}
2067
David S. Miller14e50e52007-05-24 18:17:54 -07002068static struct dst_ops ipv4_dst_blackhole_ops = {
2069 .family = AF_INET,
Harvey Harrison09640e632009-02-01 00:45:17 -08002070 .protocol = cpu_to_be16(ETH_P_IP),
Jianzhao Wangae2688d2010-09-08 14:35:43 -07002071 .check = ipv4_blackhole_dst_check,
Steffen Klassertebb762f2011-11-23 02:12:51 +00002072 .mtu = ipv4_blackhole_mtu,
Eric Dumazet214f45c2011-02-18 11:39:01 -08002073 .default_advmss = ipv4_default_advmss,
David S. Miller14e50e52007-05-24 18:17:54 -07002074 .update_pmtu = ipv4_rt_blackhole_update_pmtu,
David S. Millerb587ee32012-07-12 00:39:24 -07002075 .redirect = ipv4_rt_blackhole_redirect,
Held Bernhard0972ddb2011-04-24 22:07:32 +00002076 .cow_metrics = ipv4_rt_blackhole_cow_metrics,
David S. Millerd3aaeb32011-07-18 00:40:17 -07002077 .neigh_lookup = ipv4_neigh_lookup,
David S. Miller14e50e52007-05-24 18:17:54 -07002078};
2079
David S. Miller2774c132011-03-01 14:59:04 -08002080struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig)
David S. Miller14e50e52007-05-24 18:17:54 -07002081{
David S. Miller2774c132011-03-01 14:59:04 -08002082 struct rtable *ort = (struct rtable *) dst_orig;
David S. Millerf5b0a872012-07-19 12:31:33 -07002083 struct rtable *rt;
David S. Miller14e50e52007-05-24 18:17:54 -07002084
David S. Millerf5b0a872012-07-19 12:31:33 -07002085 rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_NONE, 0);
David S. Miller14e50e52007-05-24 18:17:54 -07002086 if (rt) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002087 struct dst_entry *new = &rt->dst;
David S. Miller14e50e52007-05-24 18:17:54 -07002088
David S. Miller14e50e52007-05-24 18:17:54 -07002089 new->__use = 1;
Herbert Xu352e5122007-11-13 21:34:06 -08002090 new->input = dst_discard;
2091 new->output = dst_discard;
David S. Miller14e50e52007-05-24 18:17:54 -07002092
Changli Gaod8d1f302010-06-10 23:31:35 -07002093 new->dev = ort->dst.dev;
David S. Miller14e50e52007-05-24 18:17:54 -07002094 if (new->dev)
2095 dev_hold(new->dev);
2096
David S. Miller9917e1e82012-07-17 14:44:26 -07002097 rt->rt_is_input = ort->rt_is_input;
David S. Miller5e2b61f2011-03-04 21:47:09 -08002098 rt->rt_iif = ort->rt_iif;
David S. Miller59436342012-07-10 06:58:42 -07002099 rt->rt_pmtu = ort->rt_pmtu;
David S. Miller14e50e52007-05-24 18:17:54 -07002100
Denis V. Luneve84f84f2008-07-05 19:04:32 -07002101 rt->rt_genid = rt_genid(net);
David S. Miller14e50e52007-05-24 18:17:54 -07002102 rt->rt_flags = ort->rt_flags;
2103 rt->rt_type = ort->rt_type;
David S. Miller14e50e52007-05-24 18:17:54 -07002104 rt->rt_gateway = ort->rt_gateway;
David S. Miller14e50e52007-05-24 18:17:54 -07002105
David S. Millercaacf052012-07-31 15:06:50 -07002106 INIT_LIST_HEAD(&rt->rt_uncached);
2107
David S. Miller14e50e52007-05-24 18:17:54 -07002108 dst_free(new);
2109 }
2110
David S. Miller2774c132011-03-01 14:59:04 -08002111 dst_release(dst_orig);
2112
2113 return rt ? &rt->dst : ERR_PTR(-ENOMEM);
David S. Miller14e50e52007-05-24 18:17:54 -07002114}
2115
David S. Miller9d6ec932011-03-12 01:12:47 -05002116struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
David S. Millerb23dd4f2011-03-02 14:31:35 -08002117 struct sock *sk)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002118{
David S. Miller9d6ec932011-03-12 01:12:47 -05002119 struct rtable *rt = __ip_route_output_key(net, flp4);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002120
David S. Millerb23dd4f2011-03-02 14:31:35 -08002121 if (IS_ERR(rt))
2122 return rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002123
David S. Miller56157872011-05-02 14:37:45 -07002124 if (flp4->flowi4_proto)
David S. Miller9d6ec932011-03-12 01:12:47 -05002125 rt = (struct rtable *) xfrm_lookup(net, &rt->dst,
2126 flowi4_to_flowi(flp4),
2127 sk, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002128
David S. Millerb23dd4f2011-03-02 14:31:35 -08002129 return rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002130}
Arnaldo Carvalho de Melod8c97a92005-08-09 20:12:12 -07002131EXPORT_SYMBOL_GPL(ip_route_output_flow);
2132
David S. Millerf1ce3062012-07-12 10:10:17 -07002133static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
2134 struct flowi4 *fl4, struct sk_buff *skb, u32 pid,
2135 u32 seq, int event, int nowait, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002136{
Eric Dumazet511c3f92009-06-02 05:14:27 +00002137 struct rtable *rt = skb_rtable(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002138 struct rtmsg *r;
Thomas Grafbe403ea2006-08-17 18:15:17 -07002139 struct nlmsghdr *nlh;
Steffen Klassert2bc8ca42011-10-11 01:12:02 +00002140 unsigned long expires = 0;
David S. Millerf1850712012-07-10 07:26:01 -07002141 u32 error;
Julian Anastasov521f5492012-07-20 12:02:08 +03002142 u32 metrics[RTAX_MAX];
Thomas Grafbe403ea2006-08-17 18:15:17 -07002143
2144 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags);
2145 if (nlh == NULL)
Patrick McHardy26932562007-01-31 23:16:40 -08002146 return -EMSGSIZE;
Thomas Grafbe403ea2006-08-17 18:15:17 -07002147
2148 r = nlmsg_data(nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002149 r->rtm_family = AF_INET;
2150 r->rtm_dst_len = 32;
2151 r->rtm_src_len = 0;
David Millerd6c0a4f2012-07-01 02:02:59 +00002152 r->rtm_tos = fl4->flowi4_tos;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002153 r->rtm_table = RT_TABLE_MAIN;
David S. Millerf3756b72012-04-01 20:39:02 -04002154 if (nla_put_u32(skb, RTA_TABLE, RT_TABLE_MAIN))
2155 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002156 r->rtm_type = rt->rt_type;
2157 r->rtm_scope = RT_SCOPE_UNIVERSE;
2158 r->rtm_protocol = RTPROT_UNSPEC;
2159 r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
2160 if (rt->rt_flags & RTCF_NOTIFY)
2161 r->rtm_flags |= RTM_F_NOTIFY;
Thomas Grafbe403ea2006-08-17 18:15:17 -07002162
David S. Millerf1ce3062012-07-12 10:10:17 -07002163 if (nla_put_be32(skb, RTA_DST, dst))
David S. Millerf3756b72012-04-01 20:39:02 -04002164 goto nla_put_failure;
David Miller1a00fee2012-07-01 02:02:56 +00002165 if (src) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002166 r->rtm_src_len = 32;
David Miller1a00fee2012-07-01 02:02:56 +00002167 if (nla_put_be32(skb, RTA_SRC, src))
David S. Millerf3756b72012-04-01 20:39:02 -04002168 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002169 }
David S. Millerf3756b72012-04-01 20:39:02 -04002170 if (rt->dst.dev &&
2171 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2172 goto nla_put_failure;
Patrick McHardyc7066f72011-01-14 13:36:42 +01002173#ifdef CONFIG_IP_ROUTE_CLASSID
David S. Millerf3756b72012-04-01 20:39:02 -04002174 if (rt->dst.tclassid &&
2175 nla_put_u32(skb, RTA_FLOW, rt->dst.tclassid))
2176 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002177#endif
David S. Miller41347dc2012-06-28 04:05:27 -07002178 if (!rt_is_input_route(rt) &&
David Millerd6c0a4f2012-07-01 02:02:59 +00002179 fl4->saddr != src) {
2180 if (nla_put_be32(skb, RTA_PREFSRC, fl4->saddr))
David S. Millerf3756b72012-04-01 20:39:02 -04002181 goto nla_put_failure;
2182 }
David S. Millerf8126f12012-07-13 05:03:45 -07002183 if (rt->rt_gateway &&
David S. Millerf3756b72012-04-01 20:39:02 -04002184 nla_put_be32(skb, RTA_GATEWAY, rt->rt_gateway))
2185 goto nla_put_failure;
Thomas Grafbe403ea2006-08-17 18:15:17 -07002186
Julian Anastasov521f5492012-07-20 12:02:08 +03002187 memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
2188 if (rt->rt_pmtu)
2189 metrics[RTAX_MTU - 1] = rt->rt_pmtu;
2190 if (rtnetlink_put_metrics(skb, metrics) < 0)
Thomas Grafbe403ea2006-08-17 18:15:17 -07002191 goto nla_put_failure;
2192
David Millerb4869882012-07-01 02:03:01 +00002193 if (fl4->flowi4_mark &&
2194 nla_put_be32(skb, RTA_MARK, fl4->flowi4_mark))
David S. Millerf3756b72012-04-01 20:39:02 -04002195 goto nla_put_failure;
Eric Dumazet963bfee2010-07-20 22:03:14 +00002196
Changli Gaod8d1f302010-06-10 23:31:35 -07002197 error = rt->dst.error;
David S. Miller59436342012-07-10 06:58:42 -07002198 expires = rt->dst.expires;
2199 if (expires) {
2200 if (time_before(jiffies, expires))
2201 expires -= jiffies;
2202 else
2203 expires = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002204 }
Thomas Grafbe403ea2006-08-17 18:15:17 -07002205
David S. Millerc7537962010-11-11 17:07:48 -08002206 if (rt_is_input_route(rt)) {
David S. Millerf1ce3062012-07-12 10:10:17 -07002207 if (nla_put_u32(skb, RTA_IIF, rt->rt_iif))
2208 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002209 }
2210
David S. Millerf1850712012-07-10 07:26:01 -07002211 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, error) < 0)
Thomas Grafe3703b32006-11-27 09:27:07 -08002212 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002213
Thomas Grafbe403ea2006-08-17 18:15:17 -07002214 return nlmsg_end(skb, nlh);
2215
2216nla_put_failure:
Patrick McHardy26932562007-01-31 23:16:40 -08002217 nlmsg_cancel(skb, nlh);
2218 return -EMSGSIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002219}
2220
Daniel Baluta5e73ea12012-04-15 01:34:41 +00002221static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002222{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09002223 struct net *net = sock_net(in_skb->sk);
Thomas Grafd889ce32006-08-17 18:15:44 -07002224 struct rtmsg *rtm;
2225 struct nlattr *tb[RTA_MAX+1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07002226 struct rtable *rt = NULL;
David Millerd6c0a4f2012-07-01 02:02:59 +00002227 struct flowi4 fl4;
Al Viro9e12bb22006-09-26 21:25:20 -07002228 __be32 dst = 0;
2229 __be32 src = 0;
2230 u32 iif;
Thomas Grafd889ce32006-08-17 18:15:44 -07002231 int err;
Eric Dumazet963bfee2010-07-20 22:03:14 +00002232 int mark;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002233 struct sk_buff *skb;
2234
Thomas Grafd889ce32006-08-17 18:15:44 -07002235 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy);
2236 if (err < 0)
2237 goto errout;
2238
2239 rtm = nlmsg_data(nlh);
2240
Linus Torvalds1da177e2005-04-16 15:20:36 -07002241 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
Thomas Grafd889ce32006-08-17 18:15:44 -07002242 if (skb == NULL) {
2243 err = -ENOBUFS;
2244 goto errout;
2245 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002246
2247 /* Reserve room for dummy headers, this skb can pass
2248 through good chunk of routing engine.
2249 */
Arnaldo Carvalho de Melo459a98e2007-03-19 15:30:44 -07002250 skb_reset_mac_header(skb);
Arnaldo Carvalho de Meloc1d2bbe2007-04-10 20:45:18 -07002251 skb_reset_network_header(skb);
Stephen Hemmingerd2c962b2006-04-17 17:27:11 -07002252
2253 /* Bugfix: need to give ip_route_input enough of an IP header to not gag. */
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07002254 ip_hdr(skb)->protocol = IPPROTO_ICMP;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002255 skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));
2256
Al Viro17fb2c62006-09-26 22:15:25 -07002257 src = tb[RTA_SRC] ? nla_get_be32(tb[RTA_SRC]) : 0;
2258 dst = tb[RTA_DST] ? nla_get_be32(tb[RTA_DST]) : 0;
Thomas Grafd889ce32006-08-17 18:15:44 -07002259 iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
Eric Dumazet963bfee2010-07-20 22:03:14 +00002260 mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002261
David Millerd6c0a4f2012-07-01 02:02:59 +00002262 memset(&fl4, 0, sizeof(fl4));
2263 fl4.daddr = dst;
2264 fl4.saddr = src;
2265 fl4.flowi4_tos = rtm->rtm_tos;
2266 fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0;
2267 fl4.flowi4_mark = mark;
2268
Linus Torvalds1da177e2005-04-16 15:20:36 -07002269 if (iif) {
Thomas Grafd889ce32006-08-17 18:15:44 -07002270 struct net_device *dev;
2271
Denis V. Lunev19375042008-02-28 20:52:04 -08002272 dev = __dev_get_by_index(net, iif);
Thomas Grafd889ce32006-08-17 18:15:44 -07002273 if (dev == NULL) {
2274 err = -ENODEV;
2275 goto errout_free;
2276 }
2277
Linus Torvalds1da177e2005-04-16 15:20:36 -07002278 skb->protocol = htons(ETH_P_IP);
2279 skb->dev = dev;
Eric Dumazet963bfee2010-07-20 22:03:14 +00002280 skb->mark = mark;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002281 local_bh_disable();
2282 err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev);
2283 local_bh_enable();
Thomas Grafd889ce32006-08-17 18:15:44 -07002284
Eric Dumazet511c3f92009-06-02 05:14:27 +00002285 rt = skb_rtable(skb);
Changli Gaod8d1f302010-06-10 23:31:35 -07002286 if (err == 0 && rt->dst.error)
2287 err = -rt->dst.error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002288 } else {
David S. Miller9d6ec932011-03-12 01:12:47 -05002289 rt = ip_route_output_key(net, &fl4);
David S. Millerb23dd4f2011-03-02 14:31:35 -08002290
2291 err = 0;
2292 if (IS_ERR(rt))
2293 err = PTR_ERR(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002294 }
Thomas Grafd889ce32006-08-17 18:15:44 -07002295
Linus Torvalds1da177e2005-04-16 15:20:36 -07002296 if (err)
Thomas Grafd889ce32006-08-17 18:15:44 -07002297 goto errout_free;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002298
Changli Gaod8d1f302010-06-10 23:31:35 -07002299 skb_dst_set(skb, &rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002300 if (rtm->rtm_flags & RTM_F_NOTIFY)
2301 rt->rt_flags |= RTCF_NOTIFY;
2302
David S. Millerf1ce3062012-07-12 10:10:17 -07002303 err = rt_fill_info(net, dst, src, &fl4, skb,
David Miller1a00fee2012-07-01 02:02:56 +00002304 NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
Denis V. Lunev19375042008-02-28 20:52:04 -08002305 RTM_NEWROUTE, 0, 0);
Thomas Grafd889ce32006-08-17 18:15:44 -07002306 if (err <= 0)
2307 goto errout_free;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002308
Denis V. Lunev19375042008-02-28 20:52:04 -08002309 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
Thomas Grafd889ce32006-08-17 18:15:44 -07002310errout:
Thomas Graf2942e902006-08-15 00:30:25 -07002311 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002312
Thomas Grafd889ce32006-08-17 18:15:44 -07002313errout_free:
Linus Torvalds1da177e2005-04-16 15:20:36 -07002314 kfree_skb(skb);
Thomas Grafd889ce32006-08-17 18:15:44 -07002315 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002316}
2317
2318int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb)
2319{
Linus Torvalds1da177e2005-04-16 15:20:36 -07002320 return skb->len;
2321}
2322
2323void ip_rt_multicast_event(struct in_device *in_dev)
2324{
Nicolas Dichtelbafa6d92012-09-07 00:45:29 +00002325 rt_cache_flush(dev_net(in_dev->dev));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002326}
2327
2328#ifdef CONFIG_SYSCTL
Denis V. Lunev81c684d2008-07-08 03:05:28 -07002329static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write,
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07002330 void __user *buffer,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002331 size_t *lenp, loff_t *ppos)
2332{
2333 if (write) {
Nicolas Dichtelbafa6d92012-09-07 00:45:29 +00002334 rt_cache_flush((struct net *)__ctl->extra1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002335 return 0;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002336 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002337
2338 return -EINVAL;
2339}
2340
Al Viroeeb61f72008-07-27 08:59:33 +01002341static ctl_table ipv4_route_table[] = {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002342 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002343 .procname = "gc_thresh",
2344 .data = &ipv4_dst_ops.gc_thresh,
2345 .maxlen = sizeof(int),
2346 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002347 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002348 },
2349 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002350 .procname = "max_size",
2351 .data = &ip_rt_max_size,
2352 .maxlen = sizeof(int),
2353 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002354 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002355 },
2356 {
2357 /* Deprecated. Use gc_min_interval_ms */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002358
Linus Torvalds1da177e2005-04-16 15:20:36 -07002359 .procname = "gc_min_interval",
2360 .data = &ip_rt_gc_min_interval,
2361 .maxlen = sizeof(int),
2362 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002363 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002364 },
2365 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002366 .procname = "gc_min_interval_ms",
2367 .data = &ip_rt_gc_min_interval,
2368 .maxlen = sizeof(int),
2369 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002370 .proc_handler = proc_dointvec_ms_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002371 },
2372 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002373 .procname = "gc_timeout",
2374 .data = &ip_rt_gc_timeout,
2375 .maxlen = sizeof(int),
2376 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002377 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002378 },
2379 {
Eric Dumazet9f28a2f2011-12-21 15:47:16 -05002380 .procname = "gc_interval",
2381 .data = &ip_rt_gc_interval,
2382 .maxlen = sizeof(int),
2383 .mode = 0644,
2384 .proc_handler = proc_dointvec_jiffies,
2385 },
2386 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002387 .procname = "redirect_load",
2388 .data = &ip_rt_redirect_load,
2389 .maxlen = sizeof(int),
2390 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002391 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002392 },
2393 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002394 .procname = "redirect_number",
2395 .data = &ip_rt_redirect_number,
2396 .maxlen = sizeof(int),
2397 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002398 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002399 },
2400 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002401 .procname = "redirect_silence",
2402 .data = &ip_rt_redirect_silence,
2403 .maxlen = sizeof(int),
2404 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002405 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002406 },
2407 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002408 .procname = "error_cost",
2409 .data = &ip_rt_error_cost,
2410 .maxlen = sizeof(int),
2411 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002412 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002413 },
2414 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002415 .procname = "error_burst",
2416 .data = &ip_rt_error_burst,
2417 .maxlen = sizeof(int),
2418 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002419 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002420 },
2421 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002422 .procname = "gc_elasticity",
2423 .data = &ip_rt_gc_elasticity,
2424 .maxlen = sizeof(int),
2425 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002426 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002427 },
2428 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002429 .procname = "mtu_expires",
2430 .data = &ip_rt_mtu_expires,
2431 .maxlen = sizeof(int),
2432 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002433 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002434 },
2435 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002436 .procname = "min_pmtu",
2437 .data = &ip_rt_min_pmtu,
2438 .maxlen = sizeof(int),
2439 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002440 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002441 },
2442 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002443 .procname = "min_adv_mss",
2444 .data = &ip_rt_min_advmss,
2445 .maxlen = sizeof(int),
2446 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002447 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002448 },
Eric W. Biedermanf8572d82009-11-05 13:32:03 -08002449 { }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002450};
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002451
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002452static struct ctl_table ipv4_route_flush_table[] = {
2453 {
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002454 .procname = "flush",
2455 .maxlen = sizeof(int),
2456 .mode = 0200,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002457 .proc_handler = ipv4_sysctl_rtcache_flush,
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002458 },
Eric W. Biedermanf8572d82009-11-05 13:32:03 -08002459 { },
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002460};
2461
2462static __net_init int sysctl_route_net_init(struct net *net)
2463{
2464 struct ctl_table *tbl;
2465
2466 tbl = ipv4_route_flush_table;
Octavian Purdila09ad9bc2009-11-25 15:14:13 -08002467 if (!net_eq(net, &init_net)) {
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002468 tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL);
2469 if (tbl == NULL)
2470 goto err_dup;
2471 }
2472 tbl[0].extra1 = net;
2473
Eric W. Biedermanec8f23c2012-04-19 13:44:49 +00002474 net->ipv4.route_hdr = register_net_sysctl(net, "net/ipv4/route", tbl);
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002475 if (net->ipv4.route_hdr == NULL)
2476 goto err_reg;
2477 return 0;
2478
2479err_reg:
2480 if (tbl != ipv4_route_flush_table)
2481 kfree(tbl);
2482err_dup:
2483 return -ENOMEM;
2484}
2485
2486static __net_exit void sysctl_route_net_exit(struct net *net)
2487{
2488 struct ctl_table *tbl;
2489
2490 tbl = net->ipv4.route_hdr->ctl_table_arg;
2491 unregister_net_sysctl_table(net->ipv4.route_hdr);
2492 BUG_ON(tbl == ipv4_route_flush_table);
2493 kfree(tbl);
2494}
2495
2496static __net_initdata struct pernet_operations sysctl_route_ops = {
2497 .init = sysctl_route_net_init,
2498 .exit = sysctl_route_net_exit,
2499};
Linus Torvalds1da177e2005-04-16 15:20:36 -07002500#endif
2501
Neil Horman3ee94372010-05-08 01:57:52 -07002502static __net_init int rt_genid_init(struct net *net)
Denis V. Lunev9f5e97e2008-07-05 19:02:59 -07002503{
Nicolas Dichtelb42664f2012-09-10 22:09:44 +00002504 atomic_set(&net->rt_genid, 0);
David S. Miller436c3b62011-03-24 17:42:21 -07002505 get_random_bytes(&net->ipv4.dev_addr_genid,
2506 sizeof(net->ipv4.dev_addr_genid));
Denis V. Lunev9f5e97e2008-07-05 19:02:59 -07002507 return 0;
2508}
2509
Neil Horman3ee94372010-05-08 01:57:52 -07002510static __net_initdata struct pernet_operations rt_genid_ops = {
2511 .init = rt_genid_init,
Denis V. Lunev9f5e97e2008-07-05 19:02:59 -07002512};
2513
David S. Millerc3426b42012-06-09 16:27:05 -07002514static int __net_init ipv4_inetpeer_init(struct net *net)
2515{
2516 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
2517
2518 if (!bp)
2519 return -ENOMEM;
2520 inet_peer_base_init(bp);
2521 net->ipv4.peers = bp;
2522 return 0;
2523}
2524
2525static void __net_exit ipv4_inetpeer_exit(struct net *net)
2526{
2527 struct inet_peer_base *bp = net->ipv4.peers;
2528
2529 net->ipv4.peers = NULL;
David S. Miller56a6b242012-06-09 16:32:41 -07002530 inetpeer_invalidate_tree(bp);
David S. Millerc3426b42012-06-09 16:27:05 -07002531 kfree(bp);
2532}
2533
2534static __net_initdata struct pernet_operations ipv4_inetpeer_ops = {
2535 .init = ipv4_inetpeer_init,
2536 .exit = ipv4_inetpeer_exit,
2537};
Denis V. Lunev9f5e97e2008-07-05 19:02:59 -07002538
#ifdef CONFIG_IP_ROUTE_CLASSID
/* Per-cpu accounting array; ip_rt_init() allocates 256 entries per cpu. */
struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
#endif /* CONFIG_IP_ROUTE_CLASSID */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002542
Linus Torvalds1da177e2005-04-16 15:20:36 -07002543int __init ip_rt_init(void)
2544{
Eric Dumazet424c4b72005-07-05 14:58:19 -07002545 int rc = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002546
Patrick McHardyc7066f72011-01-14 13:36:42 +01002547#ifdef CONFIG_IP_ROUTE_CLASSID
Ingo Molnar0dcec8c2009-02-25 14:07:33 +01002548 ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002549 if (!ip_rt_acct)
2550 panic("IP: failed to allocate ip_rt_acct\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002551#endif
2552
Alexey Dobriyane5d679f332006-08-26 19:25:52 -07002553 ipv4_dst_ops.kmem_cachep =
2554 kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0,
Paul Mundt20c2df82007-07-20 10:11:58 +09002555 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002556
David S. Miller14e50e52007-05-24 18:17:54 -07002557 ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep;
2558
Eric Dumazetfc66f952010-10-08 06:37:34 +00002559 if (dst_entries_init(&ipv4_dst_ops) < 0)
2560 panic("IP: failed to allocate ipv4_dst_ops counter\n");
2561
2562 if (dst_entries_init(&ipv4_dst_blackhole_ops) < 0)
2563 panic("IP: failed to allocate ipv4_dst_blackhole_ops counter\n");
2564
David S. Miller89aef892012-07-17 11:00:09 -07002565 ipv4_dst_ops.gc_thresh = ~0;
2566 ip_rt_max_size = INT_MAX;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002567
Linus Torvalds1da177e2005-04-16 15:20:36 -07002568 devinet_init();
2569 ip_fib_init();
2570
Denis V. Lunev73b38712008-02-28 20:51:18 -08002571 if (ip_rt_proc_init())
Joe Perches058bd4d2012-03-11 18:36:11 +00002572 pr_err("Unable to create route proc files\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002573#ifdef CONFIG_XFRM
2574 xfrm_init();
Neil Hormana33bc5c2009-07-30 18:52:15 -07002575 xfrm4_init(ip_rt_max_size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002576#endif
Greg Rosec7ac8672011-06-10 01:27:09 +00002577 rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL, NULL);
Thomas Graf63f34442007-03-22 11:55:17 -07002578
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002579#ifdef CONFIG_SYSCTL
2580 register_pernet_subsys(&sysctl_route_ops);
2581#endif
Neil Horman3ee94372010-05-08 01:57:52 -07002582 register_pernet_subsys(&rt_genid_ops);
David S. Millerc3426b42012-06-09 16:27:05 -07002583 register_pernet_subsys(&ipv4_inetpeer_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002584 return rc;
2585}
2586
Al Viroa1bc6eb2008-07-30 06:32:52 -04002587#ifdef CONFIG_SYSCTL
Al Viroeeb61f72008-07-27 08:59:33 +01002588/*
2589 * We really need to sanitize the damn ipv4 init order, then all
2590 * this nonsense will go away.
2591 */
2592void __init ip_static_sysctl_init(void)
2593{
Eric W. Biederman4e5ca782012-04-19 13:32:39 +00002594 register_net_sysctl(&init_net, "net/ipv4/route", ipv4_route_table);
Al Viroeeb61f72008-07-27 08:59:33 +01002595}
Al Viroa1bc6eb2008-07-30 06:32:52 -04002596#endif