blob: 3323168ee52d8774c6cb0258acbd807e7646a203 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * IPv4 Forwarding Information Base: FIB frontend.
7 *
Linus Torvalds1da177e2005-04-16 15:20:36 -07008 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
Linus Torvalds1da177e2005-04-16 15:20:36 -070016#include <linux/module.h>
17#include <asm/uaccess.h>
18#include <asm/system.h>
19#include <linux/bitops.h>
Randy Dunlap4fc268d2006-01-11 12:17:47 -080020#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070021#include <linux/types.h>
22#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070023#include <linux/mm.h>
24#include <linux/string.h>
25#include <linux/socket.h>
26#include <linux/sockios.h>
27#include <linux/errno.h>
28#include <linux/in.h>
29#include <linux/inet.h>
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020030#include <linux/inetdevice.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070031#include <linux/netdevice.h>
Thomas Graf18237302006-08-04 23:04:54 -070032#include <linux/if_addr.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070033#include <linux/if_arp.h>
34#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070035#include <linux/init.h>
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070036#include <linux/list.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070037
38#include <net/ip.h>
39#include <net/protocol.h>
40#include <net/route.h>
41#include <net/tcp.h>
42#include <net/sock.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070043#include <net/arp.h>
44#include <net/ip_fib.h>
Thomas Graf63f34442007-03-22 11:55:17 -070045#include <net/rtnetlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070046
Linus Torvalds1da177e2005-04-16 15:20:36 -070047#ifndef CONFIG_IP_MULTIPLE_TABLES
48
Denis V. Lunev7b1a74fd2008-01-10 03:22:17 -080049static int __net_init fib4_rules_init(struct net *net)
Pavel Emelyanovc3e9a352007-11-06 23:34:04 -080050{
Denis V. Lunev93456b62008-01-10 03:23:38 -080051 struct fib_table *local_table, *main_table;
52
Stephen Hemminger7f9b8052008-01-14 23:14:20 -080053 local_table = fib_hash_table(RT_TABLE_LOCAL);
Denis V. Lunev93456b62008-01-10 03:23:38 -080054 if (local_table == NULL)
Denis V. Lunevdbb50162008-01-10 03:21:49 -080055 return -ENOMEM;
56
Stephen Hemminger7f9b8052008-01-14 23:14:20 -080057 main_table = fib_hash_table(RT_TABLE_MAIN);
Denis V. Lunev93456b62008-01-10 03:23:38 -080058 if (main_table == NULL)
Denis V. Lunevdbb50162008-01-10 03:21:49 -080059 goto fail;
60
Denis V. Lunev93456b62008-01-10 03:23:38 -080061 hlist_add_head_rcu(&local_table->tb_hlist,
Denis V. Luneve4aef8a2008-01-10 03:28:24 -080062 &net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]);
Denis V. Lunev93456b62008-01-10 03:23:38 -080063 hlist_add_head_rcu(&main_table->tb_hlist,
Denis V. Luneve4aef8a2008-01-10 03:28:24 -080064 &net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]);
Denis V. Lunevdbb50162008-01-10 03:21:49 -080065 return 0;
66
67fail:
Denis V. Lunev93456b62008-01-10 03:23:38 -080068 kfree(local_table);
Denis V. Lunevdbb50162008-01-10 03:21:49 -080069 return -ENOMEM;
Pavel Emelyanovc3e9a352007-11-06 23:34:04 -080070}
Linus Torvalds1da177e2005-04-16 15:20:36 -070071#else
72
Denis V. Lunev8ad49422008-01-10 03:24:11 -080073struct fib_table *fib_new_table(struct net *net, u32 id)
Linus Torvalds1da177e2005-04-16 15:20:36 -070074{
75 struct fib_table *tb;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070076 unsigned int h;
Linus Torvalds1da177e2005-04-16 15:20:36 -070077
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070078 if (id == 0)
79 id = RT_TABLE_MAIN;
Denis V. Lunev8ad49422008-01-10 03:24:11 -080080 tb = fib_get_table(net, id);
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070081 if (tb)
82 return tb;
Stephen Hemminger7f9b8052008-01-14 23:14:20 -080083
84 tb = fib_hash_table(id);
Linus Torvalds1da177e2005-04-16 15:20:36 -070085 if (!tb)
86 return NULL;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070087 h = id & (FIB_TABLE_HASHSZ - 1);
Denis V. Luneve4aef8a2008-01-10 03:28:24 -080088 hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]);
Linus Torvalds1da177e2005-04-16 15:20:36 -070089 return tb;
90}
91
Denis V. Lunev8ad49422008-01-10 03:24:11 -080092struct fib_table *fib_get_table(struct net *net, u32 id)
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070093{
94 struct fib_table *tb;
95 struct hlist_node *node;
Denis V. Luneve4aef8a2008-01-10 03:28:24 -080096 struct hlist_head *head;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070097 unsigned int h;
Linus Torvalds1da177e2005-04-16 15:20:36 -070098
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070099 if (id == 0)
100 id = RT_TABLE_MAIN;
101 h = id & (FIB_TABLE_HASHSZ - 1);
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800102
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700103 rcu_read_lock();
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800104 head = &net->ipv4.fib_table_hash[h];
105 hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700106 if (tb->tb_id == id) {
107 rcu_read_unlock();
108 return tb;
109 }
110 }
111 rcu_read_unlock();
112 return NULL;
113}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700114#endif /* CONFIG_IP_MULTIPLE_TABLES */
115
Denis V. Lunev010278e2008-01-22 22:04:04 -0800116void fib_select_default(struct net *net,
117 const struct flowi *flp, struct fib_result *res)
Denis V. Lunev64c2d532008-01-22 22:03:33 -0800118{
119 struct fib_table *tb;
120 int table = RT_TABLE_MAIN;
121#ifdef CONFIG_IP_MULTIPLE_TABLES
122 if (res->r == NULL || res->r->action != FR_ACT_TO_TBL)
123 return;
124 table = res->r->table;
125#endif
Denis V. Lunev010278e2008-01-22 22:04:04 -0800126 tb = fib_get_table(net, table);
Denis V. Lunev64c2d532008-01-22 22:03:33 -0800127 if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000128 fib_table_select_default(tb, flp, res);
Denis V. Lunev64c2d532008-01-22 22:03:33 -0800129}
130
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800131static void fib_flush(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700132{
133 int flushed = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700134 struct fib_table *tb;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700135 struct hlist_node *node;
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800136 struct hlist_head *head;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700137 unsigned int h;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700138
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700139 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800140 head = &net->ipv4.fib_table_hash[h];
141 hlist_for_each_entry(tb, node, head, tb_hlist)
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000142 flushed += fib_table_flush(tb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700143 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700144
145 if (flushed)
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700146 rt_cache_flush(net, -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700147}
148
149/*
150 * Find the first device with a given source address.
151 */
152
Denis V. Lunev1ab35272008-01-22 22:04:30 -0800153struct net_device * ip_dev_find(struct net *net, __be32 addr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700154{
155 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
156 struct fib_result res;
157 struct net_device *dev = NULL;
Pavel Emelyanov03cf7862007-10-23 21:17:27 -0700158 struct fib_table *local_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700159
160#ifdef CONFIG_IP_MULTIPLE_TABLES
161 res.r = NULL;
162#endif
163
Denis V. Lunev1ab35272008-01-22 22:04:30 -0800164 local_table = fib_get_table(net, RT_TABLE_LOCAL);
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000165 if (!local_table || fib_table_lookup(local_table, &fl, &res))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700166 return NULL;
167 if (res.type != RTN_LOCAL)
168 goto out;
169 dev = FIB_RES_DEV(res);
170
171 if (dev)
172 dev_hold(dev);
173out:
174 fib_res_put(&res);
175 return dev;
176}
177
Laszlo Attila Toth055381162007-12-04 23:28:46 -0800178/*
179 * Find address type as if only "dev" was present in the system. If
180 * on_dev is NULL then all interfaces are taken into consideration.
181 */
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800182static inline unsigned __inet_dev_addr_type(struct net *net,
183 const struct net_device *dev,
Laszlo Attila Toth055381162007-12-04 23:28:46 -0800184 __be32 addr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700185{
186 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
187 struct fib_result res;
188 unsigned ret = RTN_BROADCAST;
Pavel Emelyanov03cf7862007-10-23 21:17:27 -0700189 struct fib_table *local_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700190
Jan Engelhardt1e637c72008-01-21 03:18:08 -0800191 if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700192 return RTN_BROADCAST;
Joe Perchesf97c1e02007-12-16 13:45:43 -0800193 if (ipv4_is_multicast(addr))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700194 return RTN_MULTICAST;
195
196#ifdef CONFIG_IP_MULTIPLE_TABLES
197 res.r = NULL;
198#endif
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900199
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800200 local_table = fib_get_table(net, RT_TABLE_LOCAL);
Pavel Emelyanov03cf7862007-10-23 21:17:27 -0700201 if (local_table) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700202 ret = RTN_UNICAST;
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000203 if (!fib_table_lookup(local_table, &fl, &res)) {
Laszlo Attila Toth055381162007-12-04 23:28:46 -0800204 if (!dev || dev == res.fi->fib_dev)
205 ret = res.type;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700206 fib_res_put(&res);
207 }
208 }
209 return ret;
210}
211
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800212unsigned int inet_addr_type(struct net *net, __be32 addr)
Laszlo Attila Toth055381162007-12-04 23:28:46 -0800213{
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800214 return __inet_dev_addr_type(net, NULL, addr);
Laszlo Attila Toth055381162007-12-04 23:28:46 -0800215}
216
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800217unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
218 __be32 addr)
Laszlo Attila Toth055381162007-12-04 23:28:46 -0800219{
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800220 return __inet_dev_addr_type(net, dev, addr);
Laszlo Attila Toth055381162007-12-04 23:28:46 -0800221}
222
Linus Torvalds1da177e2005-04-16 15:20:36 -0700223/* Given (packet source, input interface) and optional (dst, oif, tos):
224 - (main) check, that source is valid i.e. not broadcast or our local
225 address.
226 - figure out what "logical" interface this packet arrived
227 and calculate "specific destination" address.
228 - check, that packet arrived from expected physical interface.
229 */
230
Al Virod9c9df82006-09-26 21:28:14 -0700231int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
jamalb0c110c2009-10-18 02:12:33 +0000232 struct net_device *dev, __be32 *spec_dst,
233 u32 *itag, u32 mark)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700234{
235 struct in_device *in_dev;
236 struct flowi fl = { .nl_u = { .ip4_u =
237 { .daddr = src,
238 .saddr = dst,
239 .tos = tos } },
jamalb0c110c2009-10-18 02:12:33 +0000240 .mark = mark,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700241 .iif = oif };
jamalb0c110c2009-10-18 02:12:33 +0000242
Linus Torvalds1da177e2005-04-16 15:20:36 -0700243 struct fib_result res;
Patrick McHardy8153a102009-12-03 01:25:58 +0000244 int no_addr, rpf, accept_local;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700245 int ret;
Denis V. Lunev5b707aa2008-01-21 17:33:15 -0800246 struct net *net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700247
Patrick McHardy8153a102009-12-03 01:25:58 +0000248 no_addr = rpf = accept_local = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700249 rcu_read_lock();
Herbert Xue5ed6392005-10-03 14:35:55 -0700250 in_dev = __in_dev_get_rcu(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700251 if (in_dev) {
252 no_addr = in_dev->ifa_list == NULL;
253 rpf = IN_DEV_RPFILTER(in_dev);
Patrick McHardy8153a102009-12-03 01:25:58 +0000254 accept_local = IN_DEV_ACCEPT_LOCAL(in_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700255 }
256 rcu_read_unlock();
257
258 if (in_dev == NULL)
259 goto e_inval;
260
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900261 net = dev_net(dev);
Denis V. Lunev5b707aa2008-01-21 17:33:15 -0800262 if (fib_lookup(net, &fl, &res))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700263 goto last_resort;
Patrick McHardy8153a102009-12-03 01:25:58 +0000264 if (res.type != RTN_UNICAST) {
265 if (res.type != RTN_LOCAL || !accept_local)
266 goto e_inval_res;
267 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700268 *spec_dst = FIB_RES_PREFSRC(res);
269 fib_combine_itag(itag, &res);
270#ifdef CONFIG_IP_ROUTE_MULTIPATH
271 if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
272#else
273 if (FIB_RES_DEV(res) == dev)
274#endif
275 {
276 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
277 fib_res_put(&res);
278 return ret;
279 }
280 fib_res_put(&res);
281 if (no_addr)
282 goto last_resort;
Stephen Hemmingerc1cf8422009-02-20 08:25:36 +0000283 if (rpf == 1)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700284 goto e_inval;
285 fl.oif = dev->ifindex;
286
287 ret = 0;
Denis V. Lunev5b707aa2008-01-21 17:33:15 -0800288 if (fib_lookup(net, &fl, &res) == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700289 if (res.type == RTN_UNICAST) {
290 *spec_dst = FIB_RES_PREFSRC(res);
291 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
292 }
293 fib_res_put(&res);
294 }
295 return ret;
296
297last_resort:
298 if (rpf)
299 goto e_inval;
300 *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
301 *itag = 0;
302 return 0;
303
304e_inval_res:
305 fib_res_put(&res);
306e_inval:
307 return -EINVAL;
308}
309
Al Viro81f7bf62006-09-27 18:40:00 -0700310static inline __be32 sk_extract_addr(struct sockaddr *addr)
Thomas Graf4e902c52006-08-17 18:14:52 -0700311{
312 return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
313}
314
315static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
316{
317 struct nlattr *nla;
318
319 nla = (struct nlattr *) ((char *) mx + len);
320 nla->nla_type = type;
321 nla->nla_len = nla_attr_size(4);
322 *(u32 *) nla_data(nla) = value;
323
324 return len + nla_total_size(4);
325}
326
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800327static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
Thomas Graf4e902c52006-08-17 18:14:52 -0700328 struct fib_config *cfg)
329{
Al Viro6d85c102006-09-26 22:15:46 -0700330 __be32 addr;
Thomas Graf4e902c52006-08-17 18:14:52 -0700331 int plen;
332
333 memset(cfg, 0, sizeof(*cfg));
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800334 cfg->fc_nlinfo.nl_net = net;
Thomas Graf4e902c52006-08-17 18:14:52 -0700335
336 if (rt->rt_dst.sa_family != AF_INET)
337 return -EAFNOSUPPORT;
338
339 /*
340 * Check mask for validity:
341 * a) it must be contiguous.
342 * b) destination must have all host bits clear.
343 * c) if application forgot to set correct family (AF_INET),
344 * reject request unless it is absolutely clear i.e.
345 * both family and mask are zero.
346 */
347 plen = 32;
348 addr = sk_extract_addr(&rt->rt_dst);
349 if (!(rt->rt_flags & RTF_HOST)) {
Al Viro81f7bf62006-09-27 18:40:00 -0700350 __be32 mask = sk_extract_addr(&rt->rt_genmask);
Thomas Graf4e902c52006-08-17 18:14:52 -0700351
352 if (rt->rt_genmask.sa_family != AF_INET) {
353 if (mask || rt->rt_genmask.sa_family)
354 return -EAFNOSUPPORT;
355 }
356
357 if (bad_mask(mask, addr))
358 return -EINVAL;
359
360 plen = inet_mask_len(mask);
361 }
362
363 cfg->fc_dst_len = plen;
364 cfg->fc_dst = addr;
365
366 if (cmd != SIOCDELRT) {
367 cfg->fc_nlflags = NLM_F_CREATE;
368 cfg->fc_protocol = RTPROT_BOOT;
369 }
370
371 if (rt->rt_metric)
372 cfg->fc_priority = rt->rt_metric - 1;
373
374 if (rt->rt_flags & RTF_REJECT) {
375 cfg->fc_scope = RT_SCOPE_HOST;
376 cfg->fc_type = RTN_UNREACHABLE;
377 return 0;
378 }
379
380 cfg->fc_scope = RT_SCOPE_NOWHERE;
381 cfg->fc_type = RTN_UNICAST;
382
383 if (rt->rt_dev) {
384 char *colon;
385 struct net_device *dev;
386 char devname[IFNAMSIZ];
387
388 if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
389 return -EFAULT;
390
391 devname[IFNAMSIZ-1] = 0;
392 colon = strchr(devname, ':');
393 if (colon)
394 *colon = 0;
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800395 dev = __dev_get_by_name(net, devname);
Thomas Graf4e902c52006-08-17 18:14:52 -0700396 if (!dev)
397 return -ENODEV;
398 cfg->fc_oif = dev->ifindex;
399 if (colon) {
400 struct in_ifaddr *ifa;
401 struct in_device *in_dev = __in_dev_get_rtnl(dev);
402 if (!in_dev)
403 return -ENODEV;
404 *colon = ':';
405 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
406 if (strcmp(ifa->ifa_label, devname) == 0)
407 break;
408 if (ifa == NULL)
409 return -ENODEV;
410 cfg->fc_prefsrc = ifa->ifa_local;
411 }
412 }
413
414 addr = sk_extract_addr(&rt->rt_gateway);
415 if (rt->rt_gateway.sa_family == AF_INET && addr) {
416 cfg->fc_gw = addr;
417 if (rt->rt_flags & RTF_GATEWAY &&
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800418 inet_addr_type(net, addr) == RTN_UNICAST)
Thomas Graf4e902c52006-08-17 18:14:52 -0700419 cfg->fc_scope = RT_SCOPE_UNIVERSE;
420 }
421
422 if (cmd == SIOCDELRT)
423 return 0;
424
425 if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
426 return -EINVAL;
427
428 if (cfg->fc_scope == RT_SCOPE_NOWHERE)
429 cfg->fc_scope = RT_SCOPE_LINK;
430
431 if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
432 struct nlattr *mx;
433 int len = 0;
434
435 mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900436 if (mx == NULL)
Thomas Graf4e902c52006-08-17 18:14:52 -0700437 return -ENOMEM;
438
439 if (rt->rt_flags & RTF_MTU)
440 len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
441
442 if (rt->rt_flags & RTF_WINDOW)
443 len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
444
445 if (rt->rt_flags & RTF_IRTT)
446 len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
447
448 cfg->fc_mx = mx;
449 cfg->fc_mx_len = len;
450 }
451
452 return 0;
453}
454
Linus Torvalds1da177e2005-04-16 15:20:36 -0700455/*
456 * Handle IP routing ioctl calls. These are used to manipulate the routing tables
457 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900458
Denis V. Lunev1bad1182008-01-10 03:29:53 -0800459int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700460{
Thomas Graf4e902c52006-08-17 18:14:52 -0700461 struct fib_config cfg;
462 struct rtentry rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700463 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700464
465 switch (cmd) {
466 case SIOCADDRT: /* Add a route */
467 case SIOCDELRT: /* Delete a route */
468 if (!capable(CAP_NET_ADMIN))
469 return -EPERM;
Thomas Graf4e902c52006-08-17 18:14:52 -0700470
471 if (copy_from_user(&rt, arg, sizeof(rt)))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700472 return -EFAULT;
Thomas Graf4e902c52006-08-17 18:14:52 -0700473
Linus Torvalds1da177e2005-04-16 15:20:36 -0700474 rtnl_lock();
Denis V. Lunev1bad1182008-01-10 03:29:53 -0800475 err = rtentry_to_fib_config(net, cmd, &rt, &cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700476 if (err == 0) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700477 struct fib_table *tb;
478
Linus Torvalds1da177e2005-04-16 15:20:36 -0700479 if (cmd == SIOCDELRT) {
Denis V. Lunev1bad1182008-01-10 03:29:53 -0800480 tb = fib_get_table(net, cfg.fc_table);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700481 if (tb)
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000482 err = fib_table_delete(tb, &cfg);
Thomas Graf4e902c52006-08-17 18:14:52 -0700483 else
484 err = -ESRCH;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700485 } else {
Denis V. Lunev1bad1182008-01-10 03:29:53 -0800486 tb = fib_new_table(net, cfg.fc_table);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700487 if (tb)
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000488 err = fib_table_insert(tb, &cfg);
Thomas Graf4e902c52006-08-17 18:14:52 -0700489 else
490 err = -ENOBUFS;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700491 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700492
493 /* allocated by rtentry_to_fib_config() */
494 kfree(cfg.fc_mx);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700495 }
496 rtnl_unlock();
497 return err;
498 }
499 return -EINVAL;
500}
501
Patrick McHardyef7c79e2007-06-05 12:38:30 -0700502const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
Thomas Graf4e902c52006-08-17 18:14:52 -0700503 [RTA_DST] = { .type = NLA_U32 },
504 [RTA_SRC] = { .type = NLA_U32 },
505 [RTA_IIF] = { .type = NLA_U32 },
506 [RTA_OIF] = { .type = NLA_U32 },
507 [RTA_GATEWAY] = { .type = NLA_U32 },
508 [RTA_PRIORITY] = { .type = NLA_U32 },
509 [RTA_PREFSRC] = { .type = NLA_U32 },
510 [RTA_METRICS] = { .type = NLA_NESTED },
Thomas Graf5176f912006-08-26 20:13:18 -0700511 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
Thomas Graf4e902c52006-08-17 18:14:52 -0700512 [RTA_FLOW] = { .type = NLA_U32 },
Thomas Graf4e902c52006-08-17 18:14:52 -0700513};
Linus Torvalds1da177e2005-04-16 15:20:36 -0700514
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800515static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
516 struct nlmsghdr *nlh, struct fib_config *cfg)
Thomas Graf4e902c52006-08-17 18:14:52 -0700517{
518 struct nlattr *attr;
519 int err, remaining;
520 struct rtmsg *rtm;
521
522 err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
523 if (err < 0)
524 goto errout;
525
526 memset(cfg, 0, sizeof(*cfg));
527
528 rtm = nlmsg_data(nlh);
Thomas Graf4e902c52006-08-17 18:14:52 -0700529 cfg->fc_dst_len = rtm->rtm_dst_len;
Thomas Graf4e902c52006-08-17 18:14:52 -0700530 cfg->fc_tos = rtm->rtm_tos;
531 cfg->fc_table = rtm->rtm_table;
532 cfg->fc_protocol = rtm->rtm_protocol;
533 cfg->fc_scope = rtm->rtm_scope;
534 cfg->fc_type = rtm->rtm_type;
535 cfg->fc_flags = rtm->rtm_flags;
536 cfg->fc_nlflags = nlh->nlmsg_flags;
537
538 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
539 cfg->fc_nlinfo.nlh = nlh;
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800540 cfg->fc_nlinfo.nl_net = net;
Thomas Graf4e902c52006-08-17 18:14:52 -0700541
Thomas Grafa0ee18b2007-03-24 20:32:54 -0700542 if (cfg->fc_type > RTN_MAX) {
543 err = -EINVAL;
544 goto errout;
545 }
546
Thomas Graf4e902c52006-08-17 18:14:52 -0700547 nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
Thomas Graf8f4c1f92007-09-12 14:44:36 +0200548 switch (nla_type(attr)) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700549 case RTA_DST:
Al Viro17fb2c62006-09-26 22:15:25 -0700550 cfg->fc_dst = nla_get_be32(attr);
Thomas Graf4e902c52006-08-17 18:14:52 -0700551 break;
Thomas Graf4e902c52006-08-17 18:14:52 -0700552 case RTA_OIF:
553 cfg->fc_oif = nla_get_u32(attr);
554 break;
555 case RTA_GATEWAY:
Al Viro17fb2c62006-09-26 22:15:25 -0700556 cfg->fc_gw = nla_get_be32(attr);
Thomas Graf4e902c52006-08-17 18:14:52 -0700557 break;
558 case RTA_PRIORITY:
559 cfg->fc_priority = nla_get_u32(attr);
560 break;
561 case RTA_PREFSRC:
Al Viro17fb2c62006-09-26 22:15:25 -0700562 cfg->fc_prefsrc = nla_get_be32(attr);
Thomas Graf4e902c52006-08-17 18:14:52 -0700563 break;
564 case RTA_METRICS:
565 cfg->fc_mx = nla_data(attr);
566 cfg->fc_mx_len = nla_len(attr);
567 break;
568 case RTA_MULTIPATH:
569 cfg->fc_mp = nla_data(attr);
570 cfg->fc_mp_len = nla_len(attr);
571 break;
572 case RTA_FLOW:
573 cfg->fc_flow = nla_get_u32(attr);
574 break;
Thomas Graf4e902c52006-08-17 18:14:52 -0700575 case RTA_TABLE:
576 cfg->fc_table = nla_get_u32(attr);
577 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700578 }
579 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700580
Linus Torvalds1da177e2005-04-16 15:20:36 -0700581 return 0;
Thomas Graf4e902c52006-08-17 18:14:52 -0700582errout:
583 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700584}
585
Jianjun Kong6ed2533e2008-11-03 00:25:16 -0800586static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700587{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900588 struct net *net = sock_net(skb->sk);
Thomas Graf4e902c52006-08-17 18:14:52 -0700589 struct fib_config cfg;
590 struct fib_table *tb;
591 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700592
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800593 err = rtm_to_fib_config(net, skb, nlh, &cfg);
Thomas Graf4e902c52006-08-17 18:14:52 -0700594 if (err < 0)
595 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700596
Denis V. Lunev8ad49422008-01-10 03:24:11 -0800597 tb = fib_get_table(net, cfg.fc_table);
Thomas Graf4e902c52006-08-17 18:14:52 -0700598 if (tb == NULL) {
599 err = -ESRCH;
600 goto errout;
601 }
602
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000603 err = fib_table_delete(tb, &cfg);
Thomas Graf4e902c52006-08-17 18:14:52 -0700604errout:
605 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700606}
607
Jianjun Kong6ed2533e2008-11-03 00:25:16 -0800608static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700609{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900610 struct net *net = sock_net(skb->sk);
Thomas Graf4e902c52006-08-17 18:14:52 -0700611 struct fib_config cfg;
612 struct fib_table *tb;
613 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700614
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800615 err = rtm_to_fib_config(net, skb, nlh, &cfg);
Thomas Graf4e902c52006-08-17 18:14:52 -0700616 if (err < 0)
617 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700618
Denis V. Lunev226b0b4a52008-01-10 03:30:24 -0800619 tb = fib_new_table(net, cfg.fc_table);
Thomas Graf4e902c52006-08-17 18:14:52 -0700620 if (tb == NULL) {
621 err = -ENOBUFS;
622 goto errout;
623 }
624
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000625 err = fib_table_insert(tb, &cfg);
Thomas Graf4e902c52006-08-17 18:14:52 -0700626errout:
627 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700628}
629
Thomas Graf63f34442007-03-22 11:55:17 -0700630static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700631{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900632 struct net *net = sock_net(skb->sk);
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700633 unsigned int h, s_h;
634 unsigned int e = 0, s_e;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700635 struct fib_table *tb;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700636 struct hlist_node *node;
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800637 struct hlist_head *head;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700638 int dumped = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700639
Thomas Grafbe403ea2006-08-17 18:15:17 -0700640 if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
641 ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700642 return ip_rt_dump(skb, cb);
643
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700644 s_h = cb->args[0];
645 s_e = cb->args[1];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700646
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700647 for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
648 e = 0;
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800649 head = &net->ipv4.fib_table_hash[h];
650 hlist_for_each_entry(tb, node, head, tb_hlist) {
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700651 if (e < s_e)
652 goto next;
653 if (dumped)
654 memset(&cb->args[2], 0, sizeof(cb->args) -
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900655 2 * sizeof(cb->args[0]));
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000656 if (fib_table_dump(tb, skb, cb) < 0)
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700657 goto out;
658 dumped = 1;
659next:
660 e++;
661 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700662 }
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700663out:
664 cb->args[1] = e;
665 cb->args[0] = h;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700666
667 return skb->len;
668}
669
670/* Prepare and feed intra-kernel routing request.
671 Really, it should be netlink message, but :-( netlink
672 can be not configured, so that we feed it directly
673 to fib engine. It is legal, because all events occur
674 only when netlink is already locked.
675 */
676
Al Viro81f7bf62006-09-27 18:40:00 -0700677static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700678{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900679 struct net *net = dev_net(ifa->ifa_dev->dev);
Thomas Graf4e902c52006-08-17 18:14:52 -0700680 struct fib_table *tb;
681 struct fib_config cfg = {
682 .fc_protocol = RTPROT_KERNEL,
683 .fc_type = type,
684 .fc_dst = dst,
685 .fc_dst_len = dst_len,
686 .fc_prefsrc = ifa->ifa_local,
687 .fc_oif = ifa->ifa_dev->dev->ifindex,
688 .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800689 .fc_nlinfo = {
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800690 .nl_net = net,
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800691 },
Thomas Graf4e902c52006-08-17 18:14:52 -0700692 };
Linus Torvalds1da177e2005-04-16 15:20:36 -0700693
694 if (type == RTN_UNICAST)
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800695 tb = fib_new_table(net, RT_TABLE_MAIN);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700696 else
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800697 tb = fib_new_table(net, RT_TABLE_LOCAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700698
699 if (tb == NULL)
700 return;
701
Thomas Graf4e902c52006-08-17 18:14:52 -0700702 cfg.fc_table = tb->tb_id;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700703
Thomas Graf4e902c52006-08-17 18:14:52 -0700704 if (type != RTN_LOCAL)
705 cfg.fc_scope = RT_SCOPE_LINK;
706 else
707 cfg.fc_scope = RT_SCOPE_HOST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700708
709 if (cmd == RTM_NEWROUTE)
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000710 fib_table_insert(tb, &cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700711 else
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000712 fib_table_delete(tb, &cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700713}
714
Jamal Hadi Salim0ff60a42005-11-22 14:47:37 -0800715void fib_add_ifaddr(struct in_ifaddr *ifa)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700716{
717 struct in_device *in_dev = ifa->ifa_dev;
718 struct net_device *dev = in_dev->dev;
719 struct in_ifaddr *prim = ifa;
Al Viroa144ea42006-09-28 18:00:55 -0700720 __be32 mask = ifa->ifa_mask;
721 __be32 addr = ifa->ifa_local;
722 __be32 prefix = ifa->ifa_address&mask;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700723
724 if (ifa->ifa_flags&IFA_F_SECONDARY) {
725 prim = inet_ifa_byprefix(in_dev, prefix, mask);
726 if (prim == NULL) {
Stephen Hemmingera6db9012008-01-12 20:58:35 -0800727 printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700728 return;
729 }
730 }
731
732 fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
733
734 if (!(dev->flags&IFF_UP))
735 return;
736
737 /* Add broadcast address, if it is explicitly assigned. */
Al Viroa144ea42006-09-28 18:00:55 -0700738 if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700739 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
740
Joe Perchesf97c1e02007-12-16 13:45:43 -0800741 if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -0700742 (prefix != addr || ifa->ifa_prefixlen < 32)) {
743 fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
744 RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
745
746 /* Add network specific broadcasts, when it takes a sense */
747 if (ifa->ifa_prefixlen < 31) {
748 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
749 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
750 }
751 }
752}
753
754static void fib_del_ifaddr(struct in_ifaddr *ifa)
755{
756 struct in_device *in_dev = ifa->ifa_dev;
757 struct net_device *dev = in_dev->dev;
758 struct in_ifaddr *ifa1;
759 struct in_ifaddr *prim = ifa;
Al Viroa144ea42006-09-28 18:00:55 -0700760 __be32 brd = ifa->ifa_address|~ifa->ifa_mask;
761 __be32 any = ifa->ifa_address&ifa->ifa_mask;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700762#define LOCAL_OK 1
763#define BRD_OK 2
764#define BRD0_OK 4
765#define BRD1_OK 8
766 unsigned ok = 0;
767
768 if (!(ifa->ifa_flags&IFA_F_SECONDARY))
769 fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
770 RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
771 else {
772 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
773 if (prim == NULL) {
Stephen Hemmingera6db9012008-01-12 20:58:35 -0800774 printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700775 return;
776 }
777 }
778
779 /* Deletion is more complicated than add.
780 We should take care of not to delete too much :-)
781
782 Scan address list to be sure that addresses are really gone.
783 */
784
785 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
786 if (ifa->ifa_local == ifa1->ifa_local)
787 ok |= LOCAL_OK;
788 if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
789 ok |= BRD_OK;
790 if (brd == ifa1->ifa_broadcast)
791 ok |= BRD1_OK;
792 if (any == ifa1->ifa_broadcast)
793 ok |= BRD0_OK;
794 }
795
796 if (!(ok&BRD_OK))
797 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
798 if (!(ok&BRD1_OK))
799 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
800 if (!(ok&BRD0_OK))
801 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
802 if (!(ok&LOCAL_OK)) {
803 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
804
805 /* Check, that this local address finally disappeared. */
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900806 if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700807 /* And the last, but not the least thing.
808 We must flush stray FIB entries.
809
810 First of all, we scan fib_info list searching
811 for stray nexthop entries, then ignite fib_flush.
812 */
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900813 if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local))
814 fib_flush(dev_net(dev));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700815 }
816 }
817#undef LOCAL_OK
818#undef BRD_OK
819#undef BRD0_OK
820#undef BRD1_OK
821}
822
Robert Olsson246955f2005-06-20 13:36:39 -0700823static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
824{
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900825
Robert Olsson246955f2005-06-20 13:36:39 -0700826 struct fib_result res;
Thomas Graf5f3008932006-11-09 15:21:41 -0800827 struct flowi fl = { .mark = frn->fl_mark,
Thomas Graf47dcf0c2006-11-09 15:20:38 -0800828 .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
Robert Olsson246955f2005-06-20 13:36:39 -0700829 .tos = frn->fl_tos,
830 .scope = frn->fl_scope } } };
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700831
Sergey Vlasov912a41a2007-04-27 02:17:19 -0700832#ifdef CONFIG_IP_MULTIPLE_TABLES
833 res.r = NULL;
834#endif
835
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700836 frn->err = -ENOENT;
Robert Olsson246955f2005-06-20 13:36:39 -0700837 if (tb) {
838 local_bh_disable();
839
840 frn->tb_id = tb->tb_id;
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000841 frn->err = fib_table_lookup(tb, &fl, &res);
Robert Olsson246955f2005-06-20 13:36:39 -0700842
843 if (!frn->err) {
844 frn->prefixlen = res.prefixlen;
845 frn->nh_sel = res.nh_sel;
846 frn->type = res.type;
847 frn->scope = res.scope;
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700848 fib_res_put(&res);
Robert Olsson246955f2005-06-20 13:36:39 -0700849 }
850 local_bh_enable();
851 }
852}
853
David S. Miller28f7b0362007-10-10 21:32:39 -0700854static void nl_fib_input(struct sk_buff *skb)
Robert Olsson246955f2005-06-20 13:36:39 -0700855{
Denis V. Lunev6bd48fc2008-01-10 03:28:55 -0800856 struct net *net;
Robert Olsson246955f2005-06-20 13:36:39 -0700857 struct fib_result_nl *frn;
David S. Miller28f7b0362007-10-10 21:32:39 -0700858 struct nlmsghdr *nlh;
Robert Olsson246955f2005-06-20 13:36:39 -0700859 struct fib_table *tb;
David S. Miller28f7b0362007-10-10 21:32:39 -0700860 u32 pid;
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700861
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900862 net = sock_net(skb->sk);
Arnaldo Carvalho de Melob529ccf2007-04-25 19:08:35 -0700863 nlh = nlmsg_hdr(skb);
Thomas Grafea865752005-12-01 14:30:00 -0800864 if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
Denis V. Lunevd883a032007-12-21 02:01:53 -0800865 nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
Thomas Grafea865752005-12-01 14:30:00 -0800866 return;
Denis V. Lunevd883a032007-12-21 02:01:53 -0800867
868 skb = skb_clone(skb, GFP_KERNEL);
869 if (skb == NULL)
870 return;
871 nlh = nlmsg_hdr(skb);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900872
Robert Olsson246955f2005-06-20 13:36:39 -0700873 frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
Denis V. Lunev6bd48fc2008-01-10 03:28:55 -0800874 tb = fib_get_table(net, frn->tb_id_in);
Robert Olsson246955f2005-06-20 13:36:39 -0700875
876 nl_fib_lookup(frn, tb);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900877
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700878 pid = NETLINK_CB(skb).pid; /* pid of sending process */
Robert Olsson246955f2005-06-20 13:36:39 -0700879 NETLINK_CB(skb).pid = 0; /* from kernel */
Patrick McHardyac6d4392005-08-14 19:29:52 -0700880 NETLINK_CB(skb).dst_group = 0; /* unicast */
Denis V. Lunev6bd48fc2008-01-10 03:28:55 -0800881 netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900882}
Robert Olsson246955f2005-06-20 13:36:39 -0700883
Denis V. Lunev7b1a74fd2008-01-10 03:22:17 -0800884static int nl_fib_lookup_init(struct net *net)
Robert Olsson246955f2005-06-20 13:36:39 -0700885{
Denis V. Lunev6bd48fc2008-01-10 03:28:55 -0800886 struct sock *sk;
887 sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0,
888 nl_fib_input, NULL, THIS_MODULE);
889 if (sk == NULL)
Denis V. Lunev7b1a74fd2008-01-10 03:22:17 -0800890 return -EAFNOSUPPORT;
Denis V. Lunev6bd48fc2008-01-10 03:28:55 -0800891 net->ipv4.fibnl = sk;
Denis V. Lunev7b1a74fd2008-01-10 03:22:17 -0800892 return 0;
893}
894
895static void nl_fib_lookup_exit(struct net *net)
896{
Denis V. Lunevb7c6ba62008-01-28 14:41:19 -0800897 netlink_kernel_release(net->ipv4.fibnl);
Denis V. Lunev775516b2008-01-18 23:55:19 -0800898 net->ipv4.fibnl = NULL;
Robert Olsson246955f2005-06-20 13:36:39 -0700899}
900
/*
 * Tear down IPv4 routing state that depends on @dev.
 *
 * @force is handed to fib_sync_down_dev(); when that call reports work to
 * do, the namespace's FIB is flushed.  The route cache is then flushed
 * with @delay as the flush argument and arp_ifdown() is called for the
 * device.
 */
static void fib_disable_ip(struct net_device *dev, int force, int delay)
{
	struct net *net = dev_net(dev);

	if (fib_sync_down_dev(dev, force))
		fib_flush(net);

	rt_cache_flush(net, delay);
	arp_ifdown(dev);
}
908
909static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
910{
Jianjun Kong6ed2533e2008-11-03 00:25:16 -0800911 struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700912 struct net_device *dev = ifa->ifa_dev->dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700913
914 switch (event) {
915 case NETDEV_UP:
916 fib_add_ifaddr(ifa);
917#ifdef CONFIG_IP_ROUTE_MULTIPATH
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700918 fib_sync_up(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700919#endif
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700920 rt_cache_flush(dev_net(dev), -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700921 break;
922 case NETDEV_DOWN:
923 fib_del_ifaddr(ifa);
Jayachandran C9fcc2e82005-10-27 15:10:01 -0700924 if (ifa->ifa_dev->ifa_list == NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700925 /* Last address was deleted from this interface.
926 Disable IP.
927 */
Octavian Purdilae2ce1462009-11-16 13:49:49 +0000928 fib_disable_ip(dev, 1, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700929 } else {
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700930 rt_cache_flush(dev_net(dev), -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700931 }
932 break;
933 }
934 return NOTIFY_DONE;
935}
936
937static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
938{
939 struct net_device *dev = ptr;
Herbert Xue5ed6392005-10-03 14:35:55 -0700940 struct in_device *in_dev = __in_dev_get_rtnl(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700941
942 if (event == NETDEV_UNREGISTER) {
Octavian Purdilae2ce1462009-11-16 13:49:49 +0000943 fib_disable_ip(dev, 2, -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700944 return NOTIFY_DONE;
945 }
946
947 if (!in_dev)
948 return NOTIFY_DONE;
949
950 switch (event) {
951 case NETDEV_UP:
952 for_ifa(in_dev) {
953 fib_add_ifaddr(ifa);
954 } endfor_ifa(in_dev);
955#ifdef CONFIG_IP_ROUTE_MULTIPATH
956 fib_sync_up(dev);
957#endif
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700958 rt_cache_flush(dev_net(dev), -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700959 break;
960 case NETDEV_DOWN:
Octavian Purdilae2ce1462009-11-16 13:49:49 +0000961 fib_disable_ip(dev, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700962 break;
963 case NETDEV_CHANGEMTU:
964 case NETDEV_CHANGE:
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700965 rt_cache_flush(dev_net(dev), 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700966 break;
Eric W. Biedermana5ee1552009-11-29 15:45:58 +0000967 case NETDEV_UNREGISTER_BATCH:
968 rt_cache_flush_batch();
969 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700970 }
971 return NOTIFY_DONE;
972}
973
974static struct notifier_block fib_inetaddr_notifier = {
Jianjun Kong6ed2533e2008-11-03 00:25:16 -0800975 .notifier_call = fib_inetaddr_event,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700976};
977
978static struct notifier_block fib_netdev_notifier = {
Jianjun Kong6ed2533e2008-11-03 00:25:16 -0800979 .notifier_call = fib_netdev_event,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700980};
981
Denis V. Lunev7b1a74fd2008-01-10 03:22:17 -0800982static int __net_init ip_fib_net_init(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700983{
Denis V. Lunevdce5cbe2008-01-31 18:44:53 -0800984 int err;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700985 unsigned int i;
986
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800987 net->ipv4.fib_table_hash = kzalloc(
988 sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL);
989 if (net->ipv4.fib_table_hash == NULL)
990 return -ENOMEM;
991
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700992 for (i = 0; i < FIB_TABLE_HASHSZ; i++)
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800993 INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]);
Pavel Emelyanovc3e9a352007-11-06 23:34:04 -0800994
Denis V. Lunevdce5cbe2008-01-31 18:44:53 -0800995 err = fib4_rules_init(net);
996 if (err < 0)
997 goto fail;
998 return 0;
999
1000fail:
1001 kfree(net->ipv4.fib_table_hash);
1002 return err;
Denis V. Lunev7b1a74fd2008-01-10 03:22:17 -08001003}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001004
Denis V. Lunev7b1a74fd2008-01-10 03:22:17 -08001005static void __net_exit ip_fib_net_exit(struct net *net)
1006{
1007 unsigned int i;
Thomas Graf63f34442007-03-22 11:55:17 -07001008
Denis V. Lunev7b1a74fd2008-01-10 03:22:17 -08001009#ifdef CONFIG_IP_MULTIPLE_TABLES
1010 fib4_rules_exit(net);
1011#endif
1012
1013 for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
1014 struct fib_table *tb;
1015 struct hlist_head *head;
1016 struct hlist_node *node, *tmp;
1017
Denis V. Luneve4aef8a2008-01-10 03:28:24 -08001018 head = &net->ipv4.fib_table_hash[i];
Denis V. Lunev7b1a74fd2008-01-10 03:22:17 -08001019 hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
1020 hlist_del(node);
Stephen Hemminger16c6cf82009-09-20 10:35:36 +00001021 fib_table_flush(tb);
Denis V. Lunev7b1a74fd2008-01-10 03:22:17 -08001022 kfree(tb);
1023 }
1024 }
Denis V. Luneve4aef8a2008-01-10 03:28:24 -08001025 kfree(net->ipv4.fib_table_hash);
Denis V. Lunev7b1a74fd2008-01-10 03:22:17 -08001026}
1027
1028static int __net_init fib_net_init(struct net *net)
1029{
1030 int error;
1031
Denis V. Lunev7b1a74fd2008-01-10 03:22:17 -08001032 error = ip_fib_net_init(net);
1033 if (error < 0)
1034 goto out;
1035 error = nl_fib_lookup_init(net);
1036 if (error < 0)
1037 goto out_nlfl;
1038 error = fib_proc_init(net);
1039 if (error < 0)
1040 goto out_proc;
1041out:
1042 return error;
1043
1044out_proc:
1045 nl_fib_lookup_exit(net);
1046out_nlfl:
1047 ip_fib_net_exit(net);
1048 goto out;
1049}
1050
1051static void __net_exit fib_net_exit(struct net *net)
1052{
1053 fib_proc_exit(net);
1054 nl_fib_lookup_exit(net);
1055 ip_fib_net_exit(net);
1056}
1057
1058static struct pernet_operations fib_net_ops = {
1059 .init = fib_net_init,
1060 .exit = fib_net_exit,
1061};
1062
1063void __init ip_fib_init(void)
1064{
Thomas Graf63f34442007-03-22 11:55:17 -07001065 rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
1066 rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
1067 rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
Denis V. Lunev7b1a74fd2008-01-10 03:22:17 -08001068
1069 register_pernet_subsys(&fib_net_ops);
1070 register_netdevice_notifier(&fib_netdev_notifier);
1071 register_inetaddr_notifier(&fib_inetaddr_notifier);
Stephen Hemminger7f9b8052008-01-14 23:14:20 -08001072
1073 fib_hash_init();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001074}
1075
/* Symbols of this file that are exported to modules. */
EXPORT_SYMBOL(inet_addr_type);
EXPORT_SYMBOL(inet_dev_addr_type);
EXPORT_SYMBOL(ip_dev_find);