blob: e830f7a123bdaae5da731467a04624aeb26bd257 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * IPv4 Forwarding Information Base: FIB frontend.
7 *
Linus Torvalds1da177e2005-04-16 15:20:36 -07008 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
Linus Torvalds1da177e2005-04-16 15:20:36 -070016#include <linux/module.h>
17#include <asm/uaccess.h>
18#include <asm/system.h>
19#include <linux/bitops.h>
Randy Dunlap4fc268d2006-01-11 12:17:47 -080020#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070021#include <linux/types.h>
22#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070023#include <linux/mm.h>
24#include <linux/string.h>
25#include <linux/socket.h>
26#include <linux/sockios.h>
27#include <linux/errno.h>
28#include <linux/in.h>
29#include <linux/inet.h>
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020030#include <linux/inetdevice.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070031#include <linux/netdevice.h>
Thomas Graf18237302006-08-04 23:04:54 -070032#include <linux/if_addr.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070033#include <linux/if_arp.h>
34#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070035#include <linux/init.h>
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070036#include <linux/list.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090037#include <linux/slab.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070038
39#include <net/ip.h>
40#include <net/protocol.h>
41#include <net/route.h>
42#include <net/tcp.h>
43#include <net/sock.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070044#include <net/arp.h>
45#include <net/ip_fib.h>
Thomas Graf63f34442007-03-22 11:55:17 -070046#include <net/rtnetlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070047
Linus Torvalds1da177e2005-04-16 15:20:36 -070048#ifndef CONFIG_IP_MULTIPLE_TABLES
49
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -080050static int __net_init fib4_rules_init(struct net *net)
Pavel Emelyanovc3e9a352007-11-06 23:34:04 -080051{
Denis V. Lunev93456b62008-01-10 03:23:38 -080052 struct fib_table *local_table, *main_table;
53
Stephen Hemminger7f9b8052008-01-14 23:14:20 -080054 local_table = fib_hash_table(RT_TABLE_LOCAL);
Denis V. Lunev93456b62008-01-10 03:23:38 -080055 if (local_table == NULL)
Denis V. Lunevdbb50162008-01-10 03:21:49 -080056 return -ENOMEM;
57
Stephen Hemminger7f9b8052008-01-14 23:14:20 -080058 main_table = fib_hash_table(RT_TABLE_MAIN);
Denis V. Lunev93456b62008-01-10 03:23:38 -080059 if (main_table == NULL)
Denis V. Lunevdbb50162008-01-10 03:21:49 -080060 goto fail;
61
Denis V. Lunev93456b62008-01-10 03:23:38 -080062 hlist_add_head_rcu(&local_table->tb_hlist,
Denis V. Luneve4aef8a2008-01-10 03:28:24 -080063 &net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]);
Denis V. Lunev93456b62008-01-10 03:23:38 -080064 hlist_add_head_rcu(&main_table->tb_hlist,
Denis V. Luneve4aef8a2008-01-10 03:28:24 -080065 &net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]);
Denis V. Lunevdbb50162008-01-10 03:21:49 -080066 return 0;
67
68fail:
Denis V. Lunev93456b62008-01-10 03:23:38 -080069 kfree(local_table);
Denis V. Lunevdbb50162008-01-10 03:21:49 -080070 return -ENOMEM;
Pavel Emelyanovc3e9a352007-11-06 23:34:04 -080071}
Linus Torvalds1da177e2005-04-16 15:20:36 -070072#else
73
Denis V. Lunev8ad49422008-01-10 03:24:11 -080074struct fib_table *fib_new_table(struct net *net, u32 id)
Linus Torvalds1da177e2005-04-16 15:20:36 -070075{
76 struct fib_table *tb;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070077 unsigned int h;
Linus Torvalds1da177e2005-04-16 15:20:36 -070078
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070079 if (id == 0)
80 id = RT_TABLE_MAIN;
Denis V. Lunev8ad49422008-01-10 03:24:11 -080081 tb = fib_get_table(net, id);
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070082 if (tb)
83 return tb;
Stephen Hemminger7f9b8052008-01-14 23:14:20 -080084
85 tb = fib_hash_table(id);
Linus Torvalds1da177e2005-04-16 15:20:36 -070086 if (!tb)
87 return NULL;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070088 h = id & (FIB_TABLE_HASHSZ - 1);
Denis V. Luneve4aef8a2008-01-10 03:28:24 -080089 hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]);
Linus Torvalds1da177e2005-04-16 15:20:36 -070090 return tb;
91}
92
Denis V. Lunev8ad49422008-01-10 03:24:11 -080093struct fib_table *fib_get_table(struct net *net, u32 id)
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070094{
95 struct fib_table *tb;
96 struct hlist_node *node;
Denis V. Luneve4aef8a2008-01-10 03:28:24 -080097 struct hlist_head *head;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070098 unsigned int h;
Linus Torvalds1da177e2005-04-16 15:20:36 -070099
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700100 if (id == 0)
101 id = RT_TABLE_MAIN;
102 h = id & (FIB_TABLE_HASHSZ - 1);
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800103
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700104 rcu_read_lock();
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800105 head = &net->ipv4.fib_table_hash[h];
106 hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700107 if (tb->tb_id == id) {
108 rcu_read_unlock();
109 return tb;
110 }
111 }
112 rcu_read_unlock();
113 return NULL;
114}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700115#endif /* CONFIG_IP_MULTIPLE_TABLES */
116
Denis V. Lunev010278e2008-01-22 22:04:04 -0800117void fib_select_default(struct net *net,
118 const struct flowi *flp, struct fib_result *res)
Denis V. Lunev64c2d532008-01-22 22:03:33 -0800119{
120 struct fib_table *tb;
121 int table = RT_TABLE_MAIN;
122#ifdef CONFIG_IP_MULTIPLE_TABLES
123 if (res->r == NULL || res->r->action != FR_ACT_TO_TBL)
124 return;
125 table = res->r->table;
126#endif
Denis V. Lunev010278e2008-01-22 22:04:04 -0800127 tb = fib_get_table(net, table);
Denis V. Lunev64c2d532008-01-22 22:03:33 -0800128 if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000129 fib_table_select_default(tb, flp, res);
Denis V. Lunev64c2d532008-01-22 22:03:33 -0800130}
131
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800132static void fib_flush(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700133{
134 int flushed = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700135 struct fib_table *tb;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700136 struct hlist_node *node;
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800137 struct hlist_head *head;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700138 unsigned int h;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700139
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700140 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800141 head = &net->ipv4.fib_table_hash[h];
142 hlist_for_each_entry(tb, node, head, tb_hlist)
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000143 flushed += fib_table_flush(tb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700144 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700145
146 if (flushed)
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700147 rt_cache_flush(net, -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700148}
149
150/*
151 * Find the first device with a given source address.
152 */
153
Denis V. Lunev1ab35272008-01-22 22:04:30 -0800154struct net_device * ip_dev_find(struct net *net, __be32 addr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700155{
156 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
157 struct fib_result res;
158 struct net_device *dev = NULL;
Pavel Emelyanov03cf7862007-10-23 21:17:27 -0700159 struct fib_table *local_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700160
161#ifdef CONFIG_IP_MULTIPLE_TABLES
162 res.r = NULL;
163#endif
164
Denis V. Lunev1ab35272008-01-22 22:04:30 -0800165 local_table = fib_get_table(net, RT_TABLE_LOCAL);
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000166 if (!local_table || fib_table_lookup(local_table, &fl, &res))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700167 return NULL;
168 if (res.type != RTN_LOCAL)
169 goto out;
170 dev = FIB_RES_DEV(res);
171
172 if (dev)
173 dev_hold(dev);
174out:
175 fib_res_put(&res);
176 return dev;
177}
178
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800179/*
180 * Find address type as if only "dev" was present in the system. If
181 * on_dev is NULL then all interfaces are taken into consideration.
182 */
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800183static inline unsigned __inet_dev_addr_type(struct net *net,
184 const struct net_device *dev,
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800185 __be32 addr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700186{
187 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
188 struct fib_result res;
189 unsigned ret = RTN_BROADCAST;
Pavel Emelyanov03cf7862007-10-23 21:17:27 -0700190 struct fib_table *local_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700191
Jan Engelhardt1e637c72008-01-21 03:18:08 -0800192 if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700193 return RTN_BROADCAST;
Joe Perchesf97c1e02007-12-16 13:45:43 -0800194 if (ipv4_is_multicast(addr))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700195 return RTN_MULTICAST;
196
197#ifdef CONFIG_IP_MULTIPLE_TABLES
198 res.r = NULL;
199#endif
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900200
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800201 local_table = fib_get_table(net, RT_TABLE_LOCAL);
Pavel Emelyanov03cf7862007-10-23 21:17:27 -0700202 if (local_table) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700203 ret = RTN_UNICAST;
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000204 if (!fib_table_lookup(local_table, &fl, &res)) {
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800205 if (!dev || dev == res.fi->fib_dev)
206 ret = res.type;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700207 fib_res_put(&res);
208 }
209 }
210 return ret;
211}
212
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800213unsigned int inet_addr_type(struct net *net, __be32 addr)
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800214{
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800215 return __inet_dev_addr_type(net, NULL, addr);
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800216}
217
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800218unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
219 __be32 addr)
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800220{
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800221 return __inet_dev_addr_type(net, dev, addr);
Laszlo Attila Toth05538112007-12-04 23:28:46 -0800222}
223
Linus Torvalds1da177e2005-04-16 15:20:36 -0700224/* Given (packet source, input interface) and optional (dst, oif, tos):
225 - (main) check, that source is valid i.e. not broadcast or our local
226 address.
227 - figure out what "logical" interface this packet arrived
228 and calculate "specific destination" address.
229 - check, that packet arrived from expected physical interface.
230 */
231
Al Virod9c9df82006-09-26 21:28:14 -0700232int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
jamalb0c110c2009-10-18 02:12:33 +0000233 struct net_device *dev, __be32 *spec_dst,
234 u32 *itag, u32 mark)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700235{
236 struct in_device *in_dev;
237 struct flowi fl = { .nl_u = { .ip4_u =
238 { .daddr = src,
239 .saddr = dst,
240 .tos = tos } },
jamalb0c110c2009-10-18 02:12:33 +0000241 .mark = mark,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700242 .iif = oif };
jamalb0c110c2009-10-18 02:12:33 +0000243
Linus Torvalds1da177e2005-04-16 15:20:36 -0700244 struct fib_result res;
Patrick McHardy8153a102009-12-03 01:25:58 +0000245 int no_addr, rpf, accept_local;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700246 int ret;
Denis V. Lunev5b707aa2008-01-21 17:33:15 -0800247 struct net *net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700248
Patrick McHardy8153a102009-12-03 01:25:58 +0000249 no_addr = rpf = accept_local = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700250 rcu_read_lock();
Herbert Xue5ed6392005-10-03 14:35:55 -0700251 in_dev = __in_dev_get_rcu(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700252 if (in_dev) {
253 no_addr = in_dev->ifa_list == NULL;
254 rpf = IN_DEV_RPFILTER(in_dev);
Patrick McHardy8153a102009-12-03 01:25:58 +0000255 accept_local = IN_DEV_ACCEPT_LOCAL(in_dev);
Jamal Hadi Salim28f6aee2009-12-25 17:30:22 -0800256 if (mark && !IN_DEV_SRC_VMARK(in_dev))
257 fl.mark = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700258 }
259 rcu_read_unlock();
260
261 if (in_dev == NULL)
262 goto e_inval;
263
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900264 net = dev_net(dev);
Denis V. Lunev5b707aa2008-01-21 17:33:15 -0800265 if (fib_lookup(net, &fl, &res))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700266 goto last_resort;
Patrick McHardy8153a102009-12-03 01:25:58 +0000267 if (res.type != RTN_UNICAST) {
268 if (res.type != RTN_LOCAL || !accept_local)
269 goto e_inval_res;
270 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700271 *spec_dst = FIB_RES_PREFSRC(res);
272 fib_combine_itag(itag, &res);
273#ifdef CONFIG_IP_ROUTE_MULTIPATH
274 if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
275#else
276 if (FIB_RES_DEV(res) == dev)
277#endif
278 {
279 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
280 fib_res_put(&res);
281 return ret;
282 }
283 fib_res_put(&res);
284 if (no_addr)
285 goto last_resort;
Stephen Hemmingerc1cf8422009-02-20 08:25:36 +0000286 if (rpf == 1)
Eric Dumazetb5f7e752010-06-02 12:05:27 +0000287 goto e_rpf;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700288 fl.oif = dev->ifindex;
289
290 ret = 0;
Denis V. Lunev5b707aa2008-01-21 17:33:15 -0800291 if (fib_lookup(net, &fl, &res) == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700292 if (res.type == RTN_UNICAST) {
293 *spec_dst = FIB_RES_PREFSRC(res);
294 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
295 }
296 fib_res_put(&res);
297 }
298 return ret;
299
300last_resort:
301 if (rpf)
Eric Dumazetb5f7e752010-06-02 12:05:27 +0000302 goto e_rpf;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700303 *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
304 *itag = 0;
305 return 0;
306
307e_inval_res:
308 fib_res_put(&res);
309e_inval:
310 return -EINVAL;
Eric Dumazetb5f7e752010-06-02 12:05:27 +0000311e_rpf:
312 return -EXDEV;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700313}
314
Al Viro81f7bf62006-09-27 18:40:00 -0700315static inline __be32 sk_extract_addr(struct sockaddr *addr)
Thomas Graf4e902c52006-08-17 18:14:52 -0700316{
317 return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
318}
319
320static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
321{
322 struct nlattr *nla;
323
324 nla = (struct nlattr *) ((char *) mx + len);
325 nla->nla_type = type;
326 nla->nla_len = nla_attr_size(4);
327 *(u32 *) nla_data(nla) = value;
328
329 return len + nla_total_size(4);
330}
331
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800332static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
Thomas Graf4e902c52006-08-17 18:14:52 -0700333 struct fib_config *cfg)
334{
Al Viro6d85c102006-09-26 22:15:46 -0700335 __be32 addr;
Thomas Graf4e902c52006-08-17 18:14:52 -0700336 int plen;
337
338 memset(cfg, 0, sizeof(*cfg));
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800339 cfg->fc_nlinfo.nl_net = net;
Thomas Graf4e902c52006-08-17 18:14:52 -0700340
341 if (rt->rt_dst.sa_family != AF_INET)
342 return -EAFNOSUPPORT;
343
344 /*
345 * Check mask for validity:
346 * a) it must be contiguous.
347 * b) destination must have all host bits clear.
348 * c) if application forgot to set correct family (AF_INET),
349 * reject request unless it is absolutely clear i.e.
350 * both family and mask are zero.
351 */
352 plen = 32;
353 addr = sk_extract_addr(&rt->rt_dst);
354 if (!(rt->rt_flags & RTF_HOST)) {
Al Viro81f7bf62006-09-27 18:40:00 -0700355 __be32 mask = sk_extract_addr(&rt->rt_genmask);
Thomas Graf4e902c52006-08-17 18:14:52 -0700356
357 if (rt->rt_genmask.sa_family != AF_INET) {
358 if (mask || rt->rt_genmask.sa_family)
359 return -EAFNOSUPPORT;
360 }
361
362 if (bad_mask(mask, addr))
363 return -EINVAL;
364
365 plen = inet_mask_len(mask);
366 }
367
368 cfg->fc_dst_len = plen;
369 cfg->fc_dst = addr;
370
371 if (cmd != SIOCDELRT) {
372 cfg->fc_nlflags = NLM_F_CREATE;
373 cfg->fc_protocol = RTPROT_BOOT;
374 }
375
376 if (rt->rt_metric)
377 cfg->fc_priority = rt->rt_metric - 1;
378
379 if (rt->rt_flags & RTF_REJECT) {
380 cfg->fc_scope = RT_SCOPE_HOST;
381 cfg->fc_type = RTN_UNREACHABLE;
382 return 0;
383 }
384
385 cfg->fc_scope = RT_SCOPE_NOWHERE;
386 cfg->fc_type = RTN_UNICAST;
387
388 if (rt->rt_dev) {
389 char *colon;
390 struct net_device *dev;
391 char devname[IFNAMSIZ];
392
393 if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
394 return -EFAULT;
395
396 devname[IFNAMSIZ-1] = 0;
397 colon = strchr(devname, ':');
398 if (colon)
399 *colon = 0;
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800400 dev = __dev_get_by_name(net, devname);
Thomas Graf4e902c52006-08-17 18:14:52 -0700401 if (!dev)
402 return -ENODEV;
403 cfg->fc_oif = dev->ifindex;
404 if (colon) {
405 struct in_ifaddr *ifa;
406 struct in_device *in_dev = __in_dev_get_rtnl(dev);
407 if (!in_dev)
408 return -ENODEV;
409 *colon = ':';
410 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
411 if (strcmp(ifa->ifa_label, devname) == 0)
412 break;
413 if (ifa == NULL)
414 return -ENODEV;
415 cfg->fc_prefsrc = ifa->ifa_local;
416 }
417 }
418
419 addr = sk_extract_addr(&rt->rt_gateway);
420 if (rt->rt_gateway.sa_family == AF_INET && addr) {
421 cfg->fc_gw = addr;
422 if (rt->rt_flags & RTF_GATEWAY &&
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800423 inet_addr_type(net, addr) == RTN_UNICAST)
Thomas Graf4e902c52006-08-17 18:14:52 -0700424 cfg->fc_scope = RT_SCOPE_UNIVERSE;
425 }
426
427 if (cmd == SIOCDELRT)
428 return 0;
429
430 if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
431 return -EINVAL;
432
433 if (cfg->fc_scope == RT_SCOPE_NOWHERE)
434 cfg->fc_scope = RT_SCOPE_LINK;
435
436 if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
437 struct nlattr *mx;
438 int len = 0;
439
440 mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900441 if (mx == NULL)
Thomas Graf4e902c52006-08-17 18:14:52 -0700442 return -ENOMEM;
443
444 if (rt->rt_flags & RTF_MTU)
445 len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
446
447 if (rt->rt_flags & RTF_WINDOW)
448 len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
449
450 if (rt->rt_flags & RTF_IRTT)
451 len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
452
453 cfg->fc_mx = mx;
454 cfg->fc_mx_len = len;
455 }
456
457 return 0;
458}
459
Linus Torvalds1da177e2005-04-16 15:20:36 -0700460/*
461 * Handle IP routing ioctl calls. These are used to manipulate the routing tables
462 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900463
Denis V. Lunev1bad1182008-01-10 03:29:53 -0800464int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700465{
Thomas Graf4e902c52006-08-17 18:14:52 -0700466 struct fib_config cfg;
467 struct rtentry rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700468 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700469
470 switch (cmd) {
471 case SIOCADDRT: /* Add a route */
472 case SIOCDELRT: /* Delete a route */
473 if (!capable(CAP_NET_ADMIN))
474 return -EPERM;
Thomas Graf4e902c52006-08-17 18:14:52 -0700475
476 if (copy_from_user(&rt, arg, sizeof(rt)))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700477 return -EFAULT;
Thomas Graf4e902c52006-08-17 18:14:52 -0700478
Linus Torvalds1da177e2005-04-16 15:20:36 -0700479 rtnl_lock();
Denis V. Lunev1bad1182008-01-10 03:29:53 -0800480 err = rtentry_to_fib_config(net, cmd, &rt, &cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700481 if (err == 0) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700482 struct fib_table *tb;
483
Linus Torvalds1da177e2005-04-16 15:20:36 -0700484 if (cmd == SIOCDELRT) {
Denis V. Lunev1bad1182008-01-10 03:29:53 -0800485 tb = fib_get_table(net, cfg.fc_table);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700486 if (tb)
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000487 err = fib_table_delete(tb, &cfg);
Thomas Graf4e902c52006-08-17 18:14:52 -0700488 else
489 err = -ESRCH;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700490 } else {
Denis V. Lunev1bad1182008-01-10 03:29:53 -0800491 tb = fib_new_table(net, cfg.fc_table);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700492 if (tb)
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000493 err = fib_table_insert(tb, &cfg);
Thomas Graf4e902c52006-08-17 18:14:52 -0700494 else
495 err = -ENOBUFS;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700496 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700497
498 /* allocated by rtentry_to_fib_config() */
499 kfree(cfg.fc_mx);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700500 }
501 rtnl_unlock();
502 return err;
503 }
504 return -EINVAL;
505}
506
Patrick McHardyef7c79e2007-06-05 12:38:30 -0700507const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
Thomas Graf4e902c52006-08-17 18:14:52 -0700508 [RTA_DST] = { .type = NLA_U32 },
509 [RTA_SRC] = { .type = NLA_U32 },
510 [RTA_IIF] = { .type = NLA_U32 },
511 [RTA_OIF] = { .type = NLA_U32 },
512 [RTA_GATEWAY] = { .type = NLA_U32 },
513 [RTA_PRIORITY] = { .type = NLA_U32 },
514 [RTA_PREFSRC] = { .type = NLA_U32 },
515 [RTA_METRICS] = { .type = NLA_NESTED },
Thomas Graf5176f912006-08-26 20:13:18 -0700516 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
Thomas Graf4e902c52006-08-17 18:14:52 -0700517 [RTA_FLOW] = { .type = NLA_U32 },
Thomas Graf4e902c52006-08-17 18:14:52 -0700518};
Linus Torvalds1da177e2005-04-16 15:20:36 -0700519
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800520static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
521 struct nlmsghdr *nlh, struct fib_config *cfg)
Thomas Graf4e902c52006-08-17 18:14:52 -0700522{
523 struct nlattr *attr;
524 int err, remaining;
525 struct rtmsg *rtm;
526
527 err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
528 if (err < 0)
529 goto errout;
530
531 memset(cfg, 0, sizeof(*cfg));
532
533 rtm = nlmsg_data(nlh);
Thomas Graf4e902c52006-08-17 18:14:52 -0700534 cfg->fc_dst_len = rtm->rtm_dst_len;
Thomas Graf4e902c52006-08-17 18:14:52 -0700535 cfg->fc_tos = rtm->rtm_tos;
536 cfg->fc_table = rtm->rtm_table;
537 cfg->fc_protocol = rtm->rtm_protocol;
538 cfg->fc_scope = rtm->rtm_scope;
539 cfg->fc_type = rtm->rtm_type;
540 cfg->fc_flags = rtm->rtm_flags;
541 cfg->fc_nlflags = nlh->nlmsg_flags;
542
543 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
544 cfg->fc_nlinfo.nlh = nlh;
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800545 cfg->fc_nlinfo.nl_net = net;
Thomas Graf4e902c52006-08-17 18:14:52 -0700546
Thomas Grafa0ee18b2007-03-24 20:32:54 -0700547 if (cfg->fc_type > RTN_MAX) {
548 err = -EINVAL;
549 goto errout;
550 }
551
Thomas Graf4e902c52006-08-17 18:14:52 -0700552 nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
Thomas Graf8f4c1f92007-09-12 14:44:36 +0200553 switch (nla_type(attr)) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700554 case RTA_DST:
Al Viro17fb2c62006-09-26 22:15:25 -0700555 cfg->fc_dst = nla_get_be32(attr);
Thomas Graf4e902c52006-08-17 18:14:52 -0700556 break;
Thomas Graf4e902c52006-08-17 18:14:52 -0700557 case RTA_OIF:
558 cfg->fc_oif = nla_get_u32(attr);
559 break;
560 case RTA_GATEWAY:
Al Viro17fb2c62006-09-26 22:15:25 -0700561 cfg->fc_gw = nla_get_be32(attr);
Thomas Graf4e902c52006-08-17 18:14:52 -0700562 break;
563 case RTA_PRIORITY:
564 cfg->fc_priority = nla_get_u32(attr);
565 break;
566 case RTA_PREFSRC:
Al Viro17fb2c62006-09-26 22:15:25 -0700567 cfg->fc_prefsrc = nla_get_be32(attr);
Thomas Graf4e902c52006-08-17 18:14:52 -0700568 break;
569 case RTA_METRICS:
570 cfg->fc_mx = nla_data(attr);
571 cfg->fc_mx_len = nla_len(attr);
572 break;
573 case RTA_MULTIPATH:
574 cfg->fc_mp = nla_data(attr);
575 cfg->fc_mp_len = nla_len(attr);
576 break;
577 case RTA_FLOW:
578 cfg->fc_flow = nla_get_u32(attr);
579 break;
Thomas Graf4e902c52006-08-17 18:14:52 -0700580 case RTA_TABLE:
581 cfg->fc_table = nla_get_u32(attr);
582 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700583 }
584 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700585
Linus Torvalds1da177e2005-04-16 15:20:36 -0700586 return 0;
Thomas Graf4e902c52006-08-17 18:14:52 -0700587errout:
588 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700589}
590
Jianjun Kong6ed25332008-11-03 00:25:16 -0800591static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700592{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900593 struct net *net = sock_net(skb->sk);
Thomas Graf4e902c52006-08-17 18:14:52 -0700594 struct fib_config cfg;
595 struct fib_table *tb;
596 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700597
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800598 err = rtm_to_fib_config(net, skb, nlh, &cfg);
Thomas Graf4e902c52006-08-17 18:14:52 -0700599 if (err < 0)
600 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700601
Denis V. Lunev8ad49422008-01-10 03:24:11 -0800602 tb = fib_get_table(net, cfg.fc_table);
Thomas Graf4e902c52006-08-17 18:14:52 -0700603 if (tb == NULL) {
604 err = -ESRCH;
605 goto errout;
606 }
607
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000608 err = fib_table_delete(tb, &cfg);
Thomas Graf4e902c52006-08-17 18:14:52 -0700609errout:
610 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700611}
612
Jianjun Kong6ed25332008-11-03 00:25:16 -0800613static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700614{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900615 struct net *net = sock_net(skb->sk);
Thomas Graf4e902c52006-08-17 18:14:52 -0700616 struct fib_config cfg;
617 struct fib_table *tb;
618 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700619
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800620 err = rtm_to_fib_config(net, skb, nlh, &cfg);
Thomas Graf4e902c52006-08-17 18:14:52 -0700621 if (err < 0)
622 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700623
Denis V. Lunev226b0b4a52008-01-10 03:30:24 -0800624 tb = fib_new_table(net, cfg.fc_table);
Thomas Graf4e902c52006-08-17 18:14:52 -0700625 if (tb == NULL) {
626 err = -ENOBUFS;
627 goto errout;
628 }
629
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000630 err = fib_table_insert(tb, &cfg);
Thomas Graf4e902c52006-08-17 18:14:52 -0700631errout:
632 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700633}
634
Thomas Graf63f34442007-03-22 11:55:17 -0700635static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700636{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900637 struct net *net = sock_net(skb->sk);
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700638 unsigned int h, s_h;
639 unsigned int e = 0, s_e;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700640 struct fib_table *tb;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700641 struct hlist_node *node;
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800642 struct hlist_head *head;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700643 int dumped = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700644
Thomas Grafbe403ea2006-08-17 18:15:17 -0700645 if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
646 ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700647 return ip_rt_dump(skb, cb);
648
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700649 s_h = cb->args[0];
650 s_e = cb->args[1];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700651
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700652 for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
653 e = 0;
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800654 head = &net->ipv4.fib_table_hash[h];
655 hlist_for_each_entry(tb, node, head, tb_hlist) {
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700656 if (e < s_e)
657 goto next;
658 if (dumped)
659 memset(&cb->args[2], 0, sizeof(cb->args) -
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900660 2 * sizeof(cb->args[0]));
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000661 if (fib_table_dump(tb, skb, cb) < 0)
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700662 goto out;
663 dumped = 1;
664next:
665 e++;
666 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700667 }
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700668out:
669 cb->args[1] = e;
670 cb->args[0] = h;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700671
672 return skb->len;
673}
674
675/* Prepare and feed intra-kernel routing request.
676 Really, it should be netlink message, but :-( netlink
677 can be not configured, so that we feed it directly
678 to fib engine. It is legal, because all events occur
679 only when netlink is already locked.
680 */
681
Al Viro81f7bf62006-09-27 18:40:00 -0700682static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700683{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900684 struct net *net = dev_net(ifa->ifa_dev->dev);
Thomas Graf4e902c52006-08-17 18:14:52 -0700685 struct fib_table *tb;
686 struct fib_config cfg = {
687 .fc_protocol = RTPROT_KERNEL,
688 .fc_type = type,
689 .fc_dst = dst,
690 .fc_dst_len = dst_len,
691 .fc_prefsrc = ifa->ifa_local,
692 .fc_oif = ifa->ifa_dev->dev->ifindex,
693 .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800694 .fc_nlinfo = {
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800695 .nl_net = net,
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800696 },
Thomas Graf4e902c52006-08-17 18:14:52 -0700697 };
Linus Torvalds1da177e2005-04-16 15:20:36 -0700698
699 if (type == RTN_UNICAST)
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800700 tb = fib_new_table(net, RT_TABLE_MAIN);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700701 else
Denis V. Lunev4b5d47d2008-01-10 03:29:23 -0800702 tb = fib_new_table(net, RT_TABLE_LOCAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700703
704 if (tb == NULL)
705 return;
706
Thomas Graf4e902c52006-08-17 18:14:52 -0700707 cfg.fc_table = tb->tb_id;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700708
Thomas Graf4e902c52006-08-17 18:14:52 -0700709 if (type != RTN_LOCAL)
710 cfg.fc_scope = RT_SCOPE_LINK;
711 else
712 cfg.fc_scope = RT_SCOPE_HOST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700713
714 if (cmd == RTM_NEWROUTE)
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000715 fib_table_insert(tb, &cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700716 else
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000717 fib_table_delete(tb, &cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700718}
719
Jamal Hadi Salim0ff60a42005-11-22 14:47:37 -0800720void fib_add_ifaddr(struct in_ifaddr *ifa)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700721{
722 struct in_device *in_dev = ifa->ifa_dev;
723 struct net_device *dev = in_dev->dev;
724 struct in_ifaddr *prim = ifa;
Al Viroa144ea42006-09-28 18:00:55 -0700725 __be32 mask = ifa->ifa_mask;
726 __be32 addr = ifa->ifa_local;
727 __be32 prefix = ifa->ifa_address&mask;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700728
729 if (ifa->ifa_flags&IFA_F_SECONDARY) {
730 prim = inet_ifa_byprefix(in_dev, prefix, mask);
731 if (prim == NULL) {
Stephen Hemmingera6db9012008-01-12 20:58:35 -0800732 printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700733 return;
734 }
735 }
736
737 fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
738
739 if (!(dev->flags&IFF_UP))
740 return;
741
742 /* Add broadcast address, if it is explicitly assigned. */
Al Viroa144ea42006-09-28 18:00:55 -0700743 if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700744 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
745
Joe Perchesf97c1e02007-12-16 13:45:43 -0800746 if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -0700747 (prefix != addr || ifa->ifa_prefixlen < 32)) {
748 fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
749 RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
750
751 /* Add network specific broadcasts, when it takes a sense */
752 if (ifa->ifa_prefixlen < 31) {
753 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
754 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
755 }
756 }
757}
758
759static void fib_del_ifaddr(struct in_ifaddr *ifa)
760{
761 struct in_device *in_dev = ifa->ifa_dev;
762 struct net_device *dev = in_dev->dev;
763 struct in_ifaddr *ifa1;
764 struct in_ifaddr *prim = ifa;
Al Viroa144ea42006-09-28 18:00:55 -0700765 __be32 brd = ifa->ifa_address|~ifa->ifa_mask;
766 __be32 any = ifa->ifa_address&ifa->ifa_mask;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700767#define LOCAL_OK 1
768#define BRD_OK 2
769#define BRD0_OK 4
770#define BRD1_OK 8
771 unsigned ok = 0;
772
773 if (!(ifa->ifa_flags&IFA_F_SECONDARY))
774 fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
775 RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
776 else {
777 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
778 if (prim == NULL) {
Stephen Hemmingera6db9012008-01-12 20:58:35 -0800779 printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700780 return;
781 }
782 }
783
784 /* Deletion is more complicated than add.
785 We should take care of not to delete too much :-)
786
787 Scan address list to be sure that addresses are really gone.
788 */
789
790 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
791 if (ifa->ifa_local == ifa1->ifa_local)
792 ok |= LOCAL_OK;
793 if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
794 ok |= BRD_OK;
795 if (brd == ifa1->ifa_broadcast)
796 ok |= BRD1_OK;
797 if (any == ifa1->ifa_broadcast)
798 ok |= BRD0_OK;
799 }
800
801 if (!(ok&BRD_OK))
802 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
803 if (!(ok&BRD1_OK))
804 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
805 if (!(ok&BRD0_OK))
806 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
807 if (!(ok&LOCAL_OK)) {
808 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
809
810 /* Check, that this local address finally disappeared. */
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900811 if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700812 /* And the last, but not the least thing.
813 We must flush stray FIB entries.
814
815 First of all, we scan fib_info list searching
816 for stray nexthop entries, then ignite fib_flush.
817 */
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900818 if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local))
819 fib_flush(dev_net(dev));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700820 }
821 }
822#undef LOCAL_OK
823#undef BRD_OK
824#undef BRD0_OK
825#undef BRD1_OK
826}
827
Robert Olsson246955f2005-06-20 13:36:39 -0700828static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
829{
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900830
Robert Olsson246955f2005-06-20 13:36:39 -0700831 struct fib_result res;
Thomas Graf5f3008932006-11-09 15:21:41 -0800832 struct flowi fl = { .mark = frn->fl_mark,
Thomas Graf47dcf0c2006-11-09 15:20:38 -0800833 .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
Robert Olsson246955f2005-06-20 13:36:39 -0700834 .tos = frn->fl_tos,
835 .scope = frn->fl_scope } } };
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700836
Sergey Vlasov912a41a2007-04-27 02:17:19 -0700837#ifdef CONFIG_IP_MULTIPLE_TABLES
838 res.r = NULL;
839#endif
840
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700841 frn->err = -ENOENT;
Robert Olsson246955f2005-06-20 13:36:39 -0700842 if (tb) {
843 local_bh_disable();
844
845 frn->tb_id = tb->tb_id;
Stephen Hemminger16c6cf82009-09-20 10:35:36 +0000846 frn->err = fib_table_lookup(tb, &fl, &res);
Robert Olsson246955f2005-06-20 13:36:39 -0700847
848 if (!frn->err) {
849 frn->prefixlen = res.prefixlen;
850 frn->nh_sel = res.nh_sel;
851 frn->type = res.type;
852 frn->scope = res.scope;
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700853 fib_res_put(&res);
Robert Olsson246955f2005-06-20 13:36:39 -0700854 }
855 local_bh_enable();
856 }
857}
858
David S. Miller28f7b0362007-10-10 21:32:39 -0700859static void nl_fib_input(struct sk_buff *skb)
Robert Olsson246955f2005-06-20 13:36:39 -0700860{
Denis V. Lunev6bd48fc2008-01-10 03:28:55 -0800861 struct net *net;
Robert Olsson246955f2005-06-20 13:36:39 -0700862 struct fib_result_nl *frn;
David S. Miller28f7b0362007-10-10 21:32:39 -0700863 struct nlmsghdr *nlh;
Robert Olsson246955f2005-06-20 13:36:39 -0700864 struct fib_table *tb;
David S. Miller28f7b0362007-10-10 21:32:39 -0700865 u32 pid;
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700866
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900867 net = sock_net(skb->sk);
Arnaldo Carvalho de Melob529ccf2007-04-25 19:08:35 -0700868 nlh = nlmsg_hdr(skb);
Thomas Grafea865752005-12-01 14:30:00 -0800869 if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
Denis V. Lunevd883a032007-12-21 02:01:53 -0800870 nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
Thomas Grafea865752005-12-01 14:30:00 -0800871 return;
Denis V. Lunevd883a032007-12-21 02:01:53 -0800872
873 skb = skb_clone(skb, GFP_KERNEL);
874 if (skb == NULL)
875 return;
876 nlh = nlmsg_hdr(skb);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900877
Robert Olsson246955f2005-06-20 13:36:39 -0700878 frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
Denis V. Lunev6bd48fc2008-01-10 03:28:55 -0800879 tb = fib_get_table(net, frn->tb_id_in);
Robert Olsson246955f2005-06-20 13:36:39 -0700880
881 nl_fib_lookup(frn, tb);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900882
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700883 pid = NETLINK_CB(skb).pid; /* pid of sending process */
Robert Olsson246955f2005-06-20 13:36:39 -0700884 NETLINK_CB(skb).pid = 0; /* from kernel */
Patrick McHardyac6d4392005-08-14 19:29:52 -0700885 NETLINK_CB(skb).dst_group = 0; /* unicast */
Denis V. Lunev6bd48fc2008-01-10 03:28:55 -0800886 netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900887}
Robert Olsson246955f2005-06-20 13:36:39 -0700888
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +0000889static int __net_init nl_fib_lookup_init(struct net *net)
Robert Olsson246955f2005-06-20 13:36:39 -0700890{
Denis V. Lunev6bd48fc2008-01-10 03:28:55 -0800891 struct sock *sk;
892 sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0,
893 nl_fib_input, NULL, THIS_MODULE);
894 if (sk == NULL)
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -0800895 return -EAFNOSUPPORT;
Denis V. Lunev6bd48fc2008-01-10 03:28:55 -0800896 net->ipv4.fibnl = sk;
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -0800897 return 0;
898}
899
900static void nl_fib_lookup_exit(struct net *net)
901{
Denis V. Lunevb7c6ba62008-01-28 14:41:19 -0800902 netlink_kernel_release(net->ipv4.fibnl);
Denis V. Lunev775516b2008-01-18 23:55:19 -0800903 net->ipv4.fibnl = NULL;
Robert Olsson246955f2005-06-20 13:36:39 -0700904}
905
/*
 * Take IPv4 routing state for @dev down.
 *
 * @force is handed to fib_sync_down_dev(); when that reports a change the
 * FIB of the device's namespace is flushed.  The route cache is then
 * flushed with @delay and ARP state for the device is torn down.
 */
static void fib_disable_ip(struct net_device *dev, int force, int delay)
{
	int changed = fib_sync_down_dev(dev, force);

	if (changed)
		fib_flush(dev_net(dev));

	rt_cache_flush(dev_net(dev), delay);
	arp_ifdown(dev);
}
913
914static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
915{
Jianjun Kong6ed25332008-11-03 00:25:16 -0800916 struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700917 struct net_device *dev = ifa->ifa_dev->dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700918
919 switch (event) {
920 case NETDEV_UP:
921 fib_add_ifaddr(ifa);
922#ifdef CONFIG_IP_ROUTE_MULTIPATH
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700923 fib_sync_up(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700924#endif
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700925 rt_cache_flush(dev_net(dev), -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700926 break;
927 case NETDEV_DOWN:
928 fib_del_ifaddr(ifa);
Jayachandran C9fcc2e82005-10-27 15:10:01 -0700929 if (ifa->ifa_dev->ifa_list == NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700930 /* Last address was deleted from this interface.
931 Disable IP.
932 */
Octavian Purdilae2ce1462009-11-16 13:49:49 +0000933 fib_disable_ip(dev, 1, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700934 } else {
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700935 rt_cache_flush(dev_net(dev), -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700936 }
937 break;
938 }
939 return NOTIFY_DONE;
940}
941
942static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
943{
944 struct net_device *dev = ptr;
Herbert Xue5ed6392005-10-03 14:35:55 -0700945 struct in_device *in_dev = __in_dev_get_rtnl(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700946
947 if (event == NETDEV_UNREGISTER) {
Octavian Purdilae2ce1462009-11-16 13:49:49 +0000948 fib_disable_ip(dev, 2, -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700949 return NOTIFY_DONE;
950 }
951
952 if (!in_dev)
953 return NOTIFY_DONE;
954
955 switch (event) {
956 case NETDEV_UP:
957 for_ifa(in_dev) {
958 fib_add_ifaddr(ifa);
959 } endfor_ifa(in_dev);
960#ifdef CONFIG_IP_ROUTE_MULTIPATH
961 fib_sync_up(dev);
962#endif
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700963 rt_cache_flush(dev_net(dev), -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700964 break;
965 case NETDEV_DOWN:
Octavian Purdilae2ce1462009-11-16 13:49:49 +0000966 fib_disable_ip(dev, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700967 break;
968 case NETDEV_CHANGEMTU:
969 case NETDEV_CHANGE:
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -0700970 rt_cache_flush(dev_net(dev), 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700971 break;
Eric W. Biedermana5ee1552009-11-29 15:45:58 +0000972 case NETDEV_UNREGISTER_BATCH:
973 rt_cache_flush_batch();
974 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700975 }
976 return NOTIFY_DONE;
977}
978
979static struct notifier_block fib_inetaddr_notifier = {
Jianjun Kong6ed25332008-11-03 00:25:16 -0800980 .notifier_call = fib_inetaddr_event,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700981};
982
983static struct notifier_block fib_netdev_notifier = {
Jianjun Kong6ed25332008-11-03 00:25:16 -0800984 .notifier_call = fib_netdev_event,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700985};
986
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -0800987static int __net_init ip_fib_net_init(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700988{
Denis V. Lunevdce5cbe2008-01-31 18:44:53 -0800989 int err;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700990 unsigned int i;
991
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800992 net->ipv4.fib_table_hash = kzalloc(
993 sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL);
994 if (net->ipv4.fib_table_hash == NULL)
995 return -ENOMEM;
996
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700997 for (i = 0; i < FIB_TABLE_HASHSZ; i++)
Denis V. Luneve4aef8a2008-01-10 03:28:24 -0800998 INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]);
Pavel Emelyanovc3e9a352007-11-06 23:34:04 -0800999
Denis V. Lunevdce5cbe2008-01-31 18:44:53 -08001000 err = fib4_rules_init(net);
1001 if (err < 0)
1002 goto fail;
1003 return 0;
1004
1005fail:
1006 kfree(net->ipv4.fib_table_hash);
1007 return err;
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001008}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001009
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00001010static void ip_fib_net_exit(struct net *net)
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001011{
1012 unsigned int i;
Thomas Graf63f34442007-03-22 11:55:17 -07001013
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001014#ifdef CONFIG_IP_MULTIPLE_TABLES
1015 fib4_rules_exit(net);
1016#endif
1017
1018 for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
1019 struct fib_table *tb;
1020 struct hlist_head *head;
1021 struct hlist_node *node, *tmp;
1022
Denis V. Luneve4aef8a2008-01-10 03:28:24 -08001023 head = &net->ipv4.fib_table_hash[i];
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001024 hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
1025 hlist_del(node);
Stephen Hemminger16c6cf82009-09-20 10:35:36 +00001026 fib_table_flush(tb);
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001027 kfree(tb);
1028 }
1029 }
Denis V. Luneve4aef8a2008-01-10 03:28:24 -08001030 kfree(net->ipv4.fib_table_hash);
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001031}
1032
1033static int __net_init fib_net_init(struct net *net)
1034{
1035 int error;
1036
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001037 error = ip_fib_net_init(net);
1038 if (error < 0)
1039 goto out;
1040 error = nl_fib_lookup_init(net);
1041 if (error < 0)
1042 goto out_nlfl;
1043 error = fib_proc_init(net);
1044 if (error < 0)
1045 goto out_proc;
1046out:
1047 return error;
1048
1049out_proc:
1050 nl_fib_lookup_exit(net);
1051out_nlfl:
1052 ip_fib_net_exit(net);
1053 goto out;
1054}
1055
1056static void __net_exit fib_net_exit(struct net *net)
1057{
1058 fib_proc_exit(net);
1059 nl_fib_lookup_exit(net);
1060 ip_fib_net_exit(net);
1061}
1062
1063static struct pernet_operations fib_net_ops = {
1064 .init = fib_net_init,
1065 .exit = fib_net_exit,
1066};
1067
1068void __init ip_fib_init(void)
1069{
Thomas Graf63f34442007-03-22 11:55:17 -07001070 rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
1071 rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
1072 rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
Denis V. Lunev7b1a74f2008-01-10 03:22:17 -08001073
1074 register_pernet_subsys(&fib_net_ops);
1075 register_netdevice_notifier(&fib_netdev_notifier);
1076 register_inetaddr_notifier(&fib_inetaddr_notifier);
Stephen Hemminger7f9b8052008-01-14 23:14:20 -08001077
1078 fib_hash_init();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001079}
1080
/* Symbols exported from this file via EXPORT_SYMBOL(). */
EXPORT_SYMBOL(inet_addr_type);
EXPORT_SYMBOL(inet_dev_addr_type);
EXPORT_SYMBOL(ip_dev_find);