blob: a5cba2349605e75528216ff9936ceeb2db32a14d [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * IPv4 Forwarding Information Base: FIB frontend.
7 *
8 * Version: $Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $
9 *
10 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 */
17
Linus Torvalds1da177e2005-04-16 15:20:36 -070018#include <linux/module.h>
19#include <asm/uaccess.h>
20#include <asm/system.h>
21#include <linux/bitops.h>
Randy Dunlap4fc268d2006-01-11 12:17:47 -080022#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070023#include <linux/types.h>
24#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070025#include <linux/mm.h>
26#include <linux/string.h>
27#include <linux/socket.h>
28#include <linux/sockios.h>
29#include <linux/errno.h>
30#include <linux/in.h>
31#include <linux/inet.h>
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020032#include <linux/inetdevice.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070033#include <linux/netdevice.h>
Thomas Graf18237302006-08-04 23:04:54 -070034#include <linux/if_addr.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070035#include <linux/if_arp.h>
36#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070037#include <linux/init.h>
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070038#include <linux/list.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070039
40#include <net/ip.h>
41#include <net/protocol.h>
42#include <net/route.h>
43#include <net/tcp.h>
44#include <net/sock.h>
45#include <net/icmp.h>
46#include <net/arp.h>
47#include <net/ip_fib.h>
Thomas Graf63f34442007-03-22 11:55:17 -070048#include <net/rtnetlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070049
50#define FFprint(a...) printk(KERN_DEBUG a)
51
52#ifndef CONFIG_IP_MULTIPLE_TABLES
53
Linus Torvalds1da177e2005-04-16 15:20:36 -070054struct fib_table *ip_fib_local_table;
55struct fib_table *ip_fib_main_table;
56
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070057#define FIB_TABLE_HASHSZ 1
58static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
59
Linus Torvalds1da177e2005-04-16 15:20:36 -070060#else
61
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070062#define FIB_TABLE_HASHSZ 256
63static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
Linus Torvalds1da177e2005-04-16 15:20:36 -070064
Denis V. Lunevcd40b7d2007-10-10 21:15:29 -070065static struct sock *fibnl = NULL;
66
67
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070068struct fib_table *fib_new_table(u32 id)
Linus Torvalds1da177e2005-04-16 15:20:36 -070069{
70 struct fib_table *tb;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070071 unsigned int h;
Linus Torvalds1da177e2005-04-16 15:20:36 -070072
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070073 if (id == 0)
74 id = RT_TABLE_MAIN;
75 tb = fib_get_table(id);
76 if (tb)
77 return tb;
Linus Torvalds1da177e2005-04-16 15:20:36 -070078 tb = fib_hash_init(id);
79 if (!tb)
80 return NULL;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070081 h = id & (FIB_TABLE_HASHSZ - 1);
82 hlist_add_head_rcu(&tb->tb_hlist, &fib_table_hash[h]);
Linus Torvalds1da177e2005-04-16 15:20:36 -070083 return tb;
84}
85
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070086struct fib_table *fib_get_table(u32 id)
87{
88 struct fib_table *tb;
89 struct hlist_node *node;
90 unsigned int h;
Linus Torvalds1da177e2005-04-16 15:20:36 -070091
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070092 if (id == 0)
93 id = RT_TABLE_MAIN;
94 h = id & (FIB_TABLE_HASHSZ - 1);
95 rcu_read_lock();
96 hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb_hlist) {
97 if (tb->tb_id == id) {
98 rcu_read_unlock();
99 return tb;
100 }
101 }
102 rcu_read_unlock();
103 return NULL;
104}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700105#endif /* CONFIG_IP_MULTIPLE_TABLES */
106
Linus Torvalds1da177e2005-04-16 15:20:36 -0700107static void fib_flush(void)
108{
109 int flushed = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700110 struct fib_table *tb;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700111 struct hlist_node *node;
112 unsigned int h;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700113
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700114 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
115 hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist)
116 flushed += tb->tb_flush(tb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700117 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700118
119 if (flushed)
120 rt_cache_flush(-1);
121}
122
123/*
124 * Find the first device with a given source address.
125 */
126
Al Viro60cad5d2006-09-26 22:17:09 -0700127struct net_device * ip_dev_find(__be32 addr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700128{
129 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
130 struct fib_result res;
131 struct net_device *dev = NULL;
132
133#ifdef CONFIG_IP_MULTIPLE_TABLES
134 res.r = NULL;
135#endif
136
137 if (!ip_fib_local_table ||
138 ip_fib_local_table->tb_lookup(ip_fib_local_table, &fl, &res))
139 return NULL;
140 if (res.type != RTN_LOCAL)
141 goto out;
142 dev = FIB_RES_DEV(res);
143
144 if (dev)
145 dev_hold(dev);
146out:
147 fib_res_put(&res);
148 return dev;
149}
150
Al Virofd683222006-09-26 22:17:51 -0700151unsigned inet_addr_type(__be32 addr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700152{
153 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
154 struct fib_result res;
155 unsigned ret = RTN_BROADCAST;
156
157 if (ZERONET(addr) || BADCLASS(addr))
158 return RTN_BROADCAST;
159 if (MULTICAST(addr))
160 return RTN_MULTICAST;
161
162#ifdef CONFIG_IP_MULTIPLE_TABLES
163 res.r = NULL;
164#endif
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900165
Linus Torvalds1da177e2005-04-16 15:20:36 -0700166 if (ip_fib_local_table) {
167 ret = RTN_UNICAST;
168 if (!ip_fib_local_table->tb_lookup(ip_fib_local_table,
169 &fl, &res)) {
170 ret = res.type;
171 fib_res_put(&res);
172 }
173 }
174 return ret;
175}
176
177/* Given (packet source, input interface) and optional (dst, oif, tos):
178 - (main) check, that source is valid i.e. not broadcast or our local
179 address.
180 - figure out what "logical" interface this packet arrived
181 and calculate "specific destination" address.
182 - check, that packet arrived from expected physical interface.
183 */
184
Al Virod9c9df82006-09-26 21:28:14 -0700185int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
186 struct net_device *dev, __be32 *spec_dst, u32 *itag)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700187{
188 struct in_device *in_dev;
189 struct flowi fl = { .nl_u = { .ip4_u =
190 { .daddr = src,
191 .saddr = dst,
192 .tos = tos } },
193 .iif = oif };
194 struct fib_result res;
195 int no_addr, rpf;
196 int ret;
197
198 no_addr = rpf = 0;
199 rcu_read_lock();
Herbert Xue5ed6392005-10-03 14:35:55 -0700200 in_dev = __in_dev_get_rcu(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700201 if (in_dev) {
202 no_addr = in_dev->ifa_list == NULL;
203 rpf = IN_DEV_RPFILTER(in_dev);
204 }
205 rcu_read_unlock();
206
207 if (in_dev == NULL)
208 goto e_inval;
209
210 if (fib_lookup(&fl, &res))
211 goto last_resort;
212 if (res.type != RTN_UNICAST)
213 goto e_inval_res;
214 *spec_dst = FIB_RES_PREFSRC(res);
215 fib_combine_itag(itag, &res);
216#ifdef CONFIG_IP_ROUTE_MULTIPATH
217 if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
218#else
219 if (FIB_RES_DEV(res) == dev)
220#endif
221 {
222 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
223 fib_res_put(&res);
224 return ret;
225 }
226 fib_res_put(&res);
227 if (no_addr)
228 goto last_resort;
229 if (rpf)
230 goto e_inval;
231 fl.oif = dev->ifindex;
232
233 ret = 0;
234 if (fib_lookup(&fl, &res) == 0) {
235 if (res.type == RTN_UNICAST) {
236 *spec_dst = FIB_RES_PREFSRC(res);
237 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
238 }
239 fib_res_put(&res);
240 }
241 return ret;
242
243last_resort:
244 if (rpf)
245 goto e_inval;
246 *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
247 *itag = 0;
248 return 0;
249
250e_inval_res:
251 fib_res_put(&res);
252e_inval:
253 return -EINVAL;
254}
255
/* Return the IPv4 address stored in a sockaddr, viewed as a sockaddr_in. */
static inline __be32 sk_extract_addr(struct sockaddr *addr)
{
	struct sockaddr_in *sin = (struct sockaddr_in *) addr;

	return sin->sin_addr.s_addr;
}
260
261static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
262{
263 struct nlattr *nla;
264
265 nla = (struct nlattr *) ((char *) mx + len);
266 nla->nla_type = type;
267 nla->nla_len = nla_attr_size(4);
268 *(u32 *) nla_data(nla) = value;
269
270 return len + nla_total_size(4);
271}
272
273static int rtentry_to_fib_config(int cmd, struct rtentry *rt,
274 struct fib_config *cfg)
275{
Al Viro6d85c102006-09-26 22:15:46 -0700276 __be32 addr;
Thomas Graf4e902c52006-08-17 18:14:52 -0700277 int plen;
278
279 memset(cfg, 0, sizeof(*cfg));
280
281 if (rt->rt_dst.sa_family != AF_INET)
282 return -EAFNOSUPPORT;
283
284 /*
285 * Check mask for validity:
286 * a) it must be contiguous.
287 * b) destination must have all host bits clear.
288 * c) if application forgot to set correct family (AF_INET),
289 * reject request unless it is absolutely clear i.e.
290 * both family and mask are zero.
291 */
292 plen = 32;
293 addr = sk_extract_addr(&rt->rt_dst);
294 if (!(rt->rt_flags & RTF_HOST)) {
Al Viro81f7bf62006-09-27 18:40:00 -0700295 __be32 mask = sk_extract_addr(&rt->rt_genmask);
Thomas Graf4e902c52006-08-17 18:14:52 -0700296
297 if (rt->rt_genmask.sa_family != AF_INET) {
298 if (mask || rt->rt_genmask.sa_family)
299 return -EAFNOSUPPORT;
300 }
301
302 if (bad_mask(mask, addr))
303 return -EINVAL;
304
305 plen = inet_mask_len(mask);
306 }
307
308 cfg->fc_dst_len = plen;
309 cfg->fc_dst = addr;
310
311 if (cmd != SIOCDELRT) {
312 cfg->fc_nlflags = NLM_F_CREATE;
313 cfg->fc_protocol = RTPROT_BOOT;
314 }
315
316 if (rt->rt_metric)
317 cfg->fc_priority = rt->rt_metric - 1;
318
319 if (rt->rt_flags & RTF_REJECT) {
320 cfg->fc_scope = RT_SCOPE_HOST;
321 cfg->fc_type = RTN_UNREACHABLE;
322 return 0;
323 }
324
325 cfg->fc_scope = RT_SCOPE_NOWHERE;
326 cfg->fc_type = RTN_UNICAST;
327
328 if (rt->rt_dev) {
329 char *colon;
330 struct net_device *dev;
331 char devname[IFNAMSIZ];
332
333 if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
334 return -EFAULT;
335
336 devname[IFNAMSIZ-1] = 0;
337 colon = strchr(devname, ':');
338 if (colon)
339 *colon = 0;
Eric W. Biederman881d9662007-09-17 11:56:21 -0700340 dev = __dev_get_by_name(&init_net, devname);
Thomas Graf4e902c52006-08-17 18:14:52 -0700341 if (!dev)
342 return -ENODEV;
343 cfg->fc_oif = dev->ifindex;
344 if (colon) {
345 struct in_ifaddr *ifa;
346 struct in_device *in_dev = __in_dev_get_rtnl(dev);
347 if (!in_dev)
348 return -ENODEV;
349 *colon = ':';
350 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
351 if (strcmp(ifa->ifa_label, devname) == 0)
352 break;
353 if (ifa == NULL)
354 return -ENODEV;
355 cfg->fc_prefsrc = ifa->ifa_local;
356 }
357 }
358
359 addr = sk_extract_addr(&rt->rt_gateway);
360 if (rt->rt_gateway.sa_family == AF_INET && addr) {
361 cfg->fc_gw = addr;
362 if (rt->rt_flags & RTF_GATEWAY &&
363 inet_addr_type(addr) == RTN_UNICAST)
364 cfg->fc_scope = RT_SCOPE_UNIVERSE;
365 }
366
367 if (cmd == SIOCDELRT)
368 return 0;
369
370 if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
371 return -EINVAL;
372
373 if (cfg->fc_scope == RT_SCOPE_NOWHERE)
374 cfg->fc_scope = RT_SCOPE_LINK;
375
376 if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
377 struct nlattr *mx;
378 int len = 0;
379
380 mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900381 if (mx == NULL)
Thomas Graf4e902c52006-08-17 18:14:52 -0700382 return -ENOMEM;
383
384 if (rt->rt_flags & RTF_MTU)
385 len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
386
387 if (rt->rt_flags & RTF_WINDOW)
388 len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
389
390 if (rt->rt_flags & RTF_IRTT)
391 len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
392
393 cfg->fc_mx = mx;
394 cfg->fc_mx_len = len;
395 }
396
397 return 0;
398}
399
Linus Torvalds1da177e2005-04-16 15:20:36 -0700400/*
401 * Handle IP routing ioctl calls. These are used to manipulate the routing tables
402 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900403
Linus Torvalds1da177e2005-04-16 15:20:36 -0700404int ip_rt_ioctl(unsigned int cmd, void __user *arg)
405{
Thomas Graf4e902c52006-08-17 18:14:52 -0700406 struct fib_config cfg;
407 struct rtentry rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700408 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700409
410 switch (cmd) {
411 case SIOCADDRT: /* Add a route */
412 case SIOCDELRT: /* Delete a route */
413 if (!capable(CAP_NET_ADMIN))
414 return -EPERM;
Thomas Graf4e902c52006-08-17 18:14:52 -0700415
416 if (copy_from_user(&rt, arg, sizeof(rt)))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700417 return -EFAULT;
Thomas Graf4e902c52006-08-17 18:14:52 -0700418
Linus Torvalds1da177e2005-04-16 15:20:36 -0700419 rtnl_lock();
Thomas Graf4e902c52006-08-17 18:14:52 -0700420 err = rtentry_to_fib_config(cmd, &rt, &cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700421 if (err == 0) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700422 struct fib_table *tb;
423
Linus Torvalds1da177e2005-04-16 15:20:36 -0700424 if (cmd == SIOCDELRT) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700425 tb = fib_get_table(cfg.fc_table);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700426 if (tb)
Thomas Graf4e902c52006-08-17 18:14:52 -0700427 err = tb->tb_delete(tb, &cfg);
428 else
429 err = -ESRCH;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700430 } else {
Thomas Graf4e902c52006-08-17 18:14:52 -0700431 tb = fib_new_table(cfg.fc_table);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700432 if (tb)
Thomas Graf4e902c52006-08-17 18:14:52 -0700433 err = tb->tb_insert(tb, &cfg);
434 else
435 err = -ENOBUFS;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700436 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700437
438 /* allocated by rtentry_to_fib_config() */
439 kfree(cfg.fc_mx);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700440 }
441 rtnl_unlock();
442 return err;
443 }
444 return -EINVAL;
445}
446
Patrick McHardyef7c79e2007-06-05 12:38:30 -0700447const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
Thomas Graf4e902c52006-08-17 18:14:52 -0700448 [RTA_DST] = { .type = NLA_U32 },
449 [RTA_SRC] = { .type = NLA_U32 },
450 [RTA_IIF] = { .type = NLA_U32 },
451 [RTA_OIF] = { .type = NLA_U32 },
452 [RTA_GATEWAY] = { .type = NLA_U32 },
453 [RTA_PRIORITY] = { .type = NLA_U32 },
454 [RTA_PREFSRC] = { .type = NLA_U32 },
455 [RTA_METRICS] = { .type = NLA_NESTED },
Thomas Graf5176f912006-08-26 20:13:18 -0700456 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
Thomas Graf4e902c52006-08-17 18:14:52 -0700457 [RTA_PROTOINFO] = { .type = NLA_U32 },
458 [RTA_FLOW] = { .type = NLA_U32 },
Thomas Graf4e902c52006-08-17 18:14:52 -0700459};
Linus Torvalds1da177e2005-04-16 15:20:36 -0700460
Thomas Graf4e902c52006-08-17 18:14:52 -0700461static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh,
462 struct fib_config *cfg)
463{
464 struct nlattr *attr;
465 int err, remaining;
466 struct rtmsg *rtm;
467
468 err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
469 if (err < 0)
470 goto errout;
471
472 memset(cfg, 0, sizeof(*cfg));
473
474 rtm = nlmsg_data(nlh);
Thomas Graf4e902c52006-08-17 18:14:52 -0700475 cfg->fc_dst_len = rtm->rtm_dst_len;
Thomas Graf4e902c52006-08-17 18:14:52 -0700476 cfg->fc_tos = rtm->rtm_tos;
477 cfg->fc_table = rtm->rtm_table;
478 cfg->fc_protocol = rtm->rtm_protocol;
479 cfg->fc_scope = rtm->rtm_scope;
480 cfg->fc_type = rtm->rtm_type;
481 cfg->fc_flags = rtm->rtm_flags;
482 cfg->fc_nlflags = nlh->nlmsg_flags;
483
484 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
485 cfg->fc_nlinfo.nlh = nlh;
486
Thomas Grafa0ee18b2007-03-24 20:32:54 -0700487 if (cfg->fc_type > RTN_MAX) {
488 err = -EINVAL;
489 goto errout;
490 }
491
Thomas Graf4e902c52006-08-17 18:14:52 -0700492 nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
Thomas Graf8f4c1f92007-09-12 14:44:36 +0200493 switch (nla_type(attr)) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700494 case RTA_DST:
Al Viro17fb2c62006-09-26 22:15:25 -0700495 cfg->fc_dst = nla_get_be32(attr);
Thomas Graf4e902c52006-08-17 18:14:52 -0700496 break;
Thomas Graf4e902c52006-08-17 18:14:52 -0700497 case RTA_OIF:
498 cfg->fc_oif = nla_get_u32(attr);
499 break;
500 case RTA_GATEWAY:
Al Viro17fb2c62006-09-26 22:15:25 -0700501 cfg->fc_gw = nla_get_be32(attr);
Thomas Graf4e902c52006-08-17 18:14:52 -0700502 break;
503 case RTA_PRIORITY:
504 cfg->fc_priority = nla_get_u32(attr);
505 break;
506 case RTA_PREFSRC:
Al Viro17fb2c62006-09-26 22:15:25 -0700507 cfg->fc_prefsrc = nla_get_be32(attr);
Thomas Graf4e902c52006-08-17 18:14:52 -0700508 break;
509 case RTA_METRICS:
510 cfg->fc_mx = nla_data(attr);
511 cfg->fc_mx_len = nla_len(attr);
512 break;
513 case RTA_MULTIPATH:
514 cfg->fc_mp = nla_data(attr);
515 cfg->fc_mp_len = nla_len(attr);
516 break;
517 case RTA_FLOW:
518 cfg->fc_flow = nla_get_u32(attr);
519 break;
Thomas Graf4e902c52006-08-17 18:14:52 -0700520 case RTA_TABLE:
521 cfg->fc_table = nla_get_u32(attr);
522 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700523 }
524 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700525
Linus Torvalds1da177e2005-04-16 15:20:36 -0700526 return 0;
Thomas Graf4e902c52006-08-17 18:14:52 -0700527errout:
528 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700529}
530
Thomas Graf63f34442007-03-22 11:55:17 -0700531static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700532{
Thomas Graf4e902c52006-08-17 18:14:52 -0700533 struct fib_config cfg;
534 struct fib_table *tb;
535 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700536
Thomas Graf4e902c52006-08-17 18:14:52 -0700537 err = rtm_to_fib_config(skb, nlh, &cfg);
538 if (err < 0)
539 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700540
Thomas Graf4e902c52006-08-17 18:14:52 -0700541 tb = fib_get_table(cfg.fc_table);
542 if (tb == NULL) {
543 err = -ESRCH;
544 goto errout;
545 }
546
547 err = tb->tb_delete(tb, &cfg);
548errout:
549 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700550}
551
Thomas Graf63f34442007-03-22 11:55:17 -0700552static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700553{
Thomas Graf4e902c52006-08-17 18:14:52 -0700554 struct fib_config cfg;
555 struct fib_table *tb;
556 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700557
Thomas Graf4e902c52006-08-17 18:14:52 -0700558 err = rtm_to_fib_config(skb, nlh, &cfg);
559 if (err < 0)
560 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700561
Thomas Graf4e902c52006-08-17 18:14:52 -0700562 tb = fib_new_table(cfg.fc_table);
563 if (tb == NULL) {
564 err = -ENOBUFS;
565 goto errout;
566 }
567
568 err = tb->tb_insert(tb, &cfg);
569errout:
570 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700571}
572
Thomas Graf63f34442007-03-22 11:55:17 -0700573static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700574{
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700575 unsigned int h, s_h;
576 unsigned int e = 0, s_e;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700577 struct fib_table *tb;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700578 struct hlist_node *node;
579 int dumped = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700580
Thomas Grafbe403ea2006-08-17 18:15:17 -0700581 if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
582 ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700583 return ip_rt_dump(skb, cb);
584
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700585 s_h = cb->args[0];
586 s_e = cb->args[1];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700587
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700588 for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
589 e = 0;
590 hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) {
591 if (e < s_e)
592 goto next;
593 if (dumped)
594 memset(&cb->args[2], 0, sizeof(cb->args) -
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900595 2 * sizeof(cb->args[0]));
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700596 if (tb->tb_dump(tb, skb, cb) < 0)
597 goto out;
598 dumped = 1;
599next:
600 e++;
601 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700602 }
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700603out:
604 cb->args[1] = e;
605 cb->args[0] = h;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700606
607 return skb->len;
608}
609
610/* Prepare and feed intra-kernel routing request.
611 Really, it should be netlink message, but :-( netlink
612 can be not configured, so that we feed it directly
613 to fib engine. It is legal, because all events occur
614 only when netlink is already locked.
615 */
616
Al Viro81f7bf62006-09-27 18:40:00 -0700617static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700618{
Thomas Graf4e902c52006-08-17 18:14:52 -0700619 struct fib_table *tb;
620 struct fib_config cfg = {
621 .fc_protocol = RTPROT_KERNEL,
622 .fc_type = type,
623 .fc_dst = dst,
624 .fc_dst_len = dst_len,
625 .fc_prefsrc = ifa->ifa_local,
626 .fc_oif = ifa->ifa_dev->dev->ifindex,
627 .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
628 };
Linus Torvalds1da177e2005-04-16 15:20:36 -0700629
630 if (type == RTN_UNICAST)
631 tb = fib_new_table(RT_TABLE_MAIN);
632 else
633 tb = fib_new_table(RT_TABLE_LOCAL);
634
635 if (tb == NULL)
636 return;
637
Thomas Graf4e902c52006-08-17 18:14:52 -0700638 cfg.fc_table = tb->tb_id;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700639
Thomas Graf4e902c52006-08-17 18:14:52 -0700640 if (type != RTN_LOCAL)
641 cfg.fc_scope = RT_SCOPE_LINK;
642 else
643 cfg.fc_scope = RT_SCOPE_HOST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700644
645 if (cmd == RTM_NEWROUTE)
Thomas Graf4e902c52006-08-17 18:14:52 -0700646 tb->tb_insert(tb, &cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700647 else
Thomas Graf4e902c52006-08-17 18:14:52 -0700648 tb->tb_delete(tb, &cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700649}
650
Jamal Hadi Salim0ff60a42005-11-22 14:47:37 -0800651void fib_add_ifaddr(struct in_ifaddr *ifa)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700652{
653 struct in_device *in_dev = ifa->ifa_dev;
654 struct net_device *dev = in_dev->dev;
655 struct in_ifaddr *prim = ifa;
Al Viroa144ea42006-09-28 18:00:55 -0700656 __be32 mask = ifa->ifa_mask;
657 __be32 addr = ifa->ifa_local;
658 __be32 prefix = ifa->ifa_address&mask;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700659
660 if (ifa->ifa_flags&IFA_F_SECONDARY) {
661 prim = inet_ifa_byprefix(in_dev, prefix, mask);
662 if (prim == NULL) {
663 printk(KERN_DEBUG "fib_add_ifaddr: bug: prim == NULL\n");
664 return;
665 }
666 }
667
668 fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
669
670 if (!(dev->flags&IFF_UP))
671 return;
672
673 /* Add broadcast address, if it is explicitly assigned. */
Al Viroa144ea42006-09-28 18:00:55 -0700674 if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700675 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
676
677 if (!ZERONET(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
678 (prefix != addr || ifa->ifa_prefixlen < 32)) {
679 fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
680 RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
681
682 /* Add network specific broadcasts, when it takes a sense */
683 if (ifa->ifa_prefixlen < 31) {
684 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
685 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
686 }
687 }
688}
689
690static void fib_del_ifaddr(struct in_ifaddr *ifa)
691{
692 struct in_device *in_dev = ifa->ifa_dev;
693 struct net_device *dev = in_dev->dev;
694 struct in_ifaddr *ifa1;
695 struct in_ifaddr *prim = ifa;
Al Viroa144ea42006-09-28 18:00:55 -0700696 __be32 brd = ifa->ifa_address|~ifa->ifa_mask;
697 __be32 any = ifa->ifa_address&ifa->ifa_mask;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700698#define LOCAL_OK 1
699#define BRD_OK 2
700#define BRD0_OK 4
701#define BRD1_OK 8
702 unsigned ok = 0;
703
704 if (!(ifa->ifa_flags&IFA_F_SECONDARY))
705 fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
706 RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
707 else {
708 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
709 if (prim == NULL) {
710 printk(KERN_DEBUG "fib_del_ifaddr: bug: prim == NULL\n");
711 return;
712 }
713 }
714
715 /* Deletion is more complicated than add.
716 We should take care of not to delete too much :-)
717
718 Scan address list to be sure that addresses are really gone.
719 */
720
721 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
722 if (ifa->ifa_local == ifa1->ifa_local)
723 ok |= LOCAL_OK;
724 if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
725 ok |= BRD_OK;
726 if (brd == ifa1->ifa_broadcast)
727 ok |= BRD1_OK;
728 if (any == ifa1->ifa_broadcast)
729 ok |= BRD0_OK;
730 }
731
732 if (!(ok&BRD_OK))
733 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
734 if (!(ok&BRD1_OK))
735 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
736 if (!(ok&BRD0_OK))
737 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
738 if (!(ok&LOCAL_OK)) {
739 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
740
741 /* Check, that this local address finally disappeared. */
742 if (inet_addr_type(ifa->ifa_local) != RTN_LOCAL) {
743 /* And the last, but not the least thing.
744 We must flush stray FIB entries.
745
746 First of all, we scan fib_info list searching
747 for stray nexthop entries, then ignite fib_flush.
748 */
749 if (fib_sync_down(ifa->ifa_local, NULL, 0))
750 fib_flush();
751 }
752 }
753#undef LOCAL_OK
754#undef BRD_OK
755#undef BRD0_OK
756#undef BRD1_OK
757}
758
Robert Olsson246955f2005-06-20 13:36:39 -0700759static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
760{
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900761
Robert Olsson246955f2005-06-20 13:36:39 -0700762 struct fib_result res;
Thomas Graf5f3008932006-11-09 15:21:41 -0800763 struct flowi fl = { .mark = frn->fl_mark,
Thomas Graf47dcf0c2006-11-09 15:20:38 -0800764 .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
Robert Olsson246955f2005-06-20 13:36:39 -0700765 .tos = frn->fl_tos,
766 .scope = frn->fl_scope } } };
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700767
Sergey Vlasov912a41a2007-04-27 02:17:19 -0700768#ifdef CONFIG_IP_MULTIPLE_TABLES
769 res.r = NULL;
770#endif
771
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700772 frn->err = -ENOENT;
Robert Olsson246955f2005-06-20 13:36:39 -0700773 if (tb) {
774 local_bh_disable();
775
776 frn->tb_id = tb->tb_id;
777 frn->err = tb->tb_lookup(tb, &fl, &res);
778
779 if (!frn->err) {
780 frn->prefixlen = res.prefixlen;
781 frn->nh_sel = res.nh_sel;
782 frn->type = res.type;
783 frn->scope = res.scope;
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700784 fib_res_put(&res);
Robert Olsson246955f2005-06-20 13:36:39 -0700785 }
786 local_bh_enable();
787 }
788}
789
790static void nl_fib_input(struct sock *sk, int len)
791{
792 struct sk_buff *skb = NULL;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900793 struct nlmsghdr *nlh = NULL;
Robert Olsson246955f2005-06-20 13:36:39 -0700794 struct fib_result_nl *frn;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900795 u32 pid;
Robert Olsson246955f2005-06-20 13:36:39 -0700796 struct fib_table *tb;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900797
Thomas Grafea865752005-12-01 14:30:00 -0800798 skb = skb_dequeue(&sk->sk_receive_queue);
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700799 if (skb == NULL)
800 return;
801
Arnaldo Carvalho de Melob529ccf2007-04-25 19:08:35 -0700802 nlh = nlmsg_hdr(skb);
Thomas Grafea865752005-12-01 14:30:00 -0800803 if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
804 nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn))) {
805 kfree_skb(skb);
806 return;
807 }
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900808
Robert Olsson246955f2005-06-20 13:36:39 -0700809 frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
810 tb = fib_get_table(frn->tb_id_in);
811
812 nl_fib_lookup(frn, tb);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900813
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700814 pid = NETLINK_CB(skb).pid; /* pid of sending process */
Robert Olsson246955f2005-06-20 13:36:39 -0700815 NETLINK_CB(skb).pid = 0; /* from kernel */
Patrick McHardyac6d4392005-08-14 19:29:52 -0700816 NETLINK_CB(skb).dst_group = 0; /* unicast */
Denis V. Lunevcd40b7d2007-10-10 21:15:29 -0700817 netlink_unicast(fibnl, skb, pid, MSG_DONTWAIT);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900818}
Robert Olsson246955f2005-06-20 13:36:39 -0700819
820static void nl_fib_lookup_init(void)
821{
Denis V. Lunevcd40b7d2007-10-10 21:15:29 -0700822 fibnl = netlink_kernel_create(&init_net, NETLINK_FIB_LOOKUP, 0,
823 nl_fib_input, NULL, THIS_MODULE);
Robert Olsson246955f2005-06-20 13:36:39 -0700824}
825
/*
 * Take IPv4 routing down on a device: sync the FIB (force is passed
 * through to fib_sync_down()), flush the tables if that reports work to
 * do, then flush the routing cache and the device's ARP entries.
 */
static void fib_disable_ip(struct net_device *dev, int force)
{
	int need_flush = fib_sync_down(0, dev, force);

	if (need_flush)
		fib_flush();

	rt_cache_flush(0);
	arp_ifdown(dev);
}
833
834static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
835{
836 struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
837
838 switch (event) {
839 case NETDEV_UP:
840 fib_add_ifaddr(ifa);
841#ifdef CONFIG_IP_ROUTE_MULTIPATH
842 fib_sync_up(ifa->ifa_dev->dev);
843#endif
844 rt_cache_flush(-1);
845 break;
846 case NETDEV_DOWN:
847 fib_del_ifaddr(ifa);
Jayachandran C9fcc2e82005-10-27 15:10:01 -0700848 if (ifa->ifa_dev->ifa_list == NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700849 /* Last address was deleted from this interface.
850 Disable IP.
851 */
852 fib_disable_ip(ifa->ifa_dev->dev, 1);
853 } else {
854 rt_cache_flush(-1);
855 }
856 break;
857 }
858 return NOTIFY_DONE;
859}
860
861static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
862{
863 struct net_device *dev = ptr;
Herbert Xue5ed6392005-10-03 14:35:55 -0700864 struct in_device *in_dev = __in_dev_get_rtnl(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700865
Eric W. Biedermane9dc8652007-09-12 13:02:17 +0200866 if (dev->nd_net != &init_net)
867 return NOTIFY_DONE;
868
Linus Torvalds1da177e2005-04-16 15:20:36 -0700869 if (event == NETDEV_UNREGISTER) {
870 fib_disable_ip(dev, 2);
871 return NOTIFY_DONE;
872 }
873
874 if (!in_dev)
875 return NOTIFY_DONE;
876
877 switch (event) {
878 case NETDEV_UP:
879 for_ifa(in_dev) {
880 fib_add_ifaddr(ifa);
881 } endfor_ifa(in_dev);
882#ifdef CONFIG_IP_ROUTE_MULTIPATH
883 fib_sync_up(dev);
884#endif
885 rt_cache_flush(-1);
886 break;
887 case NETDEV_DOWN:
888 fib_disable_ip(dev, 0);
889 break;
890 case NETDEV_CHANGEMTU:
891 case NETDEV_CHANGE:
892 rt_cache_flush(0);
893 break;
894 }
895 return NOTIFY_DONE;
896}
897
898static struct notifier_block fib_inetaddr_notifier = {
899 .notifier_call =fib_inetaddr_event,
900};
901
902static struct notifier_block fib_netdev_notifier = {
903 .notifier_call =fib_netdev_event,
904};
905
906void __init ip_fib_init(void)
907{
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700908 unsigned int i;
909
910 for (i = 0; i < FIB_TABLE_HASHSZ; i++)
911 INIT_HLIST_HEAD(&fib_table_hash[i]);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700912#ifndef CONFIG_IP_MULTIPLE_TABLES
913 ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL);
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700914 hlist_add_head_rcu(&ip_fib_local_table->tb_hlist, &fib_table_hash[0]);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700915 ip_fib_main_table = fib_hash_init(RT_TABLE_MAIN);
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700916 hlist_add_head_rcu(&ip_fib_main_table->tb_hlist, &fib_table_hash[0]);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700917#else
Thomas Grafe1ef4bf2006-08-04 03:39:22 -0700918 fib4_rules_init();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700919#endif
920
921 register_netdevice_notifier(&fib_netdev_notifier);
922 register_inetaddr_notifier(&fib_inetaddr_notifier);
Robert Olsson246955f2005-06-20 13:36:39 -0700923 nl_fib_lookup_init();
Thomas Graf63f34442007-03-22 11:55:17 -0700924
925 rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
926 rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
927 rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700928}
929
930EXPORT_SYMBOL(inet_addr_type);
Sean Heftya1e87332006-06-17 20:37:28 -0700931EXPORT_SYMBOL(ip_dev_find);