blob: 9ad1f6252a9729fd5bc9e9ec1af69e690ad75e3e [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * IPv4 Forwarding Information Base: FIB frontend.
7 *
8 * Version: $Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $
9 *
10 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 */
17
Linus Torvalds1da177e2005-04-16 15:20:36 -070018#include <linux/module.h>
19#include <asm/uaccess.h>
20#include <asm/system.h>
21#include <linux/bitops.h>
Randy Dunlap4fc268d2006-01-11 12:17:47 -080022#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070023#include <linux/types.h>
24#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070025#include <linux/mm.h>
26#include <linux/string.h>
27#include <linux/socket.h>
28#include <linux/sockios.h>
29#include <linux/errno.h>
30#include <linux/in.h>
31#include <linux/inet.h>
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020032#include <linux/inetdevice.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070033#include <linux/netdevice.h>
Thomas Graf18237302006-08-04 23:04:54 -070034#include <linux/if_addr.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070035#include <linux/if_arp.h>
36#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070037#include <linux/init.h>
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070038#include <linux/list.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070039
40#include <net/ip.h>
41#include <net/protocol.h>
42#include <net/route.h>
43#include <net/tcp.h>
44#include <net/sock.h>
45#include <net/icmp.h>
46#include <net/arp.h>
47#include <net/ip_fib.h>
Thomas Graf63f34442007-03-22 11:55:17 -070048#include <net/rtnetlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070049
50#define FFprint(a...) printk(KERN_DEBUG a)
51
52#ifndef CONFIG_IP_MULTIPLE_TABLES
53
Linus Torvalds1da177e2005-04-16 15:20:36 -070054struct fib_table *ip_fib_local_table;
55struct fib_table *ip_fib_main_table;
56
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070057#define FIB_TABLE_HASHSZ 1
58static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
59
Linus Torvalds1da177e2005-04-16 15:20:36 -070060#else
61
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070062#define FIB_TABLE_HASHSZ 256
63static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
Linus Torvalds1da177e2005-04-16 15:20:36 -070064
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070065struct fib_table *fib_new_table(u32 id)
Linus Torvalds1da177e2005-04-16 15:20:36 -070066{
67 struct fib_table *tb;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070068 unsigned int h;
Linus Torvalds1da177e2005-04-16 15:20:36 -070069
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070070 if (id == 0)
71 id = RT_TABLE_MAIN;
72 tb = fib_get_table(id);
73 if (tb)
74 return tb;
Linus Torvalds1da177e2005-04-16 15:20:36 -070075 tb = fib_hash_init(id);
76 if (!tb)
77 return NULL;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070078 h = id & (FIB_TABLE_HASHSZ - 1);
79 hlist_add_head_rcu(&tb->tb_hlist, &fib_table_hash[h]);
Linus Torvalds1da177e2005-04-16 15:20:36 -070080 return tb;
81}
82
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070083struct fib_table *fib_get_table(u32 id)
84{
85 struct fib_table *tb;
86 struct hlist_node *node;
87 unsigned int h;
Linus Torvalds1da177e2005-04-16 15:20:36 -070088
Patrick McHardy1af5a8c2006-08-10 23:10:46 -070089 if (id == 0)
90 id = RT_TABLE_MAIN;
91 h = id & (FIB_TABLE_HASHSZ - 1);
92 rcu_read_lock();
93 hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb_hlist) {
94 if (tb->tb_id == id) {
95 rcu_read_unlock();
96 return tb;
97 }
98 }
99 rcu_read_unlock();
100 return NULL;
101}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700102#endif /* CONFIG_IP_MULTIPLE_TABLES */
103
Linus Torvalds1da177e2005-04-16 15:20:36 -0700104static void fib_flush(void)
105{
106 int flushed = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700107 struct fib_table *tb;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700108 struct hlist_node *node;
109 unsigned int h;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700110
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700111 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
112 hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist)
113 flushed += tb->tb_flush(tb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700114 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700115
116 if (flushed)
117 rt_cache_flush(-1);
118}
119
120/*
121 * Find the first device with a given source address.
122 */
123
Al Viro60cad5d2006-09-26 22:17:09 -0700124struct net_device * ip_dev_find(__be32 addr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700125{
126 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
127 struct fib_result res;
128 struct net_device *dev = NULL;
129
130#ifdef CONFIG_IP_MULTIPLE_TABLES
131 res.r = NULL;
132#endif
133
134 if (!ip_fib_local_table ||
135 ip_fib_local_table->tb_lookup(ip_fib_local_table, &fl, &res))
136 return NULL;
137 if (res.type != RTN_LOCAL)
138 goto out;
139 dev = FIB_RES_DEV(res);
140
141 if (dev)
142 dev_hold(dev);
143out:
144 fib_res_put(&res);
145 return dev;
146}
147
Al Virofd683222006-09-26 22:17:51 -0700148unsigned inet_addr_type(__be32 addr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700149{
150 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
151 struct fib_result res;
152 unsigned ret = RTN_BROADCAST;
153
154 if (ZERONET(addr) || BADCLASS(addr))
155 return RTN_BROADCAST;
156 if (MULTICAST(addr))
157 return RTN_MULTICAST;
158
159#ifdef CONFIG_IP_MULTIPLE_TABLES
160 res.r = NULL;
161#endif
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900162
Linus Torvalds1da177e2005-04-16 15:20:36 -0700163 if (ip_fib_local_table) {
164 ret = RTN_UNICAST;
165 if (!ip_fib_local_table->tb_lookup(ip_fib_local_table,
166 &fl, &res)) {
167 ret = res.type;
168 fib_res_put(&res);
169 }
170 }
171 return ret;
172}
173
174/* Given (packet source, input interface) and optional (dst, oif, tos):
175 - (main) check, that source is valid i.e. not broadcast or our local
176 address.
177 - figure out what "logical" interface this packet arrived
178 and calculate "specific destination" address.
179 - check, that packet arrived from expected physical interface.
180 */
181
Al Virod9c9df82006-09-26 21:28:14 -0700182int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
183 struct net_device *dev, __be32 *spec_dst, u32 *itag)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700184{
185 struct in_device *in_dev;
186 struct flowi fl = { .nl_u = { .ip4_u =
187 { .daddr = src,
188 .saddr = dst,
189 .tos = tos } },
190 .iif = oif };
191 struct fib_result res;
192 int no_addr, rpf;
193 int ret;
194
195 no_addr = rpf = 0;
196 rcu_read_lock();
Herbert Xue5ed6392005-10-03 14:35:55 -0700197 in_dev = __in_dev_get_rcu(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700198 if (in_dev) {
199 no_addr = in_dev->ifa_list == NULL;
200 rpf = IN_DEV_RPFILTER(in_dev);
201 }
202 rcu_read_unlock();
203
204 if (in_dev == NULL)
205 goto e_inval;
206
207 if (fib_lookup(&fl, &res))
208 goto last_resort;
209 if (res.type != RTN_UNICAST)
210 goto e_inval_res;
211 *spec_dst = FIB_RES_PREFSRC(res);
212 fib_combine_itag(itag, &res);
213#ifdef CONFIG_IP_ROUTE_MULTIPATH
214 if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
215#else
216 if (FIB_RES_DEV(res) == dev)
217#endif
218 {
219 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
220 fib_res_put(&res);
221 return ret;
222 }
223 fib_res_put(&res);
224 if (no_addr)
225 goto last_resort;
226 if (rpf)
227 goto e_inval;
228 fl.oif = dev->ifindex;
229
230 ret = 0;
231 if (fib_lookup(&fl, &res) == 0) {
232 if (res.type == RTN_UNICAST) {
233 *spec_dst = FIB_RES_PREFSRC(res);
234 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
235 }
236 fib_res_put(&res);
237 }
238 return ret;
239
240last_resort:
241 if (rpf)
242 goto e_inval;
243 *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
244 *itag = 0;
245 return 0;
246
247e_inval_res:
248 fib_res_put(&res);
249e_inval:
250 return -EINVAL;
251}
252
Al Viro81f7bf62006-09-27 18:40:00 -0700253static inline __be32 sk_extract_addr(struct sockaddr *addr)
Thomas Graf4e902c52006-08-17 18:14:52 -0700254{
255 return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
256}
257
258static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
259{
260 struct nlattr *nla;
261
262 nla = (struct nlattr *) ((char *) mx + len);
263 nla->nla_type = type;
264 nla->nla_len = nla_attr_size(4);
265 *(u32 *) nla_data(nla) = value;
266
267 return len + nla_total_size(4);
268}
269
270static int rtentry_to_fib_config(int cmd, struct rtentry *rt,
271 struct fib_config *cfg)
272{
Al Viro6d85c102006-09-26 22:15:46 -0700273 __be32 addr;
Thomas Graf4e902c52006-08-17 18:14:52 -0700274 int plen;
275
276 memset(cfg, 0, sizeof(*cfg));
277
278 if (rt->rt_dst.sa_family != AF_INET)
279 return -EAFNOSUPPORT;
280
281 /*
282 * Check mask for validity:
283 * a) it must be contiguous.
284 * b) destination must have all host bits clear.
285 * c) if application forgot to set correct family (AF_INET),
286 * reject request unless it is absolutely clear i.e.
287 * both family and mask are zero.
288 */
289 plen = 32;
290 addr = sk_extract_addr(&rt->rt_dst);
291 if (!(rt->rt_flags & RTF_HOST)) {
Al Viro81f7bf62006-09-27 18:40:00 -0700292 __be32 mask = sk_extract_addr(&rt->rt_genmask);
Thomas Graf4e902c52006-08-17 18:14:52 -0700293
294 if (rt->rt_genmask.sa_family != AF_INET) {
295 if (mask || rt->rt_genmask.sa_family)
296 return -EAFNOSUPPORT;
297 }
298
299 if (bad_mask(mask, addr))
300 return -EINVAL;
301
302 plen = inet_mask_len(mask);
303 }
304
305 cfg->fc_dst_len = plen;
306 cfg->fc_dst = addr;
307
308 if (cmd != SIOCDELRT) {
309 cfg->fc_nlflags = NLM_F_CREATE;
310 cfg->fc_protocol = RTPROT_BOOT;
311 }
312
313 if (rt->rt_metric)
314 cfg->fc_priority = rt->rt_metric - 1;
315
316 if (rt->rt_flags & RTF_REJECT) {
317 cfg->fc_scope = RT_SCOPE_HOST;
318 cfg->fc_type = RTN_UNREACHABLE;
319 return 0;
320 }
321
322 cfg->fc_scope = RT_SCOPE_NOWHERE;
323 cfg->fc_type = RTN_UNICAST;
324
325 if (rt->rt_dev) {
326 char *colon;
327 struct net_device *dev;
328 char devname[IFNAMSIZ];
329
330 if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
331 return -EFAULT;
332
333 devname[IFNAMSIZ-1] = 0;
334 colon = strchr(devname, ':');
335 if (colon)
336 *colon = 0;
337 dev = __dev_get_by_name(devname);
338 if (!dev)
339 return -ENODEV;
340 cfg->fc_oif = dev->ifindex;
341 if (colon) {
342 struct in_ifaddr *ifa;
343 struct in_device *in_dev = __in_dev_get_rtnl(dev);
344 if (!in_dev)
345 return -ENODEV;
346 *colon = ':';
347 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
348 if (strcmp(ifa->ifa_label, devname) == 0)
349 break;
350 if (ifa == NULL)
351 return -ENODEV;
352 cfg->fc_prefsrc = ifa->ifa_local;
353 }
354 }
355
356 addr = sk_extract_addr(&rt->rt_gateway);
357 if (rt->rt_gateway.sa_family == AF_INET && addr) {
358 cfg->fc_gw = addr;
359 if (rt->rt_flags & RTF_GATEWAY &&
360 inet_addr_type(addr) == RTN_UNICAST)
361 cfg->fc_scope = RT_SCOPE_UNIVERSE;
362 }
363
364 if (cmd == SIOCDELRT)
365 return 0;
366
367 if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
368 return -EINVAL;
369
370 if (cfg->fc_scope == RT_SCOPE_NOWHERE)
371 cfg->fc_scope = RT_SCOPE_LINK;
372
373 if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
374 struct nlattr *mx;
375 int len = 0;
376
377 mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900378 if (mx == NULL)
Thomas Graf4e902c52006-08-17 18:14:52 -0700379 return -ENOMEM;
380
381 if (rt->rt_flags & RTF_MTU)
382 len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
383
384 if (rt->rt_flags & RTF_WINDOW)
385 len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
386
387 if (rt->rt_flags & RTF_IRTT)
388 len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
389
390 cfg->fc_mx = mx;
391 cfg->fc_mx_len = len;
392 }
393
394 return 0;
395}
396
Linus Torvalds1da177e2005-04-16 15:20:36 -0700397/*
398 * Handle IP routing ioctl calls. These are used to manipulate the routing tables
399 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900400
Linus Torvalds1da177e2005-04-16 15:20:36 -0700401int ip_rt_ioctl(unsigned int cmd, void __user *arg)
402{
Thomas Graf4e902c52006-08-17 18:14:52 -0700403 struct fib_config cfg;
404 struct rtentry rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700405 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700406
407 switch (cmd) {
408 case SIOCADDRT: /* Add a route */
409 case SIOCDELRT: /* Delete a route */
410 if (!capable(CAP_NET_ADMIN))
411 return -EPERM;
Thomas Graf4e902c52006-08-17 18:14:52 -0700412
413 if (copy_from_user(&rt, arg, sizeof(rt)))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700414 return -EFAULT;
Thomas Graf4e902c52006-08-17 18:14:52 -0700415
Linus Torvalds1da177e2005-04-16 15:20:36 -0700416 rtnl_lock();
Thomas Graf4e902c52006-08-17 18:14:52 -0700417 err = rtentry_to_fib_config(cmd, &rt, &cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700418 if (err == 0) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700419 struct fib_table *tb;
420
Linus Torvalds1da177e2005-04-16 15:20:36 -0700421 if (cmd == SIOCDELRT) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700422 tb = fib_get_table(cfg.fc_table);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700423 if (tb)
Thomas Graf4e902c52006-08-17 18:14:52 -0700424 err = tb->tb_delete(tb, &cfg);
425 else
426 err = -ESRCH;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700427 } else {
Thomas Graf4e902c52006-08-17 18:14:52 -0700428 tb = fib_new_table(cfg.fc_table);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700429 if (tb)
Thomas Graf4e902c52006-08-17 18:14:52 -0700430 err = tb->tb_insert(tb, &cfg);
431 else
432 err = -ENOBUFS;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700433 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700434
435 /* allocated by rtentry_to_fib_config() */
436 kfree(cfg.fc_mx);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700437 }
438 rtnl_unlock();
439 return err;
440 }
441 return -EINVAL;
442}
443
Thomas Grafd889ce32006-08-17 18:15:44 -0700444struct nla_policy rtm_ipv4_policy[RTA_MAX+1] __read_mostly = {
Thomas Graf4e902c52006-08-17 18:14:52 -0700445 [RTA_DST] = { .type = NLA_U32 },
446 [RTA_SRC] = { .type = NLA_U32 },
447 [RTA_IIF] = { .type = NLA_U32 },
448 [RTA_OIF] = { .type = NLA_U32 },
449 [RTA_GATEWAY] = { .type = NLA_U32 },
450 [RTA_PRIORITY] = { .type = NLA_U32 },
451 [RTA_PREFSRC] = { .type = NLA_U32 },
452 [RTA_METRICS] = { .type = NLA_NESTED },
Thomas Graf5176f912006-08-26 20:13:18 -0700453 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
Thomas Graf4e902c52006-08-17 18:14:52 -0700454 [RTA_PROTOINFO] = { .type = NLA_U32 },
455 [RTA_FLOW] = { .type = NLA_U32 },
456 [RTA_MP_ALGO] = { .type = NLA_U32 },
457};
Linus Torvalds1da177e2005-04-16 15:20:36 -0700458
Thomas Graf4e902c52006-08-17 18:14:52 -0700459static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh,
460 struct fib_config *cfg)
461{
462 struct nlattr *attr;
463 int err, remaining;
464 struct rtmsg *rtm;
465
466 err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
467 if (err < 0)
468 goto errout;
469
470 memset(cfg, 0, sizeof(*cfg));
471
472 rtm = nlmsg_data(nlh);
Thomas Graf4e902c52006-08-17 18:14:52 -0700473 cfg->fc_dst_len = rtm->rtm_dst_len;
Thomas Graf4e902c52006-08-17 18:14:52 -0700474 cfg->fc_tos = rtm->rtm_tos;
475 cfg->fc_table = rtm->rtm_table;
476 cfg->fc_protocol = rtm->rtm_protocol;
477 cfg->fc_scope = rtm->rtm_scope;
478 cfg->fc_type = rtm->rtm_type;
479 cfg->fc_flags = rtm->rtm_flags;
480 cfg->fc_nlflags = nlh->nlmsg_flags;
481
482 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
483 cfg->fc_nlinfo.nlh = nlh;
484
Thomas Grafa0ee18b2007-03-24 20:32:54 -0700485 if (cfg->fc_type > RTN_MAX) {
486 err = -EINVAL;
487 goto errout;
488 }
489
Thomas Graf4e902c52006-08-17 18:14:52 -0700490 nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
491 switch (attr->nla_type) {
492 case RTA_DST:
Al Viro17fb2c62006-09-26 22:15:25 -0700493 cfg->fc_dst = nla_get_be32(attr);
Thomas Graf4e902c52006-08-17 18:14:52 -0700494 break;
Thomas Graf4e902c52006-08-17 18:14:52 -0700495 case RTA_OIF:
496 cfg->fc_oif = nla_get_u32(attr);
497 break;
498 case RTA_GATEWAY:
Al Viro17fb2c62006-09-26 22:15:25 -0700499 cfg->fc_gw = nla_get_be32(attr);
Thomas Graf4e902c52006-08-17 18:14:52 -0700500 break;
501 case RTA_PRIORITY:
502 cfg->fc_priority = nla_get_u32(attr);
503 break;
504 case RTA_PREFSRC:
Al Viro17fb2c62006-09-26 22:15:25 -0700505 cfg->fc_prefsrc = nla_get_be32(attr);
Thomas Graf4e902c52006-08-17 18:14:52 -0700506 break;
507 case RTA_METRICS:
508 cfg->fc_mx = nla_data(attr);
509 cfg->fc_mx_len = nla_len(attr);
510 break;
511 case RTA_MULTIPATH:
512 cfg->fc_mp = nla_data(attr);
513 cfg->fc_mp_len = nla_len(attr);
514 break;
515 case RTA_FLOW:
516 cfg->fc_flow = nla_get_u32(attr);
517 break;
518 case RTA_MP_ALGO:
519 cfg->fc_mp_alg = nla_get_u32(attr);
520 break;
521 case RTA_TABLE:
522 cfg->fc_table = nla_get_u32(attr);
523 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700524 }
525 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700526
Linus Torvalds1da177e2005-04-16 15:20:36 -0700527 return 0;
Thomas Graf4e902c52006-08-17 18:14:52 -0700528errout:
529 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700530}
531
Thomas Graf63f34442007-03-22 11:55:17 -0700532static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700533{
Thomas Graf4e902c52006-08-17 18:14:52 -0700534 struct fib_config cfg;
535 struct fib_table *tb;
536 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700537
Thomas Graf4e902c52006-08-17 18:14:52 -0700538 err = rtm_to_fib_config(skb, nlh, &cfg);
539 if (err < 0)
540 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700541
Thomas Graf4e902c52006-08-17 18:14:52 -0700542 tb = fib_get_table(cfg.fc_table);
543 if (tb == NULL) {
544 err = -ESRCH;
545 goto errout;
546 }
547
548 err = tb->tb_delete(tb, &cfg);
549errout:
550 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700551}
552
Thomas Graf63f34442007-03-22 11:55:17 -0700553static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700554{
Thomas Graf4e902c52006-08-17 18:14:52 -0700555 struct fib_config cfg;
556 struct fib_table *tb;
557 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700558
Thomas Graf4e902c52006-08-17 18:14:52 -0700559 err = rtm_to_fib_config(skb, nlh, &cfg);
560 if (err < 0)
561 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700562
Thomas Graf4e902c52006-08-17 18:14:52 -0700563 tb = fib_new_table(cfg.fc_table);
564 if (tb == NULL) {
565 err = -ENOBUFS;
566 goto errout;
567 }
568
569 err = tb->tb_insert(tb, &cfg);
570errout:
571 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700572}
573
Thomas Graf63f34442007-03-22 11:55:17 -0700574static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700575{
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700576 unsigned int h, s_h;
577 unsigned int e = 0, s_e;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700578 struct fib_table *tb;
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700579 struct hlist_node *node;
580 int dumped = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700581
Thomas Grafbe403ea2006-08-17 18:15:17 -0700582 if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
583 ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700584 return ip_rt_dump(skb, cb);
585
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700586 s_h = cb->args[0];
587 s_e = cb->args[1];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700588
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700589 for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
590 e = 0;
591 hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) {
592 if (e < s_e)
593 goto next;
594 if (dumped)
595 memset(&cb->args[2], 0, sizeof(cb->args) -
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900596 2 * sizeof(cb->args[0]));
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700597 if (tb->tb_dump(tb, skb, cb) < 0)
598 goto out;
599 dumped = 1;
600next:
601 e++;
602 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700603 }
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700604out:
605 cb->args[1] = e;
606 cb->args[0] = h;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700607
608 return skb->len;
609}
610
611/* Prepare and feed intra-kernel routing request.
612 Really, it should be netlink message, but :-( netlink
613 can be not configured, so that we feed it directly
614 to fib engine. It is legal, because all events occur
615 only when netlink is already locked.
616 */
617
Al Viro81f7bf62006-09-27 18:40:00 -0700618static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700619{
Thomas Graf4e902c52006-08-17 18:14:52 -0700620 struct fib_table *tb;
621 struct fib_config cfg = {
622 .fc_protocol = RTPROT_KERNEL,
623 .fc_type = type,
624 .fc_dst = dst,
625 .fc_dst_len = dst_len,
626 .fc_prefsrc = ifa->ifa_local,
627 .fc_oif = ifa->ifa_dev->dev->ifindex,
628 .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
629 };
Linus Torvalds1da177e2005-04-16 15:20:36 -0700630
631 if (type == RTN_UNICAST)
632 tb = fib_new_table(RT_TABLE_MAIN);
633 else
634 tb = fib_new_table(RT_TABLE_LOCAL);
635
636 if (tb == NULL)
637 return;
638
Thomas Graf4e902c52006-08-17 18:14:52 -0700639 cfg.fc_table = tb->tb_id;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700640
Thomas Graf4e902c52006-08-17 18:14:52 -0700641 if (type != RTN_LOCAL)
642 cfg.fc_scope = RT_SCOPE_LINK;
643 else
644 cfg.fc_scope = RT_SCOPE_HOST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700645
646 if (cmd == RTM_NEWROUTE)
Thomas Graf4e902c52006-08-17 18:14:52 -0700647 tb->tb_insert(tb, &cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700648 else
Thomas Graf4e902c52006-08-17 18:14:52 -0700649 tb->tb_delete(tb, &cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700650}
651
Jamal Hadi Salim0ff60a42005-11-22 14:47:37 -0800652void fib_add_ifaddr(struct in_ifaddr *ifa)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700653{
654 struct in_device *in_dev = ifa->ifa_dev;
655 struct net_device *dev = in_dev->dev;
656 struct in_ifaddr *prim = ifa;
Al Viroa144ea42006-09-28 18:00:55 -0700657 __be32 mask = ifa->ifa_mask;
658 __be32 addr = ifa->ifa_local;
659 __be32 prefix = ifa->ifa_address&mask;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700660
661 if (ifa->ifa_flags&IFA_F_SECONDARY) {
662 prim = inet_ifa_byprefix(in_dev, prefix, mask);
663 if (prim == NULL) {
664 printk(KERN_DEBUG "fib_add_ifaddr: bug: prim == NULL\n");
665 return;
666 }
667 }
668
669 fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
670
671 if (!(dev->flags&IFF_UP))
672 return;
673
674 /* Add broadcast address, if it is explicitly assigned. */
Al Viroa144ea42006-09-28 18:00:55 -0700675 if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700676 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
677
678 if (!ZERONET(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
679 (prefix != addr || ifa->ifa_prefixlen < 32)) {
680 fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
681 RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
682
683 /* Add network specific broadcasts, when it takes a sense */
684 if (ifa->ifa_prefixlen < 31) {
685 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
686 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
687 }
688 }
689}
690
691static void fib_del_ifaddr(struct in_ifaddr *ifa)
692{
693 struct in_device *in_dev = ifa->ifa_dev;
694 struct net_device *dev = in_dev->dev;
695 struct in_ifaddr *ifa1;
696 struct in_ifaddr *prim = ifa;
Al Viroa144ea42006-09-28 18:00:55 -0700697 __be32 brd = ifa->ifa_address|~ifa->ifa_mask;
698 __be32 any = ifa->ifa_address&ifa->ifa_mask;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700699#define LOCAL_OK 1
700#define BRD_OK 2
701#define BRD0_OK 4
702#define BRD1_OK 8
703 unsigned ok = 0;
704
705 if (!(ifa->ifa_flags&IFA_F_SECONDARY))
706 fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
707 RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
708 else {
709 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
710 if (prim == NULL) {
711 printk(KERN_DEBUG "fib_del_ifaddr: bug: prim == NULL\n");
712 return;
713 }
714 }
715
716 /* Deletion is more complicated than add.
717 We should take care of not to delete too much :-)
718
719 Scan address list to be sure that addresses are really gone.
720 */
721
722 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
723 if (ifa->ifa_local == ifa1->ifa_local)
724 ok |= LOCAL_OK;
725 if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
726 ok |= BRD_OK;
727 if (brd == ifa1->ifa_broadcast)
728 ok |= BRD1_OK;
729 if (any == ifa1->ifa_broadcast)
730 ok |= BRD0_OK;
731 }
732
733 if (!(ok&BRD_OK))
734 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
735 if (!(ok&BRD1_OK))
736 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
737 if (!(ok&BRD0_OK))
738 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
739 if (!(ok&LOCAL_OK)) {
740 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
741
742 /* Check, that this local address finally disappeared. */
743 if (inet_addr_type(ifa->ifa_local) != RTN_LOCAL) {
744 /* And the last, but not the least thing.
745 We must flush stray FIB entries.
746
747 First of all, we scan fib_info list searching
748 for stray nexthop entries, then ignite fib_flush.
749 */
750 if (fib_sync_down(ifa->ifa_local, NULL, 0))
751 fib_flush();
752 }
753 }
754#undef LOCAL_OK
755#undef BRD_OK
756#undef BRD0_OK
757#undef BRD1_OK
758}
759
Robert Olsson246955f2005-06-20 13:36:39 -0700760static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
761{
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900762
Robert Olsson246955f2005-06-20 13:36:39 -0700763 struct fib_result res;
Thomas Graf5f3008932006-11-09 15:21:41 -0800764 struct flowi fl = { .mark = frn->fl_mark,
Thomas Graf47dcf0c2006-11-09 15:20:38 -0800765 .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
Robert Olsson246955f2005-06-20 13:36:39 -0700766 .tos = frn->fl_tos,
767 .scope = frn->fl_scope } } };
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700768
Sergey Vlasov912a41a2007-04-27 02:17:19 -0700769#ifdef CONFIG_IP_MULTIPLE_TABLES
770 res.r = NULL;
771#endif
772
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700773 frn->err = -ENOENT;
Robert Olsson246955f2005-06-20 13:36:39 -0700774 if (tb) {
775 local_bh_disable();
776
777 frn->tb_id = tb->tb_id;
778 frn->err = tb->tb_lookup(tb, &fl, &res);
779
780 if (!frn->err) {
781 frn->prefixlen = res.prefixlen;
782 frn->nh_sel = res.nh_sel;
783 frn->type = res.type;
784 frn->scope = res.scope;
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700785 fib_res_put(&res);
Robert Olsson246955f2005-06-20 13:36:39 -0700786 }
787 local_bh_enable();
788 }
789}
790
791static void nl_fib_input(struct sock *sk, int len)
792{
793 struct sk_buff *skb = NULL;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900794 struct nlmsghdr *nlh = NULL;
Robert Olsson246955f2005-06-20 13:36:39 -0700795 struct fib_result_nl *frn;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900796 u32 pid;
Robert Olsson246955f2005-06-20 13:36:39 -0700797 struct fib_table *tb;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900798
Thomas Grafea865752005-12-01 14:30:00 -0800799 skb = skb_dequeue(&sk->sk_receive_queue);
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700800 if (skb == NULL)
801 return;
802
Arnaldo Carvalho de Melob529ccf2007-04-25 19:08:35 -0700803 nlh = nlmsg_hdr(skb);
Thomas Grafea865752005-12-01 14:30:00 -0800804 if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
805 nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn))) {
806 kfree_skb(skb);
807 return;
808 }
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900809
Robert Olsson246955f2005-06-20 13:36:39 -0700810 frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
811 tb = fib_get_table(frn->tb_id_in);
812
813 nl_fib_lookup(frn, tb);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900814
Alexey Kuznetsov1194ed02007-04-25 13:07:28 -0700815 pid = NETLINK_CB(skb).pid; /* pid of sending process */
Robert Olsson246955f2005-06-20 13:36:39 -0700816 NETLINK_CB(skb).pid = 0; /* from kernel */
Patrick McHardyac6d4392005-08-14 19:29:52 -0700817 NETLINK_CB(skb).dst_group = 0; /* unicast */
Robert Olsson246955f2005-06-20 13:36:39 -0700818 netlink_unicast(sk, skb, pid, MSG_DONTWAIT);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900819}
Robert Olsson246955f2005-06-20 13:36:39 -0700820
821static void nl_fib_lookup_init(void)
822{
Patrick McHardyaf65bdf2007-04-20 14:14:21 -0700823 netlink_kernel_create(NETLINK_FIB_LOOKUP, 0, nl_fib_input, NULL,
824 THIS_MODULE);
Robert Olsson246955f2005-06-20 13:36:39 -0700825}
826
/*
 * Take the routes through a device out of service: sync the FIB down for
 * the device (flushing the tables when fib_sync_down() reports a non-zero
 * result), flush the routing cache and call arp_ifdown() for the device.
 */
static void fib_disable_ip(struct net_device *dev, int force)
{
	int stale = fib_sync_down(0, dev, force);

	if (stale)
		fib_flush();

	rt_cache_flush(0);
	arp_ifdown(dev);
}
834
835static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
836{
837 struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
838
839 switch (event) {
840 case NETDEV_UP:
841 fib_add_ifaddr(ifa);
842#ifdef CONFIG_IP_ROUTE_MULTIPATH
843 fib_sync_up(ifa->ifa_dev->dev);
844#endif
845 rt_cache_flush(-1);
846 break;
847 case NETDEV_DOWN:
848 fib_del_ifaddr(ifa);
Jayachandran C9fcc2e82005-10-27 15:10:01 -0700849 if (ifa->ifa_dev->ifa_list == NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700850 /* Last address was deleted from this interface.
851 Disable IP.
852 */
853 fib_disable_ip(ifa->ifa_dev->dev, 1);
854 } else {
855 rt_cache_flush(-1);
856 }
857 break;
858 }
859 return NOTIFY_DONE;
860}
861
862static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
863{
864 struct net_device *dev = ptr;
Herbert Xue5ed6392005-10-03 14:35:55 -0700865 struct in_device *in_dev = __in_dev_get_rtnl(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700866
867 if (event == NETDEV_UNREGISTER) {
868 fib_disable_ip(dev, 2);
869 return NOTIFY_DONE;
870 }
871
872 if (!in_dev)
873 return NOTIFY_DONE;
874
875 switch (event) {
876 case NETDEV_UP:
877 for_ifa(in_dev) {
878 fib_add_ifaddr(ifa);
879 } endfor_ifa(in_dev);
880#ifdef CONFIG_IP_ROUTE_MULTIPATH
881 fib_sync_up(dev);
882#endif
883 rt_cache_flush(-1);
884 break;
885 case NETDEV_DOWN:
886 fib_disable_ip(dev, 0);
887 break;
888 case NETDEV_CHANGEMTU:
889 case NETDEV_CHANGE:
890 rt_cache_flush(0);
891 break;
892 }
893 return NOTIFY_DONE;
894}
895
896static struct notifier_block fib_inetaddr_notifier = {
897 .notifier_call =fib_inetaddr_event,
898};
899
900static struct notifier_block fib_netdev_notifier = {
901 .notifier_call =fib_netdev_event,
902};
903
904void __init ip_fib_init(void)
905{
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700906 unsigned int i;
907
908 for (i = 0; i < FIB_TABLE_HASHSZ; i++)
909 INIT_HLIST_HEAD(&fib_table_hash[i]);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700910#ifndef CONFIG_IP_MULTIPLE_TABLES
911 ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL);
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700912 hlist_add_head_rcu(&ip_fib_local_table->tb_hlist, &fib_table_hash[0]);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700913 ip_fib_main_table = fib_hash_init(RT_TABLE_MAIN);
Patrick McHardy1af5a8c2006-08-10 23:10:46 -0700914 hlist_add_head_rcu(&ip_fib_main_table->tb_hlist, &fib_table_hash[0]);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700915#else
Thomas Grafe1ef4bf2006-08-04 03:39:22 -0700916 fib4_rules_init();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700917#endif
918
919 register_netdevice_notifier(&fib_netdev_notifier);
920 register_inetaddr_notifier(&fib_inetaddr_notifier);
Robert Olsson246955f2005-06-20 13:36:39 -0700921 nl_fib_lookup_init();
Thomas Graf63f34442007-03-22 11:55:17 -0700922
923 rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
924 rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
925 rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700926}
927
/* Symbols exported for use outside this file. */
EXPORT_SYMBOL(inet_addr_type);
EXPORT_SYMBOL(ip_dev_find);