blob: d853bd2bd0d2f2f5cacab102a37b6d19e7d359f5 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * IPv4 Forwarding Information Base: semantics.
7 *
8 * Version: $Id: fib_semantics.c,v 1.19 2002/01/12 07:54:56 davem Exp $
9 *
10 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 */
17
Linus Torvalds1da177e2005-04-16 15:20:36 -070018#include <asm/uaccess.h>
19#include <asm/system.h>
20#include <linux/bitops.h>
21#include <linux/types.h>
22#include <linux/kernel.h>
23#include <linux/jiffies.h>
24#include <linux/mm.h>
25#include <linux/string.h>
26#include <linux/socket.h>
27#include <linux/sockios.h>
28#include <linux/errno.h>
29#include <linux/in.h>
30#include <linux/inet.h>
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020031#include <linux/inetdevice.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070032#include <linux/netdevice.h>
33#include <linux/if_arp.h>
34#include <linux/proc_fs.h>
35#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070036#include <linux/init.h>
37
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020038#include <net/arp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070039#include <net/ip.h>
40#include <net/protocol.h>
41#include <net/route.h>
42#include <net/tcp.h>
43#include <net/sock.h>
44#include <net/ip_fib.h>
45#include <net/ip_mp_alg.h>
Thomas Graff21c7bc2006-08-15 00:34:17 -070046#include <net/netlink.h>
Thomas Graf4e902c52006-08-17 18:14:52 -070047#include <net/nexthop.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070048
49#include "fib_lookup.h"
50
51#define FSprintk(a...)
52
Stephen Hemminger832b4c52006-08-29 16:48:09 -070053static DEFINE_SPINLOCK(fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -070054static struct hlist_head *fib_info_hash;
55static struct hlist_head *fib_info_laddrhash;
56static unsigned int fib_hash_size;
57static unsigned int fib_info_cnt;
58
59#define DEVINDEX_HASHBITS 8
60#define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
61static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
62
/*
 * Nexthop iteration helpers.
 *
 * for_nexthops(fi) / change_nexthops(fi) open a brace scope that declares
 * "nhsel" (index of the current nexthop) and "nh" (pointer to it, const for
 * for_nexthops, writable for change_nexthops) and then emit a for-loop
 * header.  The caller supplies the loop body and must close the scope with
 * endfor_nexthops(fi).  "nhsel" stays visible between the end of the loop
 * body and endfor_nexthops().
 */
#ifdef CONFIG_IP_ROUTE_MULTIPATH

static DEFINE_SPINLOCK(fib_multipath_lock);

#define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \
for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)

#define change_nexthops(fi) { int nhsel; struct fib_nh * nh; \
for (nhsel=0, nh = (struct fib_nh*)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++)

#else /* CONFIG_IP_ROUTE_MULTIPATH */

/*
 * Without multipath only the first nexthop is visited; the single-pass
 * loop is kept so that both configurations share the same calling shape
 * (the compiler is expected to drop the dummy loop).
 */

#define for_nexthops(fi) { int nhsel=0; const struct fib_nh * nh = (fi)->fib_nh; \
for (nhsel=0; nhsel < 1; nhsel++)

#define change_nexthops(fi) { int nhsel=0; struct fib_nh * nh = (struct fib_nh*)((fi)->fib_nh); \
for (nhsel=0; nhsel < 1; nhsel++)

#endif /* CONFIG_IP_ROUTE_MULTIPATH */

/* Closes the scope opened by for_nexthops() / change_nexthops(). */
#define endfor_nexthops(fi) }
86
87
Arjan van de Ven9b5b5cf2005-11-29 16:21:38 -080088static const struct
Linus Torvalds1da177e2005-04-16 15:20:36 -070089{
90 int error;
91 u8 scope;
92} fib_props[RTA_MAX + 1] = {
93 {
94 .error = 0,
95 .scope = RT_SCOPE_NOWHERE,
96 }, /* RTN_UNSPEC */
97 {
98 .error = 0,
99 .scope = RT_SCOPE_UNIVERSE,
100 }, /* RTN_UNICAST */
101 {
102 .error = 0,
103 .scope = RT_SCOPE_HOST,
104 }, /* RTN_LOCAL */
105 {
106 .error = 0,
107 .scope = RT_SCOPE_LINK,
108 }, /* RTN_BROADCAST */
109 {
110 .error = 0,
111 .scope = RT_SCOPE_LINK,
112 }, /* RTN_ANYCAST */
113 {
114 .error = 0,
115 .scope = RT_SCOPE_UNIVERSE,
116 }, /* RTN_MULTICAST */
117 {
118 .error = -EINVAL,
119 .scope = RT_SCOPE_UNIVERSE,
120 }, /* RTN_BLACKHOLE */
121 {
122 .error = -EHOSTUNREACH,
123 .scope = RT_SCOPE_UNIVERSE,
124 }, /* RTN_UNREACHABLE */
125 {
126 .error = -EACCES,
127 .scope = RT_SCOPE_UNIVERSE,
128 }, /* RTN_PROHIBIT */
129 {
130 .error = -EAGAIN,
131 .scope = RT_SCOPE_UNIVERSE,
132 }, /* RTN_THROW */
133 {
134 .error = -EINVAL,
135 .scope = RT_SCOPE_NOWHERE,
136 }, /* RTN_NAT */
137 {
138 .error = -EINVAL,
139 .scope = RT_SCOPE_NOWHERE,
140 }, /* RTN_XRESOLVE */
141};
142
143
144/* Release a nexthop info record */
145
146void free_fib_info(struct fib_info *fi)
147{
148 if (fi->fib_dead == 0) {
149 printk("Freeing alive fib_info %p\n", fi);
150 return;
151 }
152 change_nexthops(fi) {
153 if (nh->nh_dev)
154 dev_put(nh->nh_dev);
155 nh->nh_dev = NULL;
156 } endfor_nexthops(fi);
157 fib_info_cnt--;
158 kfree(fi);
159}
160
161void fib_release_info(struct fib_info *fi)
162{
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700163 spin_lock_bh(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700164 if (fi && --fi->fib_treeref == 0) {
165 hlist_del(&fi->fib_hash);
166 if (fi->fib_prefsrc)
167 hlist_del(&fi->fib_lhash);
168 change_nexthops(fi) {
169 if (!nh->nh_dev)
170 continue;
171 hlist_del(&nh->nh_hash);
172 } endfor_nexthops(fi)
173 fi->fib_dead = 1;
174 fib_info_put(fi);
175 }
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700176 spin_unlock_bh(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700177}
178
179static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
180{
181 const struct fib_nh *onh = ofi->fib_nh;
182
183 for_nexthops(fi) {
184 if (nh->nh_oif != onh->nh_oif ||
185 nh->nh_gw != onh->nh_gw ||
186 nh->nh_scope != onh->nh_scope ||
187#ifdef CONFIG_IP_ROUTE_MULTIPATH
188 nh->nh_weight != onh->nh_weight ||
189#endif
190#ifdef CONFIG_NET_CLS_ROUTE
191 nh->nh_tclassid != onh->nh_tclassid ||
192#endif
193 ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
194 return -1;
195 onh++;
196 } endfor_nexthops(fi);
197 return 0;
198}
199
200static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
201{
202 unsigned int mask = (fib_hash_size - 1);
203 unsigned int val = fi->fib_nhs;
204
205 val ^= fi->fib_protocol;
206 val ^= fi->fib_prefsrc;
207 val ^= fi->fib_priority;
208
209 return (val ^ (val >> 7) ^ (val >> 12)) & mask;
210}
211
212static struct fib_info *fib_find_info(const struct fib_info *nfi)
213{
214 struct hlist_head *head;
215 struct hlist_node *node;
216 struct fib_info *fi;
217 unsigned int hash;
218
219 hash = fib_info_hashfn(nfi);
220 head = &fib_info_hash[hash];
221
222 hlist_for_each_entry(fi, node, head, fib_hash) {
223 if (fi->fib_nhs != nfi->fib_nhs)
224 continue;
225 if (nfi->fib_protocol == fi->fib_protocol &&
226 nfi->fib_prefsrc == fi->fib_prefsrc &&
227 nfi->fib_priority == fi->fib_priority &&
228 memcmp(nfi->fib_metrics, fi->fib_metrics,
229 sizeof(fi->fib_metrics)) == 0 &&
230 ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
231 (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
232 return fi;
233 }
234
235 return NULL;
236}
237
238static inline unsigned int fib_devindex_hashfn(unsigned int val)
239{
240 unsigned int mask = DEVINDEX_HASHSIZE - 1;
241
242 return (val ^
243 (val >> DEVINDEX_HASHBITS) ^
244 (val >> (DEVINDEX_HASHBITS * 2))) & mask;
245}
246
247/* Check, that the gateway is already configured.
248 Used only by redirect accept routine.
249 */
250
Al Virod878e72e2006-09-26 22:18:13 -0700251int ip_fib_check_default(__be32 gw, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700252{
253 struct hlist_head *head;
254 struct hlist_node *node;
255 struct fib_nh *nh;
256 unsigned int hash;
257
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700258 spin_lock(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700259
260 hash = fib_devindex_hashfn(dev->ifindex);
261 head = &fib_info_devhash[hash];
262 hlist_for_each_entry(nh, node, head, nh_hash) {
263 if (nh->nh_dev == dev &&
264 nh->nh_gw == gw &&
265 !(nh->nh_flags&RTNH_F_DEAD)) {
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700266 spin_unlock(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700267 return 0;
268 }
269 }
270
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700271 spin_unlock(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700272
273 return -1;
274}
275
276void rtmsg_fib(int event, u32 key, struct fib_alias *fa,
Thomas Graf4e902c52006-08-17 18:14:52 -0700277 int dst_len, u32 tb_id, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700278{
279 struct sk_buff *skb;
Thomas Graff21c7bc2006-08-15 00:34:17 -0700280 int payload = sizeof(struct rtmsg) + 256;
Thomas Graf4e902c52006-08-17 18:14:52 -0700281 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
Thomas Graff21c7bc2006-08-15 00:34:17 -0700282 int err = -ENOBUFS;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700283
Thomas Graff21c7bc2006-08-15 00:34:17 -0700284 skb = nlmsg_new(nlmsg_total_size(payload), GFP_KERNEL);
285 if (skb == NULL)
286 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700287
Thomas Graf4e902c52006-08-17 18:14:52 -0700288 err = fib_dump_info(skb, info->pid, seq, event, tb_id,
Thomas Grafbe403ea2006-08-17 18:15:17 -0700289 fa->fa_type, fa->fa_scope, key, dst_len,
Thomas Graf4e902c52006-08-17 18:14:52 -0700290 fa->fa_tos, fa->fa_info, 0);
Thomas Graff21c7bc2006-08-15 00:34:17 -0700291 if (err < 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700292 kfree_skb(skb);
Thomas Graff21c7bc2006-08-15 00:34:17 -0700293 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700294 }
Thomas Graff21c7bc2006-08-15 00:34:17 -0700295
Thomas Graf4e902c52006-08-17 18:14:52 -0700296 err = rtnl_notify(skb, info->pid, RTNLGRP_IPV4_ROUTE,
297 info->nlh, GFP_KERNEL);
Thomas Graff21c7bc2006-08-15 00:34:17 -0700298errout:
299 if (err < 0)
300 rtnl_set_sk_err(RTNLGRP_IPV4_ROUTE, err);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700301}
302
303/* Return the first fib alias matching TOS with
304 * priority less than or equal to PRIO.
305 */
306struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio)
307{
308 if (fah) {
309 struct fib_alias *fa;
310 list_for_each_entry(fa, fah, fa_list) {
311 if (fa->fa_tos > tos)
312 continue;
313 if (fa->fa_info->fib_priority >= prio ||
314 fa->fa_tos < tos)
315 return fa;
316 }
317 }
318 return NULL;
319}
320
321int fib_detect_death(struct fib_info *fi, int order,
322 struct fib_info **last_resort, int *last_idx, int *dflt)
323{
324 struct neighbour *n;
325 int state = NUD_NONE;
326
327 n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
328 if (n) {
329 state = n->nud_state;
330 neigh_release(n);
331 }
332 if (state==NUD_REACHABLE)
333 return 0;
334 if ((state&NUD_VALID) && order != *dflt)
335 return 0;
336 if ((state&NUD_VALID) ||
337 (*last_idx<0 && order > *dflt)) {
338 *last_resort = fi;
339 *last_idx = order;
340 }
341 return 1;
342}
343
344#ifdef CONFIG_IP_ROUTE_MULTIPATH
345
/*
 * Count the rtnexthop entries in the @remaining bytes starting at @rtnh.
 *
 * Returns the number of entries, or 0 when bytes are left over after the
 * last well-formed entry (the configuration is then treated as invalid).
 */
static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining)
{
	int count;

	for (count = 0; rtnh_ok(rtnh, remaining); count++)
		rtnh = rtnh_next(rtnh, &remaining);

	/* leftover implies invalid nexthop configuration, discard it */
	if (remaining > 0)
		return 0;

	return count;
}
358
Thomas Graf4e902c52006-08-17 18:14:52 -0700359static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
360 int remaining, struct fib_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700361{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700362 change_nexthops(fi) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700363 int attrlen;
364
365 if (!rtnh_ok(rtnh, remaining))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700366 return -EINVAL;
Thomas Graf4e902c52006-08-17 18:14:52 -0700367
368 nh->nh_flags = (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags;
369 nh->nh_oif = rtnh->rtnh_ifindex;
370 nh->nh_weight = rtnh->rtnh_hops + 1;
371
372 attrlen = rtnh_attrlen(rtnh);
373 if (attrlen > 0) {
374 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
375
376 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
Al Viro17fb2c62006-09-26 22:15:25 -0700377 nh->nh_gw = nla ? nla_get_be32(nla) : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700378#ifdef CONFIG_NET_CLS_ROUTE
Thomas Graf4e902c52006-08-17 18:14:52 -0700379 nla = nla_find(attrs, attrlen, RTA_FLOW);
380 nh->nh_tclassid = nla ? nla_get_u32(nla) : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700381#endif
382 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700383
384 rtnh = rtnh_next(rtnh, &remaining);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700385 } endfor_nexthops(fi);
Thomas Graf4e902c52006-08-17 18:14:52 -0700386
Linus Torvalds1da177e2005-04-16 15:20:36 -0700387 return 0;
388}
389
390#endif
391
Thomas Graf4e902c52006-08-17 18:14:52 -0700392int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700393{
394#ifdef CONFIG_IP_ROUTE_MULTIPATH
Thomas Graf4e902c52006-08-17 18:14:52 -0700395 struct rtnexthop *rtnh;
396 int remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700397#endif
398
Thomas Graf4e902c52006-08-17 18:14:52 -0700399 if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700400 return 1;
401
Thomas Graf4e902c52006-08-17 18:14:52 -0700402 if (cfg->fc_oif || cfg->fc_gw) {
403 if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) &&
404 (!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->nh_gw))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700405 return 0;
406 return 1;
407 }
408
409#ifdef CONFIG_IP_ROUTE_MULTIPATH
Thomas Graf4e902c52006-08-17 18:14:52 -0700410 if (cfg->fc_mp == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700411 return 0;
Thomas Graf4e902c52006-08-17 18:14:52 -0700412
413 rtnh = cfg->fc_mp;
414 remaining = cfg->fc_mp_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700415
416 for_nexthops(fi) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700417 int attrlen;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700418
Thomas Graf4e902c52006-08-17 18:14:52 -0700419 if (!rtnh_ok(rtnh, remaining))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700420 return -EINVAL;
Thomas Graf4e902c52006-08-17 18:14:52 -0700421
422 if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->nh_oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700423 return 1;
Thomas Graf4e902c52006-08-17 18:14:52 -0700424
425 attrlen = rtnh_attrlen(rtnh);
426 if (attrlen < 0) {
427 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
428
429 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
Al Viro17fb2c62006-09-26 22:15:25 -0700430 if (nla && nla_get_be32(nla) != nh->nh_gw)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700431 return 1;
432#ifdef CONFIG_NET_CLS_ROUTE
Thomas Graf4e902c52006-08-17 18:14:52 -0700433 nla = nla_find(attrs, attrlen, RTA_FLOW);
434 if (nla && nla_get_u32(nla) != nh->nh_tclassid)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700435 return 1;
436#endif
437 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700438
439 rtnh = rtnh_next(rtnh, &remaining);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700440 } endfor_nexthops(fi);
441#endif
442 return 0;
443}
444
445
446/*
447 Picture
448 -------
449
450 Semantics of nexthop is very messy for historical reasons.
451 We have to take into account, that:
452 a) gateway can be actually local interface address,
453 so that gatewayed route is direct.
454 b) gateway must be on-link address, possibly
455 described not by an ifaddr, but also by a direct route.
456 c) If both gateway and interface are specified, they should not
457 contradict.
458 d) If we use tunnel routes, gateway could be not on-link.
459
460 Attempt to reconcile all of these (alas, self-contradictory) conditions
461 results in pretty ugly and hairy code with obscure logic.
462
463 I chose to generalize it instead, so that the size
464 of code does not increase practically, but it becomes
465 much more general.
466 Every prefix is assigned a "scope" value: "host" is local address,
467 "link" is direct route,
468 [ ... "site" ... "interior" ... ]
469 and "universe" is true gateway route with global meaning.
470
471 Every prefix refers to a set of "nexthop"s (gw, oif),
472 where gw must have narrower scope. This recursion stops
473 when gw has LOCAL scope or if "nexthop" is declared ONLINK,
474 which means that gw is forced to be on link.
475
476 Code is still hairy, but now it is apparently logically
477 consistent and very flexible. E.g., as a by-product it allows
478 independent exterior and interior routing processes
479 to co-exist in peace.
480
481 Normally it looks as following.
482
483 {universe prefix} -> (gw, oif) [scope link]
484 |
485 |-> {link prefix} -> (gw, oif) [scope local]
486 |
487 |-> {local prefix} (terminal node)
488 */
489
Thomas Graf4e902c52006-08-17 18:14:52 -0700490static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
491 struct fib_nh *nh)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700492{
493 int err;
494
495 if (nh->nh_gw) {
496 struct fib_result res;
497
498#ifdef CONFIG_IP_ROUTE_PERVASIVE
499 if (nh->nh_flags&RTNH_F_PERVASIVE)
500 return 0;
501#endif
502 if (nh->nh_flags&RTNH_F_ONLINK) {
503 struct net_device *dev;
504
Thomas Graf4e902c52006-08-17 18:14:52 -0700505 if (cfg->fc_scope >= RT_SCOPE_LINK)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700506 return -EINVAL;
507 if (inet_addr_type(nh->nh_gw) != RTN_UNICAST)
508 return -EINVAL;
509 if ((dev = __dev_get_by_index(nh->nh_oif)) == NULL)
510 return -ENODEV;
511 if (!(dev->flags&IFF_UP))
512 return -ENETDOWN;
513 nh->nh_dev = dev;
514 dev_hold(dev);
515 nh->nh_scope = RT_SCOPE_LINK;
516 return 0;
517 }
518 {
Thomas Graf4e902c52006-08-17 18:14:52 -0700519 struct flowi fl = {
520 .nl_u = {
521 .ip4_u = {
522 .daddr = nh->nh_gw,
523 .scope = cfg->fc_scope + 1,
524 },
525 },
526 .oif = nh->nh_oif,
527 };
Linus Torvalds1da177e2005-04-16 15:20:36 -0700528
529 /* It is not necessary, but requires a bit of thinking */
530 if (fl.fl4_scope < RT_SCOPE_LINK)
531 fl.fl4_scope = RT_SCOPE_LINK;
532 if ((err = fib_lookup(&fl, &res)) != 0)
533 return err;
534 }
535 err = -EINVAL;
536 if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
537 goto out;
538 nh->nh_scope = res.scope;
539 nh->nh_oif = FIB_RES_OIF(res);
540 if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
541 goto out;
542 dev_hold(nh->nh_dev);
543 err = -ENETDOWN;
544 if (!(nh->nh_dev->flags & IFF_UP))
545 goto out;
546 err = 0;
547out:
548 fib_res_put(&res);
549 return err;
550 } else {
551 struct in_device *in_dev;
552
553 if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
554 return -EINVAL;
555
556 in_dev = inetdev_by_index(nh->nh_oif);
557 if (in_dev == NULL)
558 return -ENODEV;
559 if (!(in_dev->dev->flags&IFF_UP)) {
560 in_dev_put(in_dev);
561 return -ENETDOWN;
562 }
563 nh->nh_dev = in_dev->dev;
564 dev_hold(nh->nh_dev);
565 nh->nh_scope = RT_SCOPE_HOST;
566 in_dev_put(in_dev);
567 }
568 return 0;
569}
570
571static inline unsigned int fib_laddr_hashfn(u32 val)
572{
573 unsigned int mask = (fib_hash_size - 1);
574
575 return (val ^ (val >> 7) ^ (val >> 14)) & mask;
576}
577
578static struct hlist_head *fib_hash_alloc(int bytes)
579{
580 if (bytes <= PAGE_SIZE)
581 return kmalloc(bytes, GFP_KERNEL);
582 else
583 return (struct hlist_head *)
584 __get_free_pages(GFP_KERNEL, get_order(bytes));
585}
586
587static void fib_hash_free(struct hlist_head *hash, int bytes)
588{
589 if (!hash)
590 return;
591
592 if (bytes <= PAGE_SIZE)
593 kfree(hash);
594 else
595 free_pages((unsigned long) hash, get_order(bytes));
596}
597
598static void fib_hash_move(struct hlist_head *new_info_hash,
599 struct hlist_head *new_laddrhash,
600 unsigned int new_size)
601{
David S. Millerb7656e72005-08-05 04:12:48 -0700602 struct hlist_head *old_info_hash, *old_laddrhash;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700603 unsigned int old_size = fib_hash_size;
David S. Millerb7656e72005-08-05 04:12:48 -0700604 unsigned int i, bytes;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700605
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700606 spin_lock_bh(&fib_info_lock);
David S. Millerb7656e72005-08-05 04:12:48 -0700607 old_info_hash = fib_info_hash;
608 old_laddrhash = fib_info_laddrhash;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700609 fib_hash_size = new_size;
610
611 for (i = 0; i < old_size; i++) {
612 struct hlist_head *head = &fib_info_hash[i];
613 struct hlist_node *node, *n;
614 struct fib_info *fi;
615
616 hlist_for_each_entry_safe(fi, node, n, head, fib_hash) {
617 struct hlist_head *dest;
618 unsigned int new_hash;
619
620 hlist_del(&fi->fib_hash);
621
622 new_hash = fib_info_hashfn(fi);
623 dest = &new_info_hash[new_hash];
624 hlist_add_head(&fi->fib_hash, dest);
625 }
626 }
627 fib_info_hash = new_info_hash;
628
629 for (i = 0; i < old_size; i++) {
630 struct hlist_head *lhead = &fib_info_laddrhash[i];
631 struct hlist_node *node, *n;
632 struct fib_info *fi;
633
634 hlist_for_each_entry_safe(fi, node, n, lhead, fib_lhash) {
635 struct hlist_head *ldest;
636 unsigned int new_hash;
637
638 hlist_del(&fi->fib_lhash);
639
640 new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
641 ldest = &new_laddrhash[new_hash];
642 hlist_add_head(&fi->fib_lhash, ldest);
643 }
644 }
645 fib_info_laddrhash = new_laddrhash;
646
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700647 spin_unlock_bh(&fib_info_lock);
David S. Millerb7656e72005-08-05 04:12:48 -0700648
649 bytes = old_size * sizeof(struct hlist_head *);
650 fib_hash_free(old_info_hash, bytes);
651 fib_hash_free(old_laddrhash, bytes);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700652}
653
Thomas Graf4e902c52006-08-17 18:14:52 -0700654struct fib_info *fib_create_info(struct fib_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700655{
656 int err;
657 struct fib_info *fi = NULL;
658 struct fib_info *ofi;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700659 int nhs = 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700660
661 /* Fast check to catch the most weird cases */
Thomas Graf4e902c52006-08-17 18:14:52 -0700662 if (fib_props[cfg->fc_type].scope > cfg->fc_scope)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700663 goto err_inval;
664
665#ifdef CONFIG_IP_ROUTE_MULTIPATH
Thomas Graf4e902c52006-08-17 18:14:52 -0700666 if (cfg->fc_mp) {
667 nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700668 if (nhs == 0)
669 goto err_inval;
670 }
671#endif
672#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
Thomas Graf4e902c52006-08-17 18:14:52 -0700673 if (cfg->fc_mp_alg) {
674 if (cfg->fc_mp_alg < IP_MP_ALG_NONE ||
675 cfg->fc_mp_alg > IP_MP_ALG_MAX)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700676 goto err_inval;
677 }
678#endif
679
680 err = -ENOBUFS;
681 if (fib_info_cnt >= fib_hash_size) {
682 unsigned int new_size = fib_hash_size << 1;
683 struct hlist_head *new_info_hash;
684 struct hlist_head *new_laddrhash;
685 unsigned int bytes;
686
687 if (!new_size)
688 new_size = 1;
689 bytes = new_size * sizeof(struct hlist_head *);
690 new_info_hash = fib_hash_alloc(bytes);
691 new_laddrhash = fib_hash_alloc(bytes);
692 if (!new_info_hash || !new_laddrhash) {
693 fib_hash_free(new_info_hash, bytes);
694 fib_hash_free(new_laddrhash, bytes);
695 } else {
696 memset(new_info_hash, 0, bytes);
697 memset(new_laddrhash, 0, bytes);
698
699 fib_hash_move(new_info_hash, new_laddrhash, new_size);
700 }
701
702 if (!fib_hash_size)
703 goto failure;
704 }
705
Panagiotis Issaris0da974f2006-07-21 14:51:30 -0700706 fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700707 if (fi == NULL)
708 goto failure;
709 fib_info_cnt++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700710
Thomas Graf4e902c52006-08-17 18:14:52 -0700711 fi->fib_protocol = cfg->fc_protocol;
712 fi->fib_flags = cfg->fc_flags;
713 fi->fib_priority = cfg->fc_priority;
714 fi->fib_prefsrc = cfg->fc_prefsrc;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700715
716 fi->fib_nhs = nhs;
717 change_nexthops(fi) {
718 nh->nh_parent = fi;
719 } endfor_nexthops(fi)
720
Thomas Graf4e902c52006-08-17 18:14:52 -0700721 if (cfg->fc_mx) {
722 struct nlattr *nla;
723 int remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700724
Thomas Graf4e902c52006-08-17 18:14:52 -0700725 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
726 int type = nla->nla_type;
727
728 if (type) {
729 if (type > RTAX_MAX)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700730 goto err_inval;
Thomas Graf4e902c52006-08-17 18:14:52 -0700731 fi->fib_metrics[type - 1] = nla_get_u32(nla);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700732 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700733 }
734 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700735
Thomas Graf4e902c52006-08-17 18:14:52 -0700736 if (cfg->fc_mp) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700737#ifdef CONFIG_IP_ROUTE_MULTIPATH
Thomas Graf4e902c52006-08-17 18:14:52 -0700738 err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg);
739 if (err != 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700740 goto failure;
Thomas Graf4e902c52006-08-17 18:14:52 -0700741 if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700742 goto err_inval;
Thomas Graf4e902c52006-08-17 18:14:52 -0700743 if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700744 goto err_inval;
745#ifdef CONFIG_NET_CLS_ROUTE
Thomas Graf4e902c52006-08-17 18:14:52 -0700746 if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700747 goto err_inval;
748#endif
749#else
750 goto err_inval;
751#endif
752 } else {
753 struct fib_nh *nh = fi->fib_nh;
Thomas Graf4e902c52006-08-17 18:14:52 -0700754
755 nh->nh_oif = cfg->fc_oif;
756 nh->nh_gw = cfg->fc_gw;
757 nh->nh_flags = cfg->fc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700758#ifdef CONFIG_NET_CLS_ROUTE
Thomas Graf4e902c52006-08-17 18:14:52 -0700759 nh->nh_tclassid = cfg->fc_flow;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700760#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700761#ifdef CONFIG_IP_ROUTE_MULTIPATH
762 nh->nh_weight = 1;
763#endif
764 }
765
766#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
Thomas Graf4e902c52006-08-17 18:14:52 -0700767 fi->fib_mp_alg = cfg->fc_mp_alg;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700768#endif
769
Thomas Graf4e902c52006-08-17 18:14:52 -0700770 if (fib_props[cfg->fc_type].error) {
771 if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700772 goto err_inval;
773 goto link_it;
774 }
775
Thomas Graf4e902c52006-08-17 18:14:52 -0700776 if (cfg->fc_scope > RT_SCOPE_HOST)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700777 goto err_inval;
778
Thomas Graf4e902c52006-08-17 18:14:52 -0700779 if (cfg->fc_scope == RT_SCOPE_HOST) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700780 struct fib_nh *nh = fi->fib_nh;
781
782 /* Local address is added. */
783 if (nhs != 1 || nh->nh_gw)
784 goto err_inval;
785 nh->nh_scope = RT_SCOPE_NOWHERE;
786 nh->nh_dev = dev_get_by_index(fi->fib_nh->nh_oif);
787 err = -ENODEV;
788 if (nh->nh_dev == NULL)
789 goto failure;
790 } else {
791 change_nexthops(fi) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700792 if ((err = fib_check_nh(cfg, fi, nh)) != 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700793 goto failure;
794 } endfor_nexthops(fi)
795 }
796
797 if (fi->fib_prefsrc) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700798 if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
799 fi->fib_prefsrc != cfg->fc_dst)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700800 if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL)
801 goto err_inval;
802 }
803
804link_it:
805 if ((ofi = fib_find_info(fi)) != NULL) {
806 fi->fib_dead = 1;
807 free_fib_info(fi);
808 ofi->fib_treeref++;
809 return ofi;
810 }
811
812 fi->fib_treeref++;
813 atomic_inc(&fi->fib_clntref);
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700814 spin_lock_bh(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700815 hlist_add_head(&fi->fib_hash,
816 &fib_info_hash[fib_info_hashfn(fi)]);
817 if (fi->fib_prefsrc) {
818 struct hlist_head *head;
819
820 head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
821 hlist_add_head(&fi->fib_lhash, head);
822 }
823 change_nexthops(fi) {
824 struct hlist_head *head;
825 unsigned int hash;
826
827 if (!nh->nh_dev)
828 continue;
829 hash = fib_devindex_hashfn(nh->nh_dev->ifindex);
830 head = &fib_info_devhash[hash];
831 hlist_add_head(&nh->nh_hash, head);
832 } endfor_nexthops(fi)
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700833 spin_unlock_bh(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700834 return fi;
835
836err_inval:
837 err = -EINVAL;
838
839failure:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700840 if (fi) {
841 fi->fib_dead = 1;
842 free_fib_info(fi);
843 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700844
845 return ERR_PTR(err);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700846}
847
Robert Olssone5b43762005-08-25 13:01:03 -0700848/* Note! fib_semantic_match intentionally uses RCU list functions. */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700849int fib_semantic_match(struct list_head *head, const struct flowi *flp,
850 struct fib_result *res, __u32 zone, __u32 mask,
851 int prefixlen)
852{
853 struct fib_alias *fa;
854 int nh_sel = 0;
855
Robert Olssone5b43762005-08-25 13:01:03 -0700856 list_for_each_entry_rcu(fa, head, fa_list) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700857 int err;
858
859 if (fa->fa_tos &&
860 fa->fa_tos != flp->fl4_tos)
861 continue;
862
863 if (fa->fa_scope < flp->fl4_scope)
864 continue;
865
866 fa->fa_state |= FA_S_ACCESSED;
867
868 err = fib_props[fa->fa_type].error;
869 if (err == 0) {
870 struct fib_info *fi = fa->fa_info;
871
872 if (fi->fib_flags & RTNH_F_DEAD)
873 continue;
874
875 switch (fa->fa_type) {
876 case RTN_UNICAST:
877 case RTN_LOCAL:
878 case RTN_BROADCAST:
879 case RTN_ANYCAST:
880 case RTN_MULTICAST:
881 for_nexthops(fi) {
882 if (nh->nh_flags&RTNH_F_DEAD)
883 continue;
884 if (!flp->oif || flp->oif == nh->nh_oif)
885 break;
886 }
887#ifdef CONFIG_IP_ROUTE_MULTIPATH
888 if (nhsel < fi->fib_nhs) {
889 nh_sel = nhsel;
890 goto out_fill_res;
891 }
892#else
893 if (nhsel < 1) {
894 goto out_fill_res;
895 }
896#endif
897 endfor_nexthops(fi);
898 continue;
899
900 default:
901 printk(KERN_DEBUG "impossible 102\n");
902 return -EINVAL;
903 };
904 }
905 return err;
906 }
907 return 1;
908
909out_fill_res:
910 res->prefixlen = prefixlen;
911 res->nh_sel = nh_sel;
912 res->type = fa->fa_type;
913 res->scope = fa->fa_scope;
914 res->fi = fa->fa_info;
915#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
916 res->netmask = mask;
917 res->network = zone &
918 (0xFFFFFFFF >> (32 - prefixlen));
919#endif
920 atomic_inc(&res->fi->fib_clntref);
921 return 0;
922}
923
924/* Find appropriate source address to this destination */
925
Al Virob83738a2006-09-26 22:14:15 -0700926__be32 __fib_res_prefsrc(struct fib_result *res)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700927{
928 return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
929}
930
Thomas Grafbe403ea2006-08-17 18:15:17 -0700931int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
932 u32 tb_id, u8 type, u8 scope, u32 dst, int dst_len, u8 tos,
933 struct fib_info *fi, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700934{
Thomas Grafbe403ea2006-08-17 18:15:17 -0700935 struct nlmsghdr *nlh;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700936 struct rtmsg *rtm;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700937
Thomas Grafbe403ea2006-08-17 18:15:17 -0700938 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags);
939 if (nlh == NULL)
940 return -ENOBUFS;
941
942 rtm = nlmsg_data(nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700943 rtm->rtm_family = AF_INET;
944 rtm->rtm_dst_len = dst_len;
945 rtm->rtm_src_len = 0;
946 rtm->rtm_tos = tos;
947 rtm->rtm_table = tb_id;
Thomas Grafbe403ea2006-08-17 18:15:17 -0700948 NLA_PUT_U32(skb, RTA_TABLE, tb_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700949 rtm->rtm_type = type;
950 rtm->rtm_flags = fi->fib_flags;
951 rtm->rtm_scope = scope;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700952 rtm->rtm_protocol = fi->fib_protocol;
Thomas Grafbe403ea2006-08-17 18:15:17 -0700953
954 if (rtm->rtm_dst_len)
Al Viro17fb2c62006-09-26 22:15:25 -0700955 NLA_PUT_BE32(skb, RTA_DST, dst);
Thomas Grafbe403ea2006-08-17 18:15:17 -0700956
Linus Torvalds1da177e2005-04-16 15:20:36 -0700957 if (fi->fib_priority)
Thomas Grafbe403ea2006-08-17 18:15:17 -0700958 NLA_PUT_U32(skb, RTA_PRIORITY, fi->fib_priority);
959
Linus Torvalds1da177e2005-04-16 15:20:36 -0700960 if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
Thomas Grafbe403ea2006-08-17 18:15:17 -0700961 goto nla_put_failure;
962
Linus Torvalds1da177e2005-04-16 15:20:36 -0700963 if (fi->fib_prefsrc)
Al Viro17fb2c62006-09-26 22:15:25 -0700964 NLA_PUT_BE32(skb, RTA_PREFSRC, fi->fib_prefsrc);
Thomas Grafbe403ea2006-08-17 18:15:17 -0700965
Linus Torvalds1da177e2005-04-16 15:20:36 -0700966 if (fi->fib_nhs == 1) {
967 if (fi->fib_nh->nh_gw)
Al Viro17fb2c62006-09-26 22:15:25 -0700968 NLA_PUT_BE32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw);
Thomas Grafbe403ea2006-08-17 18:15:17 -0700969
Linus Torvalds1da177e2005-04-16 15:20:36 -0700970 if (fi->fib_nh->nh_oif)
Thomas Grafbe403ea2006-08-17 18:15:17 -0700971 NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif);
Patrick McHardy8265abc2006-07-21 15:09:55 -0700972#ifdef CONFIG_NET_CLS_ROUTE
973 if (fi->fib_nh[0].nh_tclassid)
Thomas Grafbe403ea2006-08-17 18:15:17 -0700974 NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid);
Patrick McHardy8265abc2006-07-21 15:09:55 -0700975#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700976 }
977#ifdef CONFIG_IP_ROUTE_MULTIPATH
978 if (fi->fib_nhs > 1) {
Thomas Grafbe403ea2006-08-17 18:15:17 -0700979 struct rtnexthop *rtnh;
980 struct nlattr *mp;
981
982 mp = nla_nest_start(skb, RTA_MULTIPATH);
983 if (mp == NULL)
984 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700985
986 for_nexthops(fi) {
Thomas Grafbe403ea2006-08-17 18:15:17 -0700987 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
988 if (rtnh == NULL)
989 goto nla_put_failure;
990
991 rtnh->rtnh_flags = nh->nh_flags & 0xFF;
992 rtnh->rtnh_hops = nh->nh_weight - 1;
993 rtnh->rtnh_ifindex = nh->nh_oif;
994
Linus Torvalds1da177e2005-04-16 15:20:36 -0700995 if (nh->nh_gw)
Al Viro17fb2c62006-09-26 22:15:25 -0700996 NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw);
Patrick McHardy8265abc2006-07-21 15:09:55 -0700997#ifdef CONFIG_NET_CLS_ROUTE
998 if (nh->nh_tclassid)
Thomas Grafbe403ea2006-08-17 18:15:17 -0700999 NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid);
Patrick McHardy8265abc2006-07-21 15:09:55 -07001000#endif
Thomas Grafbe403ea2006-08-17 18:15:17 -07001001 /* length of rtnetlink header + attributes */
1002 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001003 } endfor_nexthops(fi);
Thomas Grafbe403ea2006-08-17 18:15:17 -07001004
1005 nla_nest_end(skb, mp);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001006 }
1007#endif
Thomas Grafbe403ea2006-08-17 18:15:17 -07001008 return nlmsg_end(skb, nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001009
Thomas Grafbe403ea2006-08-17 18:15:17 -07001010nla_put_failure:
1011 return nlmsg_cancel(skb, nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001012}
1013
Linus Torvalds1da177e2005-04-16 15:20:36 -07001014/*
1015 Update FIB if:
1016 - local address disappeared -> we must delete all the entries
1017 referring to it.
1018 - device went down -> we must shutdown all nexthops going via it.
1019 */
1020
1021int fib_sync_down(u32 local, struct net_device *dev, int force)
1022{
1023 int ret = 0;
1024 int scope = RT_SCOPE_NOWHERE;
1025
1026 if (force)
1027 scope = -1;
1028
1029 if (local && fib_info_laddrhash) {
1030 unsigned int hash = fib_laddr_hashfn(local);
1031 struct hlist_head *head = &fib_info_laddrhash[hash];
1032 struct hlist_node *node;
1033 struct fib_info *fi;
1034
1035 hlist_for_each_entry(fi, node, head, fib_lhash) {
1036 if (fi->fib_prefsrc == local) {
1037 fi->fib_flags |= RTNH_F_DEAD;
1038 ret++;
1039 }
1040 }
1041 }
1042
1043 if (dev) {
1044 struct fib_info *prev_fi = NULL;
1045 unsigned int hash = fib_devindex_hashfn(dev->ifindex);
1046 struct hlist_head *head = &fib_info_devhash[hash];
1047 struct hlist_node *node;
1048 struct fib_nh *nh;
1049
1050 hlist_for_each_entry(nh, node, head, nh_hash) {
1051 struct fib_info *fi = nh->nh_parent;
1052 int dead;
1053
1054 BUG_ON(!fi->fib_nhs);
1055 if (nh->nh_dev != dev || fi == prev_fi)
1056 continue;
1057 prev_fi = fi;
1058 dead = 0;
1059 change_nexthops(fi) {
1060 if (nh->nh_flags&RTNH_F_DEAD)
1061 dead++;
1062 else if (nh->nh_dev == dev &&
1063 nh->nh_scope != scope) {
1064 nh->nh_flags |= RTNH_F_DEAD;
1065#ifdef CONFIG_IP_ROUTE_MULTIPATH
1066 spin_lock_bh(&fib_multipath_lock);
1067 fi->fib_power -= nh->nh_power;
1068 nh->nh_power = 0;
1069 spin_unlock_bh(&fib_multipath_lock);
1070#endif
1071 dead++;
1072 }
1073#ifdef CONFIG_IP_ROUTE_MULTIPATH
1074 if (force > 1 && nh->nh_dev == dev) {
1075 dead = fi->fib_nhs;
1076 break;
1077 }
1078#endif
1079 } endfor_nexthops(fi)
1080 if (dead == fi->fib_nhs) {
1081 fi->fib_flags |= RTNH_F_DEAD;
1082 ret++;
1083 }
1084 }
1085 }
1086
1087 return ret;
1088}
1089
1090#ifdef CONFIG_IP_ROUTE_MULTIPATH
1091
1092/*
1093 Dead device goes up. We wake up dead nexthops.
1094 It takes sense only on multipath routes.
1095 */
1096
1097int fib_sync_up(struct net_device *dev)
1098{
1099 struct fib_info *prev_fi;
1100 unsigned int hash;
1101 struct hlist_head *head;
1102 struct hlist_node *node;
1103 struct fib_nh *nh;
1104 int ret;
1105
1106 if (!(dev->flags&IFF_UP))
1107 return 0;
1108
1109 prev_fi = NULL;
1110 hash = fib_devindex_hashfn(dev->ifindex);
1111 head = &fib_info_devhash[hash];
1112 ret = 0;
1113
1114 hlist_for_each_entry(nh, node, head, nh_hash) {
1115 struct fib_info *fi = nh->nh_parent;
1116 int alive;
1117
1118 BUG_ON(!fi->fib_nhs);
1119 if (nh->nh_dev != dev || fi == prev_fi)
1120 continue;
1121
1122 prev_fi = fi;
1123 alive = 0;
1124 change_nexthops(fi) {
1125 if (!(nh->nh_flags&RTNH_F_DEAD)) {
1126 alive++;
1127 continue;
1128 }
1129 if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
1130 continue;
Herbert Xue5ed6392005-10-03 14:35:55 -07001131 if (nh->nh_dev != dev || !__in_dev_get_rtnl(dev))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001132 continue;
1133 alive++;
1134 spin_lock_bh(&fib_multipath_lock);
1135 nh->nh_power = 0;
1136 nh->nh_flags &= ~RTNH_F_DEAD;
1137 spin_unlock_bh(&fib_multipath_lock);
1138 } endfor_nexthops(fi)
1139
1140 if (alive > 0) {
1141 fi->fib_flags &= ~RTNH_F_DEAD;
1142 ret++;
1143 }
1144 }
1145
1146 return ret;
1147}
1148
1149/*
1150 The algorithm is suboptimal, but it provides really
1151 fair weighted route distribution.
1152 */
1153
1154void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
1155{
1156 struct fib_info *fi = res->fi;
1157 int w;
1158
1159 spin_lock_bh(&fib_multipath_lock);
1160 if (fi->fib_power <= 0) {
1161 int power = 0;
1162 change_nexthops(fi) {
1163 if (!(nh->nh_flags&RTNH_F_DEAD)) {
1164 power += nh->nh_weight;
1165 nh->nh_power = nh->nh_weight;
1166 }
1167 } endfor_nexthops(fi);
1168 fi->fib_power = power;
1169 if (power <= 0) {
1170 spin_unlock_bh(&fib_multipath_lock);
1171 /* Race condition: route has just become dead. */
1172 res->nh_sel = 0;
1173 return;
1174 }
1175 }
1176
1177
1178 /* w should be random number [0..fi->fib_power-1],
1179 it is pretty bad approximation.
1180 */
1181
1182 w = jiffies % fi->fib_power;
1183
1184 change_nexthops(fi) {
1185 if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
1186 if ((w -= nh->nh_power) <= 0) {
1187 nh->nh_power--;
1188 fi->fib_power--;
1189 res->nh_sel = nhsel;
1190 spin_unlock_bh(&fib_multipath_lock);
1191 return;
1192 }
1193 }
1194 } endfor_nexthops(fi);
1195
1196 /* Race condition: route has just become dead. */
1197 res->nh_sel = 0;
1198 spin_unlock_bh(&fib_multipath_lock);
1199}
1200#endif