blob: 406ea7050aede7c0dd6926fe20d0349b2109eee2 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * IPv4 Forwarding Information Base: semantics.
7 *
8 * Version: $Id: fib_semantics.c,v 1.19 2002/01/12 07:54:56 davem Exp $
9 *
10 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 */
17
Linus Torvalds1da177e2005-04-16 15:20:36 -070018#include <asm/uaccess.h>
19#include <asm/system.h>
20#include <linux/bitops.h>
21#include <linux/types.h>
22#include <linux/kernel.h>
23#include <linux/jiffies.h>
24#include <linux/mm.h>
25#include <linux/string.h>
26#include <linux/socket.h>
27#include <linux/sockios.h>
28#include <linux/errno.h>
29#include <linux/in.h>
30#include <linux/inet.h>
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020031#include <linux/inetdevice.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070032#include <linux/netdevice.h>
33#include <linux/if_arp.h>
34#include <linux/proc_fs.h>
35#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070036#include <linux/init.h>
37
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020038#include <net/arp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070039#include <net/ip.h>
40#include <net/protocol.h>
41#include <net/route.h>
42#include <net/tcp.h>
43#include <net/sock.h>
44#include <net/ip_fib.h>
45#include <net/ip_mp_alg.h>
Thomas Graff21c7bc2006-08-15 00:34:17 -070046#include <net/netlink.h>
Thomas Graf4e902c52006-08-17 18:14:52 -070047#include <net/nexthop.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070048
49#include "fib_lookup.h"
50
51#define FSprintk(a...)
52
Stephen Hemminger832b4c52006-08-29 16:48:09 -070053static DEFINE_SPINLOCK(fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -070054static struct hlist_head *fib_info_hash;
55static struct hlist_head *fib_info_laddrhash;
56static unsigned int fib_hash_size;
57static unsigned int fib_info_cnt;
58
59#define DEVINDEX_HASHBITS 8
60#define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
61static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
62
#ifdef CONFIG_IP_ROUTE_MULTIPATH

static DEFINE_SPINLOCK(fib_multipath_lock);

/*
 * Nexthop iteration helpers.
 *
 * for_nexthops(fi) / change_nexthops(fi) open a brace scope declaring
 * `nhsel` (loop index) and `nh` (current nexthop; const for
 * for_nexthops, writable for change_nexthops) and begin a loop over
 * fi->fib_nh[0 .. fi->fib_nhs - 1].  The scope MUST be closed with
 * endfor_nexthops(fi).  `nhsel` stays visible between the end of the
 * loop body and endfor_nexthops().
 */
#define for_nexthops(fi) {						\
	int nhsel;							\
	const struct fib_nh *nh;					\
	for (nhsel = 0, nh = (fi)->fib_nh;				\
	     nhsel < (fi)->fib_nhs;					\
	     nh++, nhsel++)

#define change_nexthops(fi) {						\
	int nhsel;							\
	struct fib_nh *nh;						\
	for (nhsel = 0, nh = (struct fib_nh *)((fi)->fib_nh);		\
	     nhsel < (fi)->fib_nhs;					\
	     nh++, nhsel++)

#else /* CONFIG_IP_ROUTE_MULTIPATH */

/*
 * Single-nexthop variants: same names and scope rules, but the loop
 * body runs exactly once on fi->fib_nh[0].
 * Hope, that gcc will optimize it to get rid of dummy loop.
 */
#define for_nexthops(fi) {						\
	int nhsel = 0;							\
	const struct fib_nh *nh = (fi)->fib_nh;				\
	for (nhsel = 0; nhsel < 1; nhsel++)

#define change_nexthops(fi) {						\
	int nhsel = 0;							\
	struct fib_nh *nh = (struct fib_nh *)((fi)->fib_nh);		\
	for (nhsel = 0; nhsel < 1; nhsel++)

#endif /* CONFIG_IP_ROUTE_MULTIPATH */

/* Closes the brace scope opened by for_nexthops()/change_nexthops(). */
#define endfor_nexthops(fi) }
86
87
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090088static const struct
Linus Torvalds1da177e2005-04-16 15:20:36 -070089{
90 int error;
91 u8 scope;
Thomas Grafa0ee18b2007-03-24 20:32:54 -070092} fib_props[RTN_MAX + 1] = {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090093 {
Linus Torvalds1da177e2005-04-16 15:20:36 -070094 .error = 0,
95 .scope = RT_SCOPE_NOWHERE,
96 }, /* RTN_UNSPEC */
97 {
98 .error = 0,
99 .scope = RT_SCOPE_UNIVERSE,
100 }, /* RTN_UNICAST */
101 {
102 .error = 0,
103 .scope = RT_SCOPE_HOST,
104 }, /* RTN_LOCAL */
105 {
106 .error = 0,
107 .scope = RT_SCOPE_LINK,
108 }, /* RTN_BROADCAST */
109 {
110 .error = 0,
111 .scope = RT_SCOPE_LINK,
112 }, /* RTN_ANYCAST */
113 {
114 .error = 0,
115 .scope = RT_SCOPE_UNIVERSE,
116 }, /* RTN_MULTICAST */
117 {
118 .error = -EINVAL,
119 .scope = RT_SCOPE_UNIVERSE,
120 }, /* RTN_BLACKHOLE */
121 {
122 .error = -EHOSTUNREACH,
123 .scope = RT_SCOPE_UNIVERSE,
124 }, /* RTN_UNREACHABLE */
125 {
126 .error = -EACCES,
127 .scope = RT_SCOPE_UNIVERSE,
128 }, /* RTN_PROHIBIT */
129 {
130 .error = -EAGAIN,
131 .scope = RT_SCOPE_UNIVERSE,
132 }, /* RTN_THROW */
133 {
134 .error = -EINVAL,
135 .scope = RT_SCOPE_NOWHERE,
136 }, /* RTN_NAT */
137 {
138 .error = -EINVAL,
139 .scope = RT_SCOPE_NOWHERE,
140 }, /* RTN_XRESOLVE */
141};
142
143
144/* Release a nexthop info record */
145
146void free_fib_info(struct fib_info *fi)
147{
148 if (fi->fib_dead == 0) {
149 printk("Freeing alive fib_info %p\n", fi);
150 return;
151 }
152 change_nexthops(fi) {
153 if (nh->nh_dev)
154 dev_put(nh->nh_dev);
155 nh->nh_dev = NULL;
156 } endfor_nexthops(fi);
157 fib_info_cnt--;
158 kfree(fi);
159}
160
161void fib_release_info(struct fib_info *fi)
162{
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700163 spin_lock_bh(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700164 if (fi && --fi->fib_treeref == 0) {
165 hlist_del(&fi->fib_hash);
166 if (fi->fib_prefsrc)
167 hlist_del(&fi->fib_lhash);
168 change_nexthops(fi) {
169 if (!nh->nh_dev)
170 continue;
171 hlist_del(&nh->nh_hash);
172 } endfor_nexthops(fi)
173 fi->fib_dead = 1;
174 fib_info_put(fi);
175 }
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700176 spin_unlock_bh(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700177}
178
179static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
180{
181 const struct fib_nh *onh = ofi->fib_nh;
182
183 for_nexthops(fi) {
184 if (nh->nh_oif != onh->nh_oif ||
185 nh->nh_gw != onh->nh_gw ||
186 nh->nh_scope != onh->nh_scope ||
187#ifdef CONFIG_IP_ROUTE_MULTIPATH
188 nh->nh_weight != onh->nh_weight ||
189#endif
190#ifdef CONFIG_NET_CLS_ROUTE
191 nh->nh_tclassid != onh->nh_tclassid ||
192#endif
193 ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
194 return -1;
195 onh++;
196 } endfor_nexthops(fi);
197 return 0;
198}
199
200static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
201{
202 unsigned int mask = (fib_hash_size - 1);
203 unsigned int val = fi->fib_nhs;
204
205 val ^= fi->fib_protocol;
Al Viro81f7bf62006-09-27 18:40:00 -0700206 val ^= (__force u32)fi->fib_prefsrc;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700207 val ^= fi->fib_priority;
208
209 return (val ^ (val >> 7) ^ (val >> 12)) & mask;
210}
211
212static struct fib_info *fib_find_info(const struct fib_info *nfi)
213{
214 struct hlist_head *head;
215 struct hlist_node *node;
216 struct fib_info *fi;
217 unsigned int hash;
218
219 hash = fib_info_hashfn(nfi);
220 head = &fib_info_hash[hash];
221
222 hlist_for_each_entry(fi, node, head, fib_hash) {
223 if (fi->fib_nhs != nfi->fib_nhs)
224 continue;
225 if (nfi->fib_protocol == fi->fib_protocol &&
226 nfi->fib_prefsrc == fi->fib_prefsrc &&
227 nfi->fib_priority == fi->fib_priority &&
228 memcmp(nfi->fib_metrics, fi->fib_metrics,
229 sizeof(fi->fib_metrics)) == 0 &&
230 ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
231 (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
232 return fi;
233 }
234
235 return NULL;
236}
237
238static inline unsigned int fib_devindex_hashfn(unsigned int val)
239{
240 unsigned int mask = DEVINDEX_HASHSIZE - 1;
241
242 return (val ^
243 (val >> DEVINDEX_HASHBITS) ^
244 (val >> (DEVINDEX_HASHBITS * 2))) & mask;
245}
246
247/* Check, that the gateway is already configured.
248 Used only by redirect accept routine.
249 */
250
Al Virod878e72e2006-09-26 22:18:13 -0700251int ip_fib_check_default(__be32 gw, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700252{
253 struct hlist_head *head;
254 struct hlist_node *node;
255 struct fib_nh *nh;
256 unsigned int hash;
257
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700258 spin_lock(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700259
260 hash = fib_devindex_hashfn(dev->ifindex);
261 head = &fib_info_devhash[hash];
262 hlist_for_each_entry(nh, node, head, nh_hash) {
263 if (nh->nh_dev == dev &&
264 nh->nh_gw == gw &&
265 !(nh->nh_flags&RTNH_F_DEAD)) {
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700266 spin_unlock(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700267 return 0;
268 }
269 }
270
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700271 spin_unlock(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700272
273 return -1;
274}
275
Thomas Graf339bf982006-11-10 14:10:15 -0800276static inline size_t fib_nlmsg_size(struct fib_info *fi)
277{
278 size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg))
279 + nla_total_size(4) /* RTA_TABLE */
280 + nla_total_size(4) /* RTA_DST */
281 + nla_total_size(4) /* RTA_PRIORITY */
282 + nla_total_size(4); /* RTA_PREFSRC */
283
284 /* space for nested metrics */
285 payload += nla_total_size((RTAX_MAX * nla_total_size(4)));
286
287 if (fi->fib_nhs) {
288 /* Also handles the special case fib_nhs == 1 */
289
290 /* each nexthop is packed in an attribute */
291 size_t nhsize = nla_total_size(sizeof(struct rtnexthop));
292
293 /* may contain flow and gateway attribute */
294 nhsize += 2 * nla_total_size(4);
295
296 /* all nexthops are packed in a nested attribute */
297 payload += nla_total_size(fi->fib_nhs * nhsize);
298 }
299
300 return payload;
301}
302
Al Viro81f7bf62006-09-27 18:40:00 -0700303void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
Thomas Graf4e902c52006-08-17 18:14:52 -0700304 int dst_len, u32 tb_id, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700305{
306 struct sk_buff *skb;
Thomas Graf4e902c52006-08-17 18:14:52 -0700307 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
Thomas Graff21c7bc2006-08-15 00:34:17 -0700308 int err = -ENOBUFS;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700309
Thomas Graf339bf982006-11-10 14:10:15 -0800310 skb = nlmsg_new(fib_nlmsg_size(fa->fa_info), GFP_KERNEL);
Thomas Graff21c7bc2006-08-15 00:34:17 -0700311 if (skb == NULL)
312 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700313
Thomas Graf4e902c52006-08-17 18:14:52 -0700314 err = fib_dump_info(skb, info->pid, seq, event, tb_id,
Thomas Grafbe403ea2006-08-17 18:15:17 -0700315 fa->fa_type, fa->fa_scope, key, dst_len,
Thomas Graf4e902c52006-08-17 18:14:52 -0700316 fa->fa_tos, fa->fa_info, 0);
Patrick McHardy26932562007-01-31 23:16:40 -0800317 if (err < 0) {
318 /* -EMSGSIZE implies BUG in fib_nlmsg_size() */
319 WARN_ON(err == -EMSGSIZE);
320 kfree_skb(skb);
321 goto errout;
322 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700323 err = rtnl_notify(skb, info->pid, RTNLGRP_IPV4_ROUTE,
324 info->nlh, GFP_KERNEL);
Thomas Graff21c7bc2006-08-15 00:34:17 -0700325errout:
326 if (err < 0)
327 rtnl_set_sk_err(RTNLGRP_IPV4_ROUTE, err);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700328}
329
330/* Return the first fib alias matching TOS with
331 * priority less than or equal to PRIO.
332 */
333struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio)
334{
335 if (fah) {
336 struct fib_alias *fa;
337 list_for_each_entry(fa, fah, fa_list) {
338 if (fa->fa_tos > tos)
339 continue;
340 if (fa->fa_info->fib_priority >= prio ||
341 fa->fa_tos < tos)
342 return fa;
343 }
344 }
345 return NULL;
346}
347
348int fib_detect_death(struct fib_info *fi, int order,
349 struct fib_info **last_resort, int *last_idx, int *dflt)
350{
351 struct neighbour *n;
352 int state = NUD_NONE;
353
354 n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
355 if (n) {
356 state = n->nud_state;
357 neigh_release(n);
358 }
359 if (state==NUD_REACHABLE)
360 return 0;
361 if ((state&NUD_VALID) && order != *dflt)
362 return 0;
363 if ((state&NUD_VALID) ||
364 (*last_idx<0 && order > *dflt)) {
365 *last_resort = fi;
366 *last_idx = order;
367 }
368 return 1;
369}
370
371#ifdef CONFIG_IP_ROUTE_MULTIPATH
372
/*
 * Count the rtnexthop records in a buffer of @remaining bytes starting
 * at @rtnh.  Returns 0 if bytes are left over after the last valid
 * record, otherwise the number of records.
 */
static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining)
{
	int count;

	for (count = 0; rtnh_ok(rtnh, remaining); count++)
		rtnh = rtnh_next(rtnh, &remaining);

	/* leftover implies invalid nexthop configuration, discard it */
	if (remaining > 0)
		return 0;

	return count;
}
385
Thomas Graf4e902c52006-08-17 18:14:52 -0700386static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
387 int remaining, struct fib_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700388{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700389 change_nexthops(fi) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700390 int attrlen;
391
392 if (!rtnh_ok(rtnh, remaining))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700393 return -EINVAL;
Thomas Graf4e902c52006-08-17 18:14:52 -0700394
395 nh->nh_flags = (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags;
396 nh->nh_oif = rtnh->rtnh_ifindex;
397 nh->nh_weight = rtnh->rtnh_hops + 1;
398
399 attrlen = rtnh_attrlen(rtnh);
400 if (attrlen > 0) {
401 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
402
403 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
Al Viro17fb2c62006-09-26 22:15:25 -0700404 nh->nh_gw = nla ? nla_get_be32(nla) : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700405#ifdef CONFIG_NET_CLS_ROUTE
Thomas Graf4e902c52006-08-17 18:14:52 -0700406 nla = nla_find(attrs, attrlen, RTA_FLOW);
407 nh->nh_tclassid = nla ? nla_get_u32(nla) : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700408#endif
409 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700410
411 rtnh = rtnh_next(rtnh, &remaining);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700412 } endfor_nexthops(fi);
Thomas Graf4e902c52006-08-17 18:14:52 -0700413
Linus Torvalds1da177e2005-04-16 15:20:36 -0700414 return 0;
415}
416
417#endif
418
Thomas Graf4e902c52006-08-17 18:14:52 -0700419int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700420{
421#ifdef CONFIG_IP_ROUTE_MULTIPATH
Thomas Graf4e902c52006-08-17 18:14:52 -0700422 struct rtnexthop *rtnh;
423 int remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700424#endif
425
Thomas Graf4e902c52006-08-17 18:14:52 -0700426 if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700427 return 1;
428
Thomas Graf4e902c52006-08-17 18:14:52 -0700429 if (cfg->fc_oif || cfg->fc_gw) {
430 if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) &&
431 (!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->nh_gw))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700432 return 0;
433 return 1;
434 }
435
436#ifdef CONFIG_IP_ROUTE_MULTIPATH
Thomas Graf4e902c52006-08-17 18:14:52 -0700437 if (cfg->fc_mp == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700438 return 0;
Thomas Graf4e902c52006-08-17 18:14:52 -0700439
440 rtnh = cfg->fc_mp;
441 remaining = cfg->fc_mp_len;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900442
Linus Torvalds1da177e2005-04-16 15:20:36 -0700443 for_nexthops(fi) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700444 int attrlen;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700445
Thomas Graf4e902c52006-08-17 18:14:52 -0700446 if (!rtnh_ok(rtnh, remaining))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700447 return -EINVAL;
Thomas Graf4e902c52006-08-17 18:14:52 -0700448
449 if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->nh_oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700450 return 1;
Thomas Graf4e902c52006-08-17 18:14:52 -0700451
452 attrlen = rtnh_attrlen(rtnh);
453 if (attrlen < 0) {
454 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
455
456 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
Al Viro17fb2c62006-09-26 22:15:25 -0700457 if (nla && nla_get_be32(nla) != nh->nh_gw)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700458 return 1;
459#ifdef CONFIG_NET_CLS_ROUTE
Thomas Graf4e902c52006-08-17 18:14:52 -0700460 nla = nla_find(attrs, attrlen, RTA_FLOW);
461 if (nla && nla_get_u32(nla) != nh->nh_tclassid)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700462 return 1;
463#endif
464 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700465
466 rtnh = rtnh_next(rtnh, &remaining);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700467 } endfor_nexthops(fi);
468#endif
469 return 0;
470}
471
472
/*
   Picture
   -------

   The semantics of a nexthop are very messy for historical reasons.
   We have to take into account that:
   a) the gateway can actually be a local interface address,
      so that a gatewayed route is direct.
   b) the gateway must be an on-link address, possibly
      described not by an ifaddr, but also by a direct route.
   c) if both gateway and interface are specified, they should not
      contradict each other.
   d) if we use tunnel routes, the gateway might not be on-link.

   An attempt to reconcile all of these (alas, self-contradictory)
   conditions results in pretty ugly and hairy code with obscure logic.

   I chose to generalize it instead, so that the size
   of the code does not increase practically, but it becomes
   much more general.
   Every prefix is assigned a "scope" value: "host" is a local address,
   "link" is a direct route,
   [ ... "site" ... "interior" ... ]
   and "universe" is a true gateway route with global meaning.

   Every prefix refers to a set of "nexthop"s (gw, oif),
   where gw must have narrower scope. This recursion stops
   when gw has LOCAL scope or if the "nexthop" is declared ONLINK,
   which means that gw is forced to be on link.

   The code is still hairy, but now it is apparently logically
   consistent and very flexible. E.g. as a by-product it allows
   independent exterior and interior routing processes
   to co-exist in peace.

   Normally it looks as follows.

   {universe prefix}  -> (gw, oif) [scope link]
                          |
                          |-> {link prefix} -> (gw, oif) [scope local]
                                                |
                                                |-> {local prefix} (terminal node)
 */
516
Thomas Graf4e902c52006-08-17 18:14:52 -0700517static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
518 struct fib_nh *nh)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700519{
520 int err;
521
522 if (nh->nh_gw) {
523 struct fib_result res;
524
525#ifdef CONFIG_IP_ROUTE_PERVASIVE
526 if (nh->nh_flags&RTNH_F_PERVASIVE)
527 return 0;
528#endif
529 if (nh->nh_flags&RTNH_F_ONLINK) {
530 struct net_device *dev;
531
Thomas Graf4e902c52006-08-17 18:14:52 -0700532 if (cfg->fc_scope >= RT_SCOPE_LINK)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700533 return -EINVAL;
534 if (inet_addr_type(nh->nh_gw) != RTN_UNICAST)
535 return -EINVAL;
536 if ((dev = __dev_get_by_index(nh->nh_oif)) == NULL)
537 return -ENODEV;
538 if (!(dev->flags&IFF_UP))
539 return -ENETDOWN;
540 nh->nh_dev = dev;
541 dev_hold(dev);
542 nh->nh_scope = RT_SCOPE_LINK;
543 return 0;
544 }
545 {
Thomas Graf4e902c52006-08-17 18:14:52 -0700546 struct flowi fl = {
547 .nl_u = {
548 .ip4_u = {
549 .daddr = nh->nh_gw,
550 .scope = cfg->fc_scope + 1,
551 },
552 },
553 .oif = nh->nh_oif,
554 };
Linus Torvalds1da177e2005-04-16 15:20:36 -0700555
556 /* It is not necessary, but requires a bit of thinking */
557 if (fl.fl4_scope < RT_SCOPE_LINK)
558 fl.fl4_scope = RT_SCOPE_LINK;
559 if ((err = fib_lookup(&fl, &res)) != 0)
560 return err;
561 }
562 err = -EINVAL;
563 if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
564 goto out;
565 nh->nh_scope = res.scope;
566 nh->nh_oif = FIB_RES_OIF(res);
567 if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
568 goto out;
569 dev_hold(nh->nh_dev);
570 err = -ENETDOWN;
571 if (!(nh->nh_dev->flags & IFF_UP))
572 goto out;
573 err = 0;
574out:
575 fib_res_put(&res);
576 return err;
577 } else {
578 struct in_device *in_dev;
579
580 if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
581 return -EINVAL;
582
583 in_dev = inetdev_by_index(nh->nh_oif);
584 if (in_dev == NULL)
585 return -ENODEV;
586 if (!(in_dev->dev->flags&IFF_UP)) {
587 in_dev_put(in_dev);
588 return -ENETDOWN;
589 }
590 nh->nh_dev = in_dev->dev;
591 dev_hold(nh->nh_dev);
592 nh->nh_scope = RT_SCOPE_HOST;
593 in_dev_put(in_dev);
594 }
595 return 0;
596}
597
Al Viro81f7bf62006-09-27 18:40:00 -0700598static inline unsigned int fib_laddr_hashfn(__be32 val)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700599{
600 unsigned int mask = (fib_hash_size - 1);
601
Al Viro81f7bf62006-09-27 18:40:00 -0700602 return ((__force u32)val ^ ((__force u32)val >> 7) ^ ((__force u32)val >> 14)) & mask;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700603}
604
605static struct hlist_head *fib_hash_alloc(int bytes)
606{
607 if (bytes <= PAGE_SIZE)
608 return kmalloc(bytes, GFP_KERNEL);
609 else
610 return (struct hlist_head *)
611 __get_free_pages(GFP_KERNEL, get_order(bytes));
612}
613
614static void fib_hash_free(struct hlist_head *hash, int bytes)
615{
616 if (!hash)
617 return;
618
619 if (bytes <= PAGE_SIZE)
620 kfree(hash);
621 else
622 free_pages((unsigned long) hash, get_order(bytes));
623}
624
625static void fib_hash_move(struct hlist_head *new_info_hash,
626 struct hlist_head *new_laddrhash,
627 unsigned int new_size)
628{
David S. Millerb7656e72005-08-05 04:12:48 -0700629 struct hlist_head *old_info_hash, *old_laddrhash;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700630 unsigned int old_size = fib_hash_size;
David S. Millerb7656e72005-08-05 04:12:48 -0700631 unsigned int i, bytes;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700632
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700633 spin_lock_bh(&fib_info_lock);
David S. Millerb7656e72005-08-05 04:12:48 -0700634 old_info_hash = fib_info_hash;
635 old_laddrhash = fib_info_laddrhash;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700636 fib_hash_size = new_size;
637
638 for (i = 0; i < old_size; i++) {
639 struct hlist_head *head = &fib_info_hash[i];
640 struct hlist_node *node, *n;
641 struct fib_info *fi;
642
643 hlist_for_each_entry_safe(fi, node, n, head, fib_hash) {
644 struct hlist_head *dest;
645 unsigned int new_hash;
646
647 hlist_del(&fi->fib_hash);
648
649 new_hash = fib_info_hashfn(fi);
650 dest = &new_info_hash[new_hash];
651 hlist_add_head(&fi->fib_hash, dest);
652 }
653 }
654 fib_info_hash = new_info_hash;
655
656 for (i = 0; i < old_size; i++) {
657 struct hlist_head *lhead = &fib_info_laddrhash[i];
658 struct hlist_node *node, *n;
659 struct fib_info *fi;
660
661 hlist_for_each_entry_safe(fi, node, n, lhead, fib_lhash) {
662 struct hlist_head *ldest;
663 unsigned int new_hash;
664
665 hlist_del(&fi->fib_lhash);
666
667 new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
668 ldest = &new_laddrhash[new_hash];
669 hlist_add_head(&fi->fib_lhash, ldest);
670 }
671 }
672 fib_info_laddrhash = new_laddrhash;
673
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700674 spin_unlock_bh(&fib_info_lock);
David S. Millerb7656e72005-08-05 04:12:48 -0700675
676 bytes = old_size * sizeof(struct hlist_head *);
677 fib_hash_free(old_info_hash, bytes);
678 fib_hash_free(old_laddrhash, bytes);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700679}
680
Thomas Graf4e902c52006-08-17 18:14:52 -0700681struct fib_info *fib_create_info(struct fib_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700682{
683 int err;
684 struct fib_info *fi = NULL;
685 struct fib_info *ofi;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700686 int nhs = 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700687
688 /* Fast check to catch the most weird cases */
Thomas Graf4e902c52006-08-17 18:14:52 -0700689 if (fib_props[cfg->fc_type].scope > cfg->fc_scope)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700690 goto err_inval;
691
692#ifdef CONFIG_IP_ROUTE_MULTIPATH
Thomas Graf4e902c52006-08-17 18:14:52 -0700693 if (cfg->fc_mp) {
694 nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700695 if (nhs == 0)
696 goto err_inval;
697 }
698#endif
699#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
Thomas Graf4e902c52006-08-17 18:14:52 -0700700 if (cfg->fc_mp_alg) {
701 if (cfg->fc_mp_alg < IP_MP_ALG_NONE ||
702 cfg->fc_mp_alg > IP_MP_ALG_MAX)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700703 goto err_inval;
704 }
705#endif
706
707 err = -ENOBUFS;
708 if (fib_info_cnt >= fib_hash_size) {
709 unsigned int new_size = fib_hash_size << 1;
710 struct hlist_head *new_info_hash;
711 struct hlist_head *new_laddrhash;
712 unsigned int bytes;
713
714 if (!new_size)
715 new_size = 1;
716 bytes = new_size * sizeof(struct hlist_head *);
717 new_info_hash = fib_hash_alloc(bytes);
718 new_laddrhash = fib_hash_alloc(bytes);
719 if (!new_info_hash || !new_laddrhash) {
720 fib_hash_free(new_info_hash, bytes);
721 fib_hash_free(new_laddrhash, bytes);
722 } else {
723 memset(new_info_hash, 0, bytes);
724 memset(new_laddrhash, 0, bytes);
725
726 fib_hash_move(new_info_hash, new_laddrhash, new_size);
727 }
728
729 if (!fib_hash_size)
730 goto failure;
731 }
732
Panagiotis Issaris0da974f2006-07-21 14:51:30 -0700733 fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700734 if (fi == NULL)
735 goto failure;
736 fib_info_cnt++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700737
Thomas Graf4e902c52006-08-17 18:14:52 -0700738 fi->fib_protocol = cfg->fc_protocol;
739 fi->fib_flags = cfg->fc_flags;
740 fi->fib_priority = cfg->fc_priority;
741 fi->fib_prefsrc = cfg->fc_prefsrc;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700742
743 fi->fib_nhs = nhs;
744 change_nexthops(fi) {
745 nh->nh_parent = fi;
746 } endfor_nexthops(fi)
747
Thomas Graf4e902c52006-08-17 18:14:52 -0700748 if (cfg->fc_mx) {
749 struct nlattr *nla;
750 int remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700751
Thomas Graf4e902c52006-08-17 18:14:52 -0700752 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
753 int type = nla->nla_type;
754
755 if (type) {
756 if (type > RTAX_MAX)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700757 goto err_inval;
Thomas Graf4e902c52006-08-17 18:14:52 -0700758 fi->fib_metrics[type - 1] = nla_get_u32(nla);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700759 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700760 }
761 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700762
Thomas Graf4e902c52006-08-17 18:14:52 -0700763 if (cfg->fc_mp) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700764#ifdef CONFIG_IP_ROUTE_MULTIPATH
Thomas Graf4e902c52006-08-17 18:14:52 -0700765 err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg);
766 if (err != 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700767 goto failure;
Thomas Graf4e902c52006-08-17 18:14:52 -0700768 if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700769 goto err_inval;
Thomas Graf4e902c52006-08-17 18:14:52 -0700770 if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700771 goto err_inval;
772#ifdef CONFIG_NET_CLS_ROUTE
Thomas Graf4e902c52006-08-17 18:14:52 -0700773 if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700774 goto err_inval;
775#endif
776#else
777 goto err_inval;
778#endif
779 } else {
780 struct fib_nh *nh = fi->fib_nh;
Thomas Graf4e902c52006-08-17 18:14:52 -0700781
782 nh->nh_oif = cfg->fc_oif;
783 nh->nh_gw = cfg->fc_gw;
784 nh->nh_flags = cfg->fc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700785#ifdef CONFIG_NET_CLS_ROUTE
Thomas Graf4e902c52006-08-17 18:14:52 -0700786 nh->nh_tclassid = cfg->fc_flow;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700787#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700788#ifdef CONFIG_IP_ROUTE_MULTIPATH
789 nh->nh_weight = 1;
790#endif
791 }
792
793#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
Thomas Graf4e902c52006-08-17 18:14:52 -0700794 fi->fib_mp_alg = cfg->fc_mp_alg;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700795#endif
796
Thomas Graf4e902c52006-08-17 18:14:52 -0700797 if (fib_props[cfg->fc_type].error) {
798 if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700799 goto err_inval;
800 goto link_it;
801 }
802
Thomas Graf4e902c52006-08-17 18:14:52 -0700803 if (cfg->fc_scope > RT_SCOPE_HOST)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700804 goto err_inval;
805
Thomas Graf4e902c52006-08-17 18:14:52 -0700806 if (cfg->fc_scope == RT_SCOPE_HOST) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700807 struct fib_nh *nh = fi->fib_nh;
808
809 /* Local address is added. */
810 if (nhs != 1 || nh->nh_gw)
811 goto err_inval;
812 nh->nh_scope = RT_SCOPE_NOWHERE;
813 nh->nh_dev = dev_get_by_index(fi->fib_nh->nh_oif);
814 err = -ENODEV;
815 if (nh->nh_dev == NULL)
816 goto failure;
817 } else {
818 change_nexthops(fi) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700819 if ((err = fib_check_nh(cfg, fi, nh)) != 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700820 goto failure;
821 } endfor_nexthops(fi)
822 }
823
824 if (fi->fib_prefsrc) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700825 if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
826 fi->fib_prefsrc != cfg->fc_dst)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700827 if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL)
828 goto err_inval;
829 }
830
831link_it:
832 if ((ofi = fib_find_info(fi)) != NULL) {
833 fi->fib_dead = 1;
834 free_fib_info(fi);
835 ofi->fib_treeref++;
836 return ofi;
837 }
838
839 fi->fib_treeref++;
840 atomic_inc(&fi->fib_clntref);
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700841 spin_lock_bh(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700842 hlist_add_head(&fi->fib_hash,
843 &fib_info_hash[fib_info_hashfn(fi)]);
844 if (fi->fib_prefsrc) {
845 struct hlist_head *head;
846
847 head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
848 hlist_add_head(&fi->fib_lhash, head);
849 }
850 change_nexthops(fi) {
851 struct hlist_head *head;
852 unsigned int hash;
853
854 if (!nh->nh_dev)
855 continue;
856 hash = fib_devindex_hashfn(nh->nh_dev->ifindex);
857 head = &fib_info_devhash[hash];
858 hlist_add_head(&nh->nh_hash, head);
859 } endfor_nexthops(fi)
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700860 spin_unlock_bh(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700861 return fi;
862
863err_inval:
864 err = -EINVAL;
865
866failure:
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900867 if (fi) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700868 fi->fib_dead = 1;
869 free_fib_info(fi);
870 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700871
872 return ERR_PTR(err);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700873}
874
Robert Olssone5b43762005-08-25 13:01:03 -0700875/* Note! fib_semantic_match intentionally uses RCU list functions. */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700876int fib_semantic_match(struct list_head *head, const struct flowi *flp,
Al Viro1ef1b8c2006-09-26 22:20:56 -0700877 struct fib_result *res, __be32 zone, __be32 mask,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700878 int prefixlen)
879{
880 struct fib_alias *fa;
881 int nh_sel = 0;
882
Robert Olssone5b43762005-08-25 13:01:03 -0700883 list_for_each_entry_rcu(fa, head, fa_list) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700884 int err;
885
886 if (fa->fa_tos &&
887 fa->fa_tos != flp->fl4_tos)
888 continue;
889
890 if (fa->fa_scope < flp->fl4_scope)
891 continue;
892
893 fa->fa_state |= FA_S_ACCESSED;
894
895 err = fib_props[fa->fa_type].error;
896 if (err == 0) {
897 struct fib_info *fi = fa->fa_info;
898
899 if (fi->fib_flags & RTNH_F_DEAD)
900 continue;
901
902 switch (fa->fa_type) {
903 case RTN_UNICAST:
904 case RTN_LOCAL:
905 case RTN_BROADCAST:
906 case RTN_ANYCAST:
907 case RTN_MULTICAST:
908 for_nexthops(fi) {
909 if (nh->nh_flags&RTNH_F_DEAD)
910 continue;
911 if (!flp->oif || flp->oif == nh->nh_oif)
912 break;
913 }
914#ifdef CONFIG_IP_ROUTE_MULTIPATH
915 if (nhsel < fi->fib_nhs) {
916 nh_sel = nhsel;
917 goto out_fill_res;
918 }
919#else
920 if (nhsel < 1) {
921 goto out_fill_res;
922 }
923#endif
924 endfor_nexthops(fi);
925 continue;
926
927 default:
928 printk(KERN_DEBUG "impossible 102\n");
929 return -EINVAL;
Stephen Hemminger3ff50b72007-04-20 17:09:22 -0700930 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700931 }
932 return err;
933 }
934 return 1;
935
936out_fill_res:
937 res->prefixlen = prefixlen;
938 res->nh_sel = nh_sel;
939 res->type = fa->fa_type;
940 res->scope = fa->fa_scope;
941 res->fi = fa->fa_info;
942#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
943 res->netmask = mask;
Al Viro1e8aa6f2006-09-26 22:21:22 -0700944 res->network = zone & inet_make_mask(prefixlen);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700945#endif
946 atomic_inc(&res->fi->fib_clntref);
947 return 0;
948}
949
950/* Find appropriate source address to this destination */
951
Al Virob83738a2006-09-26 22:14:15 -0700952__be32 __fib_res_prefsrc(struct fib_result *res)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700953{
954 return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
955}
956
Thomas Grafbe403ea2006-08-17 18:15:17 -0700957int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
Al Viro81f7bf62006-09-27 18:40:00 -0700958 u32 tb_id, u8 type, u8 scope, __be32 dst, int dst_len, u8 tos,
Thomas Grafbe403ea2006-08-17 18:15:17 -0700959 struct fib_info *fi, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700960{
Thomas Grafbe403ea2006-08-17 18:15:17 -0700961 struct nlmsghdr *nlh;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700962 struct rtmsg *rtm;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700963
Thomas Grafbe403ea2006-08-17 18:15:17 -0700964 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags);
965 if (nlh == NULL)
Patrick McHardy26932562007-01-31 23:16:40 -0800966 return -EMSGSIZE;
Thomas Grafbe403ea2006-08-17 18:15:17 -0700967
968 rtm = nlmsg_data(nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700969 rtm->rtm_family = AF_INET;
970 rtm->rtm_dst_len = dst_len;
971 rtm->rtm_src_len = 0;
972 rtm->rtm_tos = tos;
973 rtm->rtm_table = tb_id;
Thomas Grafbe403ea2006-08-17 18:15:17 -0700974 NLA_PUT_U32(skb, RTA_TABLE, tb_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700975 rtm->rtm_type = type;
976 rtm->rtm_flags = fi->fib_flags;
977 rtm->rtm_scope = scope;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700978 rtm->rtm_protocol = fi->fib_protocol;
Thomas Grafbe403ea2006-08-17 18:15:17 -0700979
980 if (rtm->rtm_dst_len)
Al Viro17fb2c62006-09-26 22:15:25 -0700981 NLA_PUT_BE32(skb, RTA_DST, dst);
Thomas Grafbe403ea2006-08-17 18:15:17 -0700982
Linus Torvalds1da177e2005-04-16 15:20:36 -0700983 if (fi->fib_priority)
Thomas Grafbe403ea2006-08-17 18:15:17 -0700984 NLA_PUT_U32(skb, RTA_PRIORITY, fi->fib_priority);
985
Linus Torvalds1da177e2005-04-16 15:20:36 -0700986 if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
Thomas Grafbe403ea2006-08-17 18:15:17 -0700987 goto nla_put_failure;
988
Linus Torvalds1da177e2005-04-16 15:20:36 -0700989 if (fi->fib_prefsrc)
Al Viro17fb2c62006-09-26 22:15:25 -0700990 NLA_PUT_BE32(skb, RTA_PREFSRC, fi->fib_prefsrc);
Thomas Grafbe403ea2006-08-17 18:15:17 -0700991
Linus Torvalds1da177e2005-04-16 15:20:36 -0700992 if (fi->fib_nhs == 1) {
993 if (fi->fib_nh->nh_gw)
Al Viro17fb2c62006-09-26 22:15:25 -0700994 NLA_PUT_BE32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw);
Thomas Grafbe403ea2006-08-17 18:15:17 -0700995
Linus Torvalds1da177e2005-04-16 15:20:36 -0700996 if (fi->fib_nh->nh_oif)
Thomas Grafbe403ea2006-08-17 18:15:17 -0700997 NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif);
Patrick McHardy8265abc2006-07-21 15:09:55 -0700998#ifdef CONFIG_NET_CLS_ROUTE
999 if (fi->fib_nh[0].nh_tclassid)
Thomas Grafbe403ea2006-08-17 18:15:17 -07001000 NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid);
Patrick McHardy8265abc2006-07-21 15:09:55 -07001001#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001002 }
1003#ifdef CONFIG_IP_ROUTE_MULTIPATH
1004 if (fi->fib_nhs > 1) {
Thomas Grafbe403ea2006-08-17 18:15:17 -07001005 struct rtnexthop *rtnh;
1006 struct nlattr *mp;
1007
1008 mp = nla_nest_start(skb, RTA_MULTIPATH);
1009 if (mp == NULL)
1010 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001011
1012 for_nexthops(fi) {
Thomas Grafbe403ea2006-08-17 18:15:17 -07001013 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
1014 if (rtnh == NULL)
1015 goto nla_put_failure;
1016
1017 rtnh->rtnh_flags = nh->nh_flags & 0xFF;
1018 rtnh->rtnh_hops = nh->nh_weight - 1;
1019 rtnh->rtnh_ifindex = nh->nh_oif;
1020
Linus Torvalds1da177e2005-04-16 15:20:36 -07001021 if (nh->nh_gw)
Al Viro17fb2c62006-09-26 22:15:25 -07001022 NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw);
Patrick McHardy8265abc2006-07-21 15:09:55 -07001023#ifdef CONFIG_NET_CLS_ROUTE
1024 if (nh->nh_tclassid)
Thomas Grafbe403ea2006-08-17 18:15:17 -07001025 NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid);
Patrick McHardy8265abc2006-07-21 15:09:55 -07001026#endif
Thomas Grafbe403ea2006-08-17 18:15:17 -07001027 /* length of rtnetlink header + attributes */
1028 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001029 } endfor_nexthops(fi);
Thomas Grafbe403ea2006-08-17 18:15:17 -07001030
1031 nla_nest_end(skb, mp);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001032 }
1033#endif
Thomas Grafbe403ea2006-08-17 18:15:17 -07001034 return nlmsg_end(skb, nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001035
Thomas Grafbe403ea2006-08-17 18:15:17 -07001036nla_put_failure:
Patrick McHardy26932562007-01-31 23:16:40 -08001037 nlmsg_cancel(skb, nlh);
1038 return -EMSGSIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001039}
1040
Linus Torvalds1da177e2005-04-16 15:20:36 -07001041/*
1042 Update FIB if:
1043 - local address disappeared -> we must delete all the entries
1044 referring to it.
1045 - device went down -> we must shutdown all nexthops going via it.
1046 */
1047
Al Viro81f7bf62006-09-27 18:40:00 -07001048int fib_sync_down(__be32 local, struct net_device *dev, int force)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001049{
1050 int ret = 0;
1051 int scope = RT_SCOPE_NOWHERE;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001052
Linus Torvalds1da177e2005-04-16 15:20:36 -07001053 if (force)
1054 scope = -1;
1055
1056 if (local && fib_info_laddrhash) {
1057 unsigned int hash = fib_laddr_hashfn(local);
1058 struct hlist_head *head = &fib_info_laddrhash[hash];
1059 struct hlist_node *node;
1060 struct fib_info *fi;
1061
1062 hlist_for_each_entry(fi, node, head, fib_lhash) {
1063 if (fi->fib_prefsrc == local) {
1064 fi->fib_flags |= RTNH_F_DEAD;
1065 ret++;
1066 }
1067 }
1068 }
1069
1070 if (dev) {
1071 struct fib_info *prev_fi = NULL;
1072 unsigned int hash = fib_devindex_hashfn(dev->ifindex);
1073 struct hlist_head *head = &fib_info_devhash[hash];
1074 struct hlist_node *node;
1075 struct fib_nh *nh;
1076
1077 hlist_for_each_entry(nh, node, head, nh_hash) {
1078 struct fib_info *fi = nh->nh_parent;
1079 int dead;
1080
1081 BUG_ON(!fi->fib_nhs);
1082 if (nh->nh_dev != dev || fi == prev_fi)
1083 continue;
1084 prev_fi = fi;
1085 dead = 0;
1086 change_nexthops(fi) {
1087 if (nh->nh_flags&RTNH_F_DEAD)
1088 dead++;
1089 else if (nh->nh_dev == dev &&
1090 nh->nh_scope != scope) {
1091 nh->nh_flags |= RTNH_F_DEAD;
1092#ifdef CONFIG_IP_ROUTE_MULTIPATH
1093 spin_lock_bh(&fib_multipath_lock);
1094 fi->fib_power -= nh->nh_power;
1095 nh->nh_power = 0;
1096 spin_unlock_bh(&fib_multipath_lock);
1097#endif
1098 dead++;
1099 }
1100#ifdef CONFIG_IP_ROUTE_MULTIPATH
1101 if (force > 1 && nh->nh_dev == dev) {
1102 dead = fi->fib_nhs;
1103 break;
1104 }
1105#endif
1106 } endfor_nexthops(fi)
1107 if (dead == fi->fib_nhs) {
1108 fi->fib_flags |= RTNH_F_DEAD;
1109 ret++;
1110 }
1111 }
1112 }
1113
1114 return ret;
1115}
1116
1117#ifdef CONFIG_IP_ROUTE_MULTIPATH
1118
1119/*
1120 Dead device goes up. We wake up dead nexthops.
1121 It takes sense only on multipath routes.
1122 */
1123
1124int fib_sync_up(struct net_device *dev)
1125{
1126 struct fib_info *prev_fi;
1127 unsigned int hash;
1128 struct hlist_head *head;
1129 struct hlist_node *node;
1130 struct fib_nh *nh;
1131 int ret;
1132
1133 if (!(dev->flags&IFF_UP))
1134 return 0;
1135
1136 prev_fi = NULL;
1137 hash = fib_devindex_hashfn(dev->ifindex);
1138 head = &fib_info_devhash[hash];
1139 ret = 0;
1140
1141 hlist_for_each_entry(nh, node, head, nh_hash) {
1142 struct fib_info *fi = nh->nh_parent;
1143 int alive;
1144
1145 BUG_ON(!fi->fib_nhs);
1146 if (nh->nh_dev != dev || fi == prev_fi)
1147 continue;
1148
1149 prev_fi = fi;
1150 alive = 0;
1151 change_nexthops(fi) {
1152 if (!(nh->nh_flags&RTNH_F_DEAD)) {
1153 alive++;
1154 continue;
1155 }
1156 if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
1157 continue;
Herbert Xue5ed6392005-10-03 14:35:55 -07001158 if (nh->nh_dev != dev || !__in_dev_get_rtnl(dev))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001159 continue;
1160 alive++;
1161 spin_lock_bh(&fib_multipath_lock);
1162 nh->nh_power = 0;
1163 nh->nh_flags &= ~RTNH_F_DEAD;
1164 spin_unlock_bh(&fib_multipath_lock);
1165 } endfor_nexthops(fi)
1166
1167 if (alive > 0) {
1168 fi->fib_flags &= ~RTNH_F_DEAD;
1169 ret++;
1170 }
1171 }
1172
1173 return ret;
1174}
1175
1176/*
1177 The algorithm is suboptimal, but it provides really
1178 fair weighted route distribution.
1179 */
1180
1181void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
1182{
1183 struct fib_info *fi = res->fi;
1184 int w;
1185
1186 spin_lock_bh(&fib_multipath_lock);
1187 if (fi->fib_power <= 0) {
1188 int power = 0;
1189 change_nexthops(fi) {
1190 if (!(nh->nh_flags&RTNH_F_DEAD)) {
1191 power += nh->nh_weight;
1192 nh->nh_power = nh->nh_weight;
1193 }
1194 } endfor_nexthops(fi);
1195 fi->fib_power = power;
1196 if (power <= 0) {
1197 spin_unlock_bh(&fib_multipath_lock);
1198 /* Race condition: route has just become dead. */
1199 res->nh_sel = 0;
1200 return;
1201 }
1202 }
1203
1204
1205 /* w should be random number [0..fi->fib_power-1],
1206 it is pretty bad approximation.
1207 */
1208
1209 w = jiffies % fi->fib_power;
1210
1211 change_nexthops(fi) {
1212 if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
1213 if ((w -= nh->nh_power) <= 0) {
1214 nh->nh_power--;
1215 fi->fib_power--;
1216 res->nh_sel = nhsel;
1217 spin_unlock_bh(&fib_multipath_lock);
1218 return;
1219 }
1220 }
1221 } endfor_nexthops(fi);
1222
1223 /* Race condition: route has just become dead. */
1224 res->nh_sel = 0;
1225 spin_unlock_bh(&fib_multipath_lock);
1226}
1227#endif