blob: 0de6102020e23668a4c7c3fa35cd6df581f99361 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * IPv4 Forwarding Information Base: semantics.
7 *
8 * Version: $Id: fib_semantics.c,v 1.19 2002/01/12 07:54:56 davem Exp $
9 *
10 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 */
17
Linus Torvalds1da177e2005-04-16 15:20:36 -070018#include <asm/uaccess.h>
19#include <asm/system.h>
20#include <linux/bitops.h>
21#include <linux/types.h>
22#include <linux/kernel.h>
23#include <linux/jiffies.h>
24#include <linux/mm.h>
25#include <linux/string.h>
26#include <linux/socket.h>
27#include <linux/sockios.h>
28#include <linux/errno.h>
29#include <linux/in.h>
30#include <linux/inet.h>
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020031#include <linux/inetdevice.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070032#include <linux/netdevice.h>
33#include <linux/if_arp.h>
34#include <linux/proc_fs.h>
35#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070036#include <linux/init.h>
37
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020038#include <net/arp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070039#include <net/ip.h>
40#include <net/protocol.h>
41#include <net/route.h>
42#include <net/tcp.h>
43#include <net/sock.h>
44#include <net/ip_fib.h>
Thomas Graff21c7bc2006-08-15 00:34:17 -070045#include <net/netlink.h>
Thomas Graf4e902c52006-08-17 18:14:52 -070046#include <net/nexthop.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070047
48#include "fib_lookup.h"
49
50#define FSprintk(a...)
51
Stephen Hemminger832b4c52006-08-29 16:48:09 -070052static DEFINE_SPINLOCK(fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -070053static struct hlist_head *fib_info_hash;
54static struct hlist_head *fib_info_laddrhash;
55static unsigned int fib_hash_size;
56static unsigned int fib_info_cnt;
57
58#define DEVINDEX_HASHBITS 8
59#define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
60static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
61
#ifdef CONFIG_IP_ROUTE_MULTIPATH

static DEFINE_SPINLOCK(fib_multipath_lock);

/*
 * Nexthop iteration helpers.
 *
 * for_nexthops(fi) and change_nexthops(fi) each open a new brace scope,
 * declare the loop index `nhsel` and the cursor `nh` inside it, and start
 * a for statement whose body is supplied by the caller.  The scope is
 * closed by endfor_nexthops(fi), so the macros must always be used as a
 * pair.  for_nexthops() exposes `nh` as a pointer to const, while
 * change_nexthops() casts the array to a plain struct fib_nh pointer so
 * that the body may modify the nexthop.
 *
 * With multipath enabled the loop visits all (fi)->fib_nhs nexthops.
 */
#define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \
for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)

#define change_nexthops(fi) { int nhsel; struct fib_nh * nh; \
for (nhsel=0, nh = (struct fib_nh*)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++)

#else /* CONFIG_IP_ROUTE_MULTIPATH */

/*
 * Without multipath the loop body runs exactly once, with `nh` pointing
 * at (fi)->fib_nh and `nhsel` equal to 0.  Hopefully gcc optimizes the
 * dummy loop away.
 */

#define for_nexthops(fi) { int nhsel=0; const struct fib_nh * nh = (fi)->fib_nh; \
for (nhsel=0; nhsel < 1; nhsel++)

#define change_nexthops(fi) { int nhsel=0; struct fib_nh * nh = (struct fib_nh*)((fi)->fib_nh); \
for (nhsel=0; nhsel < 1; nhsel++)

#endif /* CONFIG_IP_ROUTE_MULTIPATH */

/* Closes the brace scope opened by for_nexthops()/change_nexthops(). */
#define endfor_nexthops(fi) }
85
86
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090087static const struct
Linus Torvalds1da177e2005-04-16 15:20:36 -070088{
89 int error;
90 u8 scope;
Thomas Grafa0ee18b2007-03-24 20:32:54 -070091} fib_props[RTN_MAX + 1] = {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090092 {
Linus Torvalds1da177e2005-04-16 15:20:36 -070093 .error = 0,
94 .scope = RT_SCOPE_NOWHERE,
95 }, /* RTN_UNSPEC */
96 {
97 .error = 0,
98 .scope = RT_SCOPE_UNIVERSE,
99 }, /* RTN_UNICAST */
100 {
101 .error = 0,
102 .scope = RT_SCOPE_HOST,
103 }, /* RTN_LOCAL */
104 {
105 .error = 0,
106 .scope = RT_SCOPE_LINK,
107 }, /* RTN_BROADCAST */
108 {
109 .error = 0,
110 .scope = RT_SCOPE_LINK,
111 }, /* RTN_ANYCAST */
112 {
113 .error = 0,
114 .scope = RT_SCOPE_UNIVERSE,
115 }, /* RTN_MULTICAST */
116 {
117 .error = -EINVAL,
118 .scope = RT_SCOPE_UNIVERSE,
119 }, /* RTN_BLACKHOLE */
120 {
121 .error = -EHOSTUNREACH,
122 .scope = RT_SCOPE_UNIVERSE,
123 }, /* RTN_UNREACHABLE */
124 {
125 .error = -EACCES,
126 .scope = RT_SCOPE_UNIVERSE,
127 }, /* RTN_PROHIBIT */
128 {
129 .error = -EAGAIN,
130 .scope = RT_SCOPE_UNIVERSE,
131 }, /* RTN_THROW */
132 {
133 .error = -EINVAL,
134 .scope = RT_SCOPE_NOWHERE,
135 }, /* RTN_NAT */
136 {
137 .error = -EINVAL,
138 .scope = RT_SCOPE_NOWHERE,
139 }, /* RTN_XRESOLVE */
140};
141
142
143/* Release a nexthop info record */
144
145void free_fib_info(struct fib_info *fi)
146{
147 if (fi->fib_dead == 0) {
148 printk("Freeing alive fib_info %p\n", fi);
149 return;
150 }
151 change_nexthops(fi) {
152 if (nh->nh_dev)
153 dev_put(nh->nh_dev);
154 nh->nh_dev = NULL;
155 } endfor_nexthops(fi);
156 fib_info_cnt--;
157 kfree(fi);
158}
159
160void fib_release_info(struct fib_info *fi)
161{
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700162 spin_lock_bh(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700163 if (fi && --fi->fib_treeref == 0) {
164 hlist_del(&fi->fib_hash);
165 if (fi->fib_prefsrc)
166 hlist_del(&fi->fib_lhash);
167 change_nexthops(fi) {
168 if (!nh->nh_dev)
169 continue;
170 hlist_del(&nh->nh_hash);
171 } endfor_nexthops(fi)
172 fi->fib_dead = 1;
173 fib_info_put(fi);
174 }
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700175 spin_unlock_bh(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700176}
177
178static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
179{
180 const struct fib_nh *onh = ofi->fib_nh;
181
182 for_nexthops(fi) {
183 if (nh->nh_oif != onh->nh_oif ||
184 nh->nh_gw != onh->nh_gw ||
185 nh->nh_scope != onh->nh_scope ||
186#ifdef CONFIG_IP_ROUTE_MULTIPATH
187 nh->nh_weight != onh->nh_weight ||
188#endif
189#ifdef CONFIG_NET_CLS_ROUTE
190 nh->nh_tclassid != onh->nh_tclassid ||
191#endif
192 ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
193 return -1;
194 onh++;
195 } endfor_nexthops(fi);
196 return 0;
197}
198
199static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
200{
201 unsigned int mask = (fib_hash_size - 1);
202 unsigned int val = fi->fib_nhs;
203
204 val ^= fi->fib_protocol;
Al Viro81f7bf62006-09-27 18:40:00 -0700205 val ^= (__force u32)fi->fib_prefsrc;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700206 val ^= fi->fib_priority;
207
208 return (val ^ (val >> 7) ^ (val >> 12)) & mask;
209}
210
211static struct fib_info *fib_find_info(const struct fib_info *nfi)
212{
213 struct hlist_head *head;
214 struct hlist_node *node;
215 struct fib_info *fi;
216 unsigned int hash;
217
218 hash = fib_info_hashfn(nfi);
219 head = &fib_info_hash[hash];
220
221 hlist_for_each_entry(fi, node, head, fib_hash) {
222 if (fi->fib_nhs != nfi->fib_nhs)
223 continue;
224 if (nfi->fib_protocol == fi->fib_protocol &&
225 nfi->fib_prefsrc == fi->fib_prefsrc &&
226 nfi->fib_priority == fi->fib_priority &&
227 memcmp(nfi->fib_metrics, fi->fib_metrics,
228 sizeof(fi->fib_metrics)) == 0 &&
229 ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
230 (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
231 return fi;
232 }
233
234 return NULL;
235}
236
237static inline unsigned int fib_devindex_hashfn(unsigned int val)
238{
239 unsigned int mask = DEVINDEX_HASHSIZE - 1;
240
241 return (val ^
242 (val >> DEVINDEX_HASHBITS) ^
243 (val >> (DEVINDEX_HASHBITS * 2))) & mask;
244}
245
246/* Check, that the gateway is already configured.
247 Used only by redirect accept routine.
248 */
249
Al Virod878e72e2006-09-26 22:18:13 -0700250int ip_fib_check_default(__be32 gw, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700251{
252 struct hlist_head *head;
253 struct hlist_node *node;
254 struct fib_nh *nh;
255 unsigned int hash;
256
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700257 spin_lock(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700258
259 hash = fib_devindex_hashfn(dev->ifindex);
260 head = &fib_info_devhash[hash];
261 hlist_for_each_entry(nh, node, head, nh_hash) {
262 if (nh->nh_dev == dev &&
263 nh->nh_gw == gw &&
264 !(nh->nh_flags&RTNH_F_DEAD)) {
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700265 spin_unlock(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700266 return 0;
267 }
268 }
269
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700270 spin_unlock(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700271
272 return -1;
273}
274
Thomas Graf339bf982006-11-10 14:10:15 -0800275static inline size_t fib_nlmsg_size(struct fib_info *fi)
276{
277 size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg))
278 + nla_total_size(4) /* RTA_TABLE */
279 + nla_total_size(4) /* RTA_DST */
280 + nla_total_size(4) /* RTA_PRIORITY */
281 + nla_total_size(4); /* RTA_PREFSRC */
282
283 /* space for nested metrics */
284 payload += nla_total_size((RTAX_MAX * nla_total_size(4)));
285
286 if (fi->fib_nhs) {
287 /* Also handles the special case fib_nhs == 1 */
288
289 /* each nexthop is packed in an attribute */
290 size_t nhsize = nla_total_size(sizeof(struct rtnexthop));
291
292 /* may contain flow and gateway attribute */
293 nhsize += 2 * nla_total_size(4);
294
295 /* all nexthops are packed in a nested attribute */
296 payload += nla_total_size(fi->fib_nhs * nhsize);
297 }
298
299 return payload;
300}
301
Al Viro81f7bf62006-09-27 18:40:00 -0700302void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
Milan Kocianb8f55832007-05-23 14:55:06 -0700303 int dst_len, u32 tb_id, struct nl_info *info,
304 unsigned int nlm_flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700305{
306 struct sk_buff *skb;
Thomas Graf4e902c52006-08-17 18:14:52 -0700307 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
Thomas Graff21c7bc2006-08-15 00:34:17 -0700308 int err = -ENOBUFS;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700309
Thomas Graf339bf982006-11-10 14:10:15 -0800310 skb = nlmsg_new(fib_nlmsg_size(fa->fa_info), GFP_KERNEL);
Thomas Graff21c7bc2006-08-15 00:34:17 -0700311 if (skb == NULL)
312 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700313
Thomas Graf4e902c52006-08-17 18:14:52 -0700314 err = fib_dump_info(skb, info->pid, seq, event, tb_id,
Thomas Grafbe403ea2006-08-17 18:15:17 -0700315 fa->fa_type, fa->fa_scope, key, dst_len,
Milan Kocianb8f55832007-05-23 14:55:06 -0700316 fa->fa_tos, fa->fa_info, nlm_flags);
Patrick McHardy26932562007-01-31 23:16:40 -0800317 if (err < 0) {
318 /* -EMSGSIZE implies BUG in fib_nlmsg_size() */
319 WARN_ON(err == -EMSGSIZE);
320 kfree_skb(skb);
321 goto errout;
322 }
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800323 err = rtnl_notify(skb, info->nl_net, info->pid, RTNLGRP_IPV4_ROUTE,
Thomas Graf4e902c52006-08-17 18:14:52 -0700324 info->nlh, GFP_KERNEL);
Thomas Graff21c7bc2006-08-15 00:34:17 -0700325errout:
326 if (err < 0)
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800327 rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700328}
329
330/* Return the first fib alias matching TOS with
331 * priority less than or equal to PRIO.
332 */
333struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio)
334{
335 if (fah) {
336 struct fib_alias *fa;
337 list_for_each_entry(fa, fah, fa_list) {
338 if (fa->fa_tos > tos)
339 continue;
340 if (fa->fa_info->fib_priority >= prio ||
341 fa->fa_tos < tos)
342 return fa;
343 }
344 }
345 return NULL;
346}
347
348int fib_detect_death(struct fib_info *fi, int order,
Denis V. Lunevc17860a2007-12-08 00:22:13 -0800349 struct fib_info **last_resort, int *last_idx, int dflt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700350{
351 struct neighbour *n;
352 int state = NUD_NONE;
353
354 n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
355 if (n) {
356 state = n->nud_state;
357 neigh_release(n);
358 }
359 if (state==NUD_REACHABLE)
360 return 0;
Denis V. Lunevc17860a2007-12-08 00:22:13 -0800361 if ((state&NUD_VALID) && order != dflt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700362 return 0;
363 if ((state&NUD_VALID) ||
Denis V. Lunevc17860a2007-12-08 00:22:13 -0800364 (*last_idx<0 && order > dflt)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700365 *last_resort = fi;
366 *last_idx = order;
367 }
368 return 1;
369}
370
371#ifdef CONFIG_IP_ROUTE_MULTIPATH
372
/*
 * Count the rtnexthop entries in a multipath payload of @remaining bytes.
 * Returns 0 if bytes are left over after the last well-formed entry.
 */
static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining)
{
	int count;

	for (count = 0; rtnh_ok(rtnh, remaining); count++)
		rtnh = rtnh_next(rtnh, &remaining);

	/* leftover implies invalid nexthop configuration, discard it */
	if (remaining > 0)
		return 0;

	return count;
}
385
Thomas Graf4e902c52006-08-17 18:14:52 -0700386static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
387 int remaining, struct fib_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700388{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700389 change_nexthops(fi) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700390 int attrlen;
391
392 if (!rtnh_ok(rtnh, remaining))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700393 return -EINVAL;
Thomas Graf4e902c52006-08-17 18:14:52 -0700394
395 nh->nh_flags = (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags;
396 nh->nh_oif = rtnh->rtnh_ifindex;
397 nh->nh_weight = rtnh->rtnh_hops + 1;
398
399 attrlen = rtnh_attrlen(rtnh);
400 if (attrlen > 0) {
401 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
402
403 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
Al Viro17fb2c62006-09-26 22:15:25 -0700404 nh->nh_gw = nla ? nla_get_be32(nla) : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700405#ifdef CONFIG_NET_CLS_ROUTE
Thomas Graf4e902c52006-08-17 18:14:52 -0700406 nla = nla_find(attrs, attrlen, RTA_FLOW);
407 nh->nh_tclassid = nla ? nla_get_u32(nla) : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700408#endif
409 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700410
411 rtnh = rtnh_next(rtnh, &remaining);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700412 } endfor_nexthops(fi);
Thomas Graf4e902c52006-08-17 18:14:52 -0700413
Linus Torvalds1da177e2005-04-16 15:20:36 -0700414 return 0;
415}
416
417#endif
418
Thomas Graf4e902c52006-08-17 18:14:52 -0700419int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700420{
421#ifdef CONFIG_IP_ROUTE_MULTIPATH
Thomas Graf4e902c52006-08-17 18:14:52 -0700422 struct rtnexthop *rtnh;
423 int remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700424#endif
425
Thomas Graf4e902c52006-08-17 18:14:52 -0700426 if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700427 return 1;
428
Thomas Graf4e902c52006-08-17 18:14:52 -0700429 if (cfg->fc_oif || cfg->fc_gw) {
430 if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) &&
431 (!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->nh_gw))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700432 return 0;
433 return 1;
434 }
435
436#ifdef CONFIG_IP_ROUTE_MULTIPATH
Thomas Graf4e902c52006-08-17 18:14:52 -0700437 if (cfg->fc_mp == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700438 return 0;
Thomas Graf4e902c52006-08-17 18:14:52 -0700439
440 rtnh = cfg->fc_mp;
441 remaining = cfg->fc_mp_len;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900442
Linus Torvalds1da177e2005-04-16 15:20:36 -0700443 for_nexthops(fi) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700444 int attrlen;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700445
Thomas Graf4e902c52006-08-17 18:14:52 -0700446 if (!rtnh_ok(rtnh, remaining))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700447 return -EINVAL;
Thomas Graf4e902c52006-08-17 18:14:52 -0700448
449 if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->nh_oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700450 return 1;
Thomas Graf4e902c52006-08-17 18:14:52 -0700451
452 attrlen = rtnh_attrlen(rtnh);
453 if (attrlen < 0) {
454 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
455
456 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
Al Viro17fb2c62006-09-26 22:15:25 -0700457 if (nla && nla_get_be32(nla) != nh->nh_gw)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700458 return 1;
459#ifdef CONFIG_NET_CLS_ROUTE
Thomas Graf4e902c52006-08-17 18:14:52 -0700460 nla = nla_find(attrs, attrlen, RTA_FLOW);
461 if (nla && nla_get_u32(nla) != nh->nh_tclassid)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700462 return 1;
463#endif
464 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700465
466 rtnh = rtnh_next(rtnh, &remaining);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700467 } endfor_nexthops(fi);
468#endif
469 return 0;
470}
471
472
473/*
474 Picture
475 -------
476
477 Semantics of nexthop is very messy by historical reasons.
478 We have to take into account, that:
479 a) gateway can be actually local interface address,
480 so that gatewayed route is direct.
481 b) gateway must be on-link address, possibly
482 described not by an ifaddr, but also by a direct route.
483 c) If both gateway and interface are specified, they should not
484 contradict.
485 d) If we use tunnel routes, gateway could be not on-link.
486
487 Attempt to reconcile all of these (alas, self-contradictory) conditions
488 results in pretty ugly and hairy code with obscure logic.
489
   I chose to generalize it instead, so that the size
   of the code practically does not increase, but it becomes
   much more general.
493 Every prefix is assigned a "scope" value: "host" is local address,
494 "link" is direct route,
495 [ ... "site" ... "interior" ... ]
496 and "universe" is true gateway route with global meaning.
497
498 Every prefix refers to a set of "nexthop"s (gw, oif),
499 where gw must have narrower scope. This recursion stops
500 when gw has LOCAL scope or if "nexthop" is declared ONLINK,
501 which means that gw is forced to be on link.
502
503 Code is still hairy, but now it is apparently logically
   consistent and very flexible. E.g., as a by-product, it allows
   independent exterior and interior routing processes
   to co-exist in peace.
507
508 Normally it looks as following.
509
510 {universe prefix} -> (gw, oif) [scope link]
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900511 |
Linus Torvalds1da177e2005-04-16 15:20:36 -0700512 |-> {link prefix} -> (gw, oif) [scope local]
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900513 |
Linus Torvalds1da177e2005-04-16 15:20:36 -0700514 |-> {local prefix} (terminal node)
515 */
516
/*
 * Validate one nexthop @nh of @fi and resolve its device and scope.
 *
 * On success (return 0) nh->nh_dev is set with a reference held via
 * dev_hold() and nh->nh_scope is filled in; the only exception is the
 * RTNH_F_PERVASIVE shortcut, which returns 0 without touching @nh.
 * On failure a negative errno is returned.  Note that on the gatewayed
 * lookup path a device reference may already have been taken when
 * -ENETDOWN is returned; the caller's error path (free_fib_info) drops it.
 */
static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
			struct fib_nh *nh)
{
	int err;

	if (nh->nh_gw) {
		/* Gatewayed nexthop */
		struct fib_result res;

#ifdef CONFIG_IP_ROUTE_PERVASIVE
		if (nh->nh_flags&RTNH_F_PERVASIVE)
			return 0;
#endif
		if (nh->nh_flags&RTNH_F_ONLINK) {
			/*
			 * Gateway is declared on-link: no route lookup,
			 * only the address type and the named output
			 * device are checked.
			 */
			struct net_device *dev;

			if (cfg->fc_scope >= RT_SCOPE_LINK)
				return -EINVAL;
			if (inet_addr_type(cfg->fc_nlinfo.nl_net,
					   nh->nh_gw) != RTN_UNICAST)
				return -EINVAL;
			if ((dev = __dev_get_by_index(cfg->fc_nlinfo.nl_net,
						      nh->nh_oif)) == NULL)
				return -ENODEV;
			if (!(dev->flags&IFF_UP))
				return -ENETDOWN;
			nh->nh_dev = dev;
			dev_hold(dev);
			nh->nh_scope = RT_SCOPE_LINK;
			return 0;
		}
		{
			/*
			 * Look the gateway up with a scope one step
			 * narrower than the route being added, but never
			 * wider than RT_SCOPE_LINK.
			 */
			struct flowi fl = {
				.nl_u = {
					.ip4_u = {
						.daddr = nh->nh_gw,
						.scope = cfg->fc_scope + 1,
					},
				},
				.oif = nh->nh_oif,
			};

			/* It is not necessary, but requires a bit of thinking */
			if (fl.fl4_scope < RT_SCOPE_LINK)
				fl.fl4_scope = RT_SCOPE_LINK;
			if ((err = fib_lookup(&fl, &res)) != 0)
				return err;
		}
		/* From here on the lookup result must be released via out: */
		err = -EINVAL;
		if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
			goto out;
		nh->nh_scope = res.scope;
		nh->nh_oif = FIB_RES_OIF(res);
		if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
			goto out;
		dev_hold(nh->nh_dev);
		err = -ENETDOWN;
		if (!(nh->nh_dev->flags & IFF_UP))
			goto out;
		err = 0;
out:
		fib_res_put(&res);
		return err;
	} else {
		/* Direct (gateway-less) nexthop: only the device matters */
		struct in_device *in_dev;

		if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
			return -EINVAL;

		in_dev = inetdev_by_index(nh->nh_oif);
		if (in_dev == NULL)
			return -ENODEV;
		if (!(in_dev->dev->flags&IFF_UP)) {
			in_dev_put(in_dev);
			return -ENETDOWN;
		}
		nh->nh_dev = in_dev->dev;
		dev_hold(nh->nh_dev);
		nh->nh_scope = RT_SCOPE_HOST;
		in_dev_put(in_dev);
	}
	return 0;
}
599
Al Viro81f7bf62006-09-27 18:40:00 -0700600static inline unsigned int fib_laddr_hashfn(__be32 val)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700601{
602 unsigned int mask = (fib_hash_size - 1);
603
Al Viro81f7bf62006-09-27 18:40:00 -0700604 return ((__force u32)val ^ ((__force u32)val >> 7) ^ ((__force u32)val >> 14)) & mask;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700605}
606
607static struct hlist_head *fib_hash_alloc(int bytes)
608{
609 if (bytes <= PAGE_SIZE)
Joonwoo Park88f83492007-11-26 23:29:32 +0800610 return kzalloc(bytes, GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700611 else
612 return (struct hlist_head *)
Joonwoo Park88f83492007-11-26 23:29:32 +0800613 __get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(bytes));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700614}
615
616static void fib_hash_free(struct hlist_head *hash, int bytes)
617{
618 if (!hash)
619 return;
620
621 if (bytes <= PAGE_SIZE)
622 kfree(hash);
623 else
624 free_pages((unsigned long) hash, get_order(bytes));
625}
626
627static void fib_hash_move(struct hlist_head *new_info_hash,
628 struct hlist_head *new_laddrhash,
629 unsigned int new_size)
630{
David S. Millerb7656e72005-08-05 04:12:48 -0700631 struct hlist_head *old_info_hash, *old_laddrhash;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700632 unsigned int old_size = fib_hash_size;
David S. Millerb7656e72005-08-05 04:12:48 -0700633 unsigned int i, bytes;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700634
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700635 spin_lock_bh(&fib_info_lock);
David S. Millerb7656e72005-08-05 04:12:48 -0700636 old_info_hash = fib_info_hash;
637 old_laddrhash = fib_info_laddrhash;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700638 fib_hash_size = new_size;
639
640 for (i = 0; i < old_size; i++) {
641 struct hlist_head *head = &fib_info_hash[i];
642 struct hlist_node *node, *n;
643 struct fib_info *fi;
644
645 hlist_for_each_entry_safe(fi, node, n, head, fib_hash) {
646 struct hlist_head *dest;
647 unsigned int new_hash;
648
649 hlist_del(&fi->fib_hash);
650
651 new_hash = fib_info_hashfn(fi);
652 dest = &new_info_hash[new_hash];
653 hlist_add_head(&fi->fib_hash, dest);
654 }
655 }
656 fib_info_hash = new_info_hash;
657
658 for (i = 0; i < old_size; i++) {
659 struct hlist_head *lhead = &fib_info_laddrhash[i];
660 struct hlist_node *node, *n;
661 struct fib_info *fi;
662
663 hlist_for_each_entry_safe(fi, node, n, lhead, fib_lhash) {
664 struct hlist_head *ldest;
665 unsigned int new_hash;
666
667 hlist_del(&fi->fib_lhash);
668
669 new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
670 ldest = &new_laddrhash[new_hash];
671 hlist_add_head(&fi->fib_lhash, ldest);
672 }
673 }
674 fib_info_laddrhash = new_laddrhash;
675
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700676 spin_unlock_bh(&fib_info_lock);
David S. Millerb7656e72005-08-05 04:12:48 -0700677
678 bytes = old_size * sizeof(struct hlist_head *);
679 fib_hash_free(old_info_hash, bytes);
680 fib_hash_free(old_laddrhash, bytes);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700681}
682
Thomas Graf4e902c52006-08-17 18:14:52 -0700683struct fib_info *fib_create_info(struct fib_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700684{
685 int err;
686 struct fib_info *fi = NULL;
687 struct fib_info *ofi;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700688 int nhs = 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700689
690 /* Fast check to catch the most weird cases */
Thomas Graf4e902c52006-08-17 18:14:52 -0700691 if (fib_props[cfg->fc_type].scope > cfg->fc_scope)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700692 goto err_inval;
693
694#ifdef CONFIG_IP_ROUTE_MULTIPATH
Thomas Graf4e902c52006-08-17 18:14:52 -0700695 if (cfg->fc_mp) {
696 nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700697 if (nhs == 0)
698 goto err_inval;
699 }
700#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700701
702 err = -ENOBUFS;
703 if (fib_info_cnt >= fib_hash_size) {
704 unsigned int new_size = fib_hash_size << 1;
705 struct hlist_head *new_info_hash;
706 struct hlist_head *new_laddrhash;
707 unsigned int bytes;
708
709 if (!new_size)
710 new_size = 1;
711 bytes = new_size * sizeof(struct hlist_head *);
712 new_info_hash = fib_hash_alloc(bytes);
713 new_laddrhash = fib_hash_alloc(bytes);
714 if (!new_info_hash || !new_laddrhash) {
715 fib_hash_free(new_info_hash, bytes);
716 fib_hash_free(new_laddrhash, bytes);
Joonwoo Park88f83492007-11-26 23:29:32 +0800717 } else
Linus Torvalds1da177e2005-04-16 15:20:36 -0700718 fib_hash_move(new_info_hash, new_laddrhash, new_size);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700719
720 if (!fib_hash_size)
721 goto failure;
722 }
723
Panagiotis Issaris0da974f2006-07-21 14:51:30 -0700724 fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700725 if (fi == NULL)
726 goto failure;
727 fib_info_cnt++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700728
Thomas Graf4e902c52006-08-17 18:14:52 -0700729 fi->fib_protocol = cfg->fc_protocol;
730 fi->fib_flags = cfg->fc_flags;
731 fi->fib_priority = cfg->fc_priority;
732 fi->fib_prefsrc = cfg->fc_prefsrc;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700733
734 fi->fib_nhs = nhs;
735 change_nexthops(fi) {
736 nh->nh_parent = fi;
737 } endfor_nexthops(fi)
738
Thomas Graf4e902c52006-08-17 18:14:52 -0700739 if (cfg->fc_mx) {
740 struct nlattr *nla;
741 int remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700742
Thomas Graf4e902c52006-08-17 18:14:52 -0700743 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
Thomas Graf8f4c1f92007-09-12 14:44:36 +0200744 int type = nla_type(nla);
Thomas Graf4e902c52006-08-17 18:14:52 -0700745
746 if (type) {
747 if (type > RTAX_MAX)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700748 goto err_inval;
Thomas Graf4e902c52006-08-17 18:14:52 -0700749 fi->fib_metrics[type - 1] = nla_get_u32(nla);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700750 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700751 }
752 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700753
Thomas Graf4e902c52006-08-17 18:14:52 -0700754 if (cfg->fc_mp) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700755#ifdef CONFIG_IP_ROUTE_MULTIPATH
Thomas Graf4e902c52006-08-17 18:14:52 -0700756 err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg);
757 if (err != 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700758 goto failure;
Thomas Graf4e902c52006-08-17 18:14:52 -0700759 if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700760 goto err_inval;
Thomas Graf4e902c52006-08-17 18:14:52 -0700761 if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700762 goto err_inval;
763#ifdef CONFIG_NET_CLS_ROUTE
Thomas Graf4e902c52006-08-17 18:14:52 -0700764 if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700765 goto err_inval;
766#endif
767#else
768 goto err_inval;
769#endif
770 } else {
771 struct fib_nh *nh = fi->fib_nh;
Thomas Graf4e902c52006-08-17 18:14:52 -0700772
773 nh->nh_oif = cfg->fc_oif;
774 nh->nh_gw = cfg->fc_gw;
775 nh->nh_flags = cfg->fc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700776#ifdef CONFIG_NET_CLS_ROUTE
Thomas Graf4e902c52006-08-17 18:14:52 -0700777 nh->nh_tclassid = cfg->fc_flow;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700778#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700779#ifdef CONFIG_IP_ROUTE_MULTIPATH
780 nh->nh_weight = 1;
781#endif
782 }
783
Thomas Graf4e902c52006-08-17 18:14:52 -0700784 if (fib_props[cfg->fc_type].error) {
785 if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700786 goto err_inval;
787 goto link_it;
788 }
789
Thomas Graf4e902c52006-08-17 18:14:52 -0700790 if (cfg->fc_scope > RT_SCOPE_HOST)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700791 goto err_inval;
792
Thomas Graf4e902c52006-08-17 18:14:52 -0700793 if (cfg->fc_scope == RT_SCOPE_HOST) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700794 struct fib_nh *nh = fi->fib_nh;
795
796 /* Local address is added. */
797 if (nhs != 1 || nh->nh_gw)
798 goto err_inval;
799 nh->nh_scope = RT_SCOPE_NOWHERE;
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800800 nh->nh_dev = dev_get_by_index(cfg->fc_nlinfo.nl_net,
801 fi->fib_nh->nh_oif);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700802 err = -ENODEV;
803 if (nh->nh_dev == NULL)
804 goto failure;
805 } else {
806 change_nexthops(fi) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700807 if ((err = fib_check_nh(cfg, fi, nh)) != 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700808 goto failure;
809 } endfor_nexthops(fi)
810 }
811
812 if (fi->fib_prefsrc) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700813 if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
814 fi->fib_prefsrc != cfg->fc_dst)
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800815 if (inet_addr_type(cfg->fc_nlinfo.nl_net,
816 fi->fib_prefsrc) != RTN_LOCAL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700817 goto err_inval;
818 }
819
820link_it:
821 if ((ofi = fib_find_info(fi)) != NULL) {
822 fi->fib_dead = 1;
823 free_fib_info(fi);
824 ofi->fib_treeref++;
825 return ofi;
826 }
827
828 fi->fib_treeref++;
829 atomic_inc(&fi->fib_clntref);
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700830 spin_lock_bh(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700831 hlist_add_head(&fi->fib_hash,
832 &fib_info_hash[fib_info_hashfn(fi)]);
833 if (fi->fib_prefsrc) {
834 struct hlist_head *head;
835
836 head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
837 hlist_add_head(&fi->fib_lhash, head);
838 }
839 change_nexthops(fi) {
840 struct hlist_head *head;
841 unsigned int hash;
842
843 if (!nh->nh_dev)
844 continue;
845 hash = fib_devindex_hashfn(nh->nh_dev->ifindex);
846 head = &fib_info_devhash[hash];
847 hlist_add_head(&nh->nh_hash, head);
848 } endfor_nexthops(fi)
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700849 spin_unlock_bh(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700850 return fi;
851
852err_inval:
853 err = -EINVAL;
854
855failure:
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900856 if (fi) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700857 fi->fib_dead = 1;
858 free_fib_info(fi);
859 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700860
861 return ERR_PTR(err);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700862}
863
/*
 * Pick the alias on @head that should serve flow @flp and fill @res.
 *
 * Aliases are skipped when they have a non-zero TOS that differs from the
 * flow's TOS, or a scope below the flow's scope.  The first remaining
 * alias decides:
 *   - if its route type maps to an error in fib_props[], that error is
 *     returned;
 *   - otherwise the first live nexthop that matches flp->oif (or any live
 *     nexthop when flp->oif is 0) is selected, @res is filled in, a
 *     reference is taken on fib_clntref and 0 is returned; if the
 *     fib_info is dead or no nexthop qualifies the search continues with
 *     the next alias.
 * Returns 1 when no alias matched and -EINVAL for an unexpected route
 * type.  @zone and @mask are not used by this function.
 *
 * Note! fib_semantic_match intentionally uses RCU list functions.
 */
int fib_semantic_match(struct list_head *head, const struct flowi *flp,
		       struct fib_result *res, __be32 zone, __be32 mask,
			int prefixlen)
{
	struct fib_alias *fa;
	int nh_sel = 0;

	list_for_each_entry_rcu(fa, head, fa_list) {
		int err;

		if (fa->fa_tos &&
		    fa->fa_tos != flp->fl4_tos)
			continue;

		if (fa->fa_scope < flp->fl4_scope)
			continue;

		fa->fa_state |= FA_S_ACCESSED;

		err = fib_props[fa->fa_type].error;
		if (err == 0) {
			struct fib_info *fi = fa->fa_info;

			if (fi->fib_flags & RTNH_F_DEAD)
				continue;

			switch (fa->fa_type) {
			case RTN_UNICAST:
			case RTN_LOCAL:
			case RTN_BROADCAST:
			case RTN_ANYCAST:
			case RTN_MULTICAST:
				/*
				 * for_nexthops() opens a scope that stays
				 * open until endfor_nexthops() below, so
				 * nhsel is still visible after the loop.
				 * Breaking out early leaves nhsel at the
				 * index of the chosen nexthop.
				 */
				for_nexthops(fi) {
					if (nh->nh_flags&RTNH_F_DEAD)
						continue;
					if (!flp->oif || flp->oif == nh->nh_oif)
						break;
				}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
				if (nhsel < fi->fib_nhs) {
					nh_sel = nhsel;
					goto out_fill_res;
				}
#else
				if (nhsel < 1) {
					goto out_fill_res;
				}
#endif
				endfor_nexthops(fi);
				/* no usable nexthop: try the next alias */
				continue;

			default:
				printk(KERN_DEBUG "impossible 102\n");
				return -EINVAL;
			}
		}
		return err;
	}
	return 1;

out_fill_res:
	res->prefixlen = prefixlen;
	res->nh_sel = nh_sel;
	res->type = fa->fa_type;
	res->scope = fa->fa_scope;
	res->fi = fa->fa_info;
	atomic_inc(&res->fi->fib_clntref);
	return 0;
}
934
935/* Find appropriate source address to this destination */
936
Al Virob83738a2006-09-26 22:14:15 -0700937__be32 __fib_res_prefsrc(struct fib_result *res)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700938{
939 return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
940}
941
Thomas Grafbe403ea2006-08-17 18:15:17 -0700942int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
Al Viro81f7bf62006-09-27 18:40:00 -0700943 u32 tb_id, u8 type, u8 scope, __be32 dst, int dst_len, u8 tos,
Thomas Grafbe403ea2006-08-17 18:15:17 -0700944 struct fib_info *fi, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700945{
Thomas Grafbe403ea2006-08-17 18:15:17 -0700946 struct nlmsghdr *nlh;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700947 struct rtmsg *rtm;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700948
Thomas Grafbe403ea2006-08-17 18:15:17 -0700949 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags);
950 if (nlh == NULL)
Patrick McHardy26932562007-01-31 23:16:40 -0800951 return -EMSGSIZE;
Thomas Grafbe403ea2006-08-17 18:15:17 -0700952
953 rtm = nlmsg_data(nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700954 rtm->rtm_family = AF_INET;
955 rtm->rtm_dst_len = dst_len;
956 rtm->rtm_src_len = 0;
957 rtm->rtm_tos = tos;
958 rtm->rtm_table = tb_id;
Thomas Grafbe403ea2006-08-17 18:15:17 -0700959 NLA_PUT_U32(skb, RTA_TABLE, tb_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700960 rtm->rtm_type = type;
961 rtm->rtm_flags = fi->fib_flags;
962 rtm->rtm_scope = scope;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700963 rtm->rtm_protocol = fi->fib_protocol;
Thomas Grafbe403ea2006-08-17 18:15:17 -0700964
965 if (rtm->rtm_dst_len)
Al Viro17fb2c62006-09-26 22:15:25 -0700966 NLA_PUT_BE32(skb, RTA_DST, dst);
Thomas Grafbe403ea2006-08-17 18:15:17 -0700967
Linus Torvalds1da177e2005-04-16 15:20:36 -0700968 if (fi->fib_priority)
Thomas Grafbe403ea2006-08-17 18:15:17 -0700969 NLA_PUT_U32(skb, RTA_PRIORITY, fi->fib_priority);
970
Linus Torvalds1da177e2005-04-16 15:20:36 -0700971 if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
Thomas Grafbe403ea2006-08-17 18:15:17 -0700972 goto nla_put_failure;
973
Linus Torvalds1da177e2005-04-16 15:20:36 -0700974 if (fi->fib_prefsrc)
Al Viro17fb2c62006-09-26 22:15:25 -0700975 NLA_PUT_BE32(skb, RTA_PREFSRC, fi->fib_prefsrc);
Thomas Grafbe403ea2006-08-17 18:15:17 -0700976
Linus Torvalds1da177e2005-04-16 15:20:36 -0700977 if (fi->fib_nhs == 1) {
978 if (fi->fib_nh->nh_gw)
Al Viro17fb2c62006-09-26 22:15:25 -0700979 NLA_PUT_BE32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw);
Thomas Grafbe403ea2006-08-17 18:15:17 -0700980
Linus Torvalds1da177e2005-04-16 15:20:36 -0700981 if (fi->fib_nh->nh_oif)
Thomas Grafbe403ea2006-08-17 18:15:17 -0700982 NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif);
Patrick McHardy8265abc2006-07-21 15:09:55 -0700983#ifdef CONFIG_NET_CLS_ROUTE
984 if (fi->fib_nh[0].nh_tclassid)
Thomas Grafbe403ea2006-08-17 18:15:17 -0700985 NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid);
Patrick McHardy8265abc2006-07-21 15:09:55 -0700986#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700987 }
988#ifdef CONFIG_IP_ROUTE_MULTIPATH
989 if (fi->fib_nhs > 1) {
Thomas Grafbe403ea2006-08-17 18:15:17 -0700990 struct rtnexthop *rtnh;
991 struct nlattr *mp;
992
993 mp = nla_nest_start(skb, RTA_MULTIPATH);
994 if (mp == NULL)
995 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700996
997 for_nexthops(fi) {
Thomas Grafbe403ea2006-08-17 18:15:17 -0700998 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
999 if (rtnh == NULL)
1000 goto nla_put_failure;
1001
1002 rtnh->rtnh_flags = nh->nh_flags & 0xFF;
1003 rtnh->rtnh_hops = nh->nh_weight - 1;
1004 rtnh->rtnh_ifindex = nh->nh_oif;
1005
Linus Torvalds1da177e2005-04-16 15:20:36 -07001006 if (nh->nh_gw)
Al Viro17fb2c62006-09-26 22:15:25 -07001007 NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw);
Patrick McHardy8265abc2006-07-21 15:09:55 -07001008#ifdef CONFIG_NET_CLS_ROUTE
1009 if (nh->nh_tclassid)
Thomas Grafbe403ea2006-08-17 18:15:17 -07001010 NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid);
Patrick McHardy8265abc2006-07-21 15:09:55 -07001011#endif
Thomas Grafbe403ea2006-08-17 18:15:17 -07001012 /* length of rtnetlink header + attributes */
1013 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001014 } endfor_nexthops(fi);
Thomas Grafbe403ea2006-08-17 18:15:17 -07001015
1016 nla_nest_end(skb, mp);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001017 }
1018#endif
Thomas Grafbe403ea2006-08-17 18:15:17 -07001019 return nlmsg_end(skb, nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001020
Thomas Grafbe403ea2006-08-17 18:15:17 -07001021nla_put_failure:
Patrick McHardy26932562007-01-31 23:16:40 -08001022 nlmsg_cancel(skb, nlh);
1023 return -EMSGSIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001024}
1025
Linus Torvalds1da177e2005-04-16 15:20:36 -07001026/*
1027 Update FIB if:
1028 - local address disappeared -> we must delete all the entries
1029 referring to it.
1030 - device went down -> we must shutdown all nexthops going via it.
1031 */
1032
Al Viro81f7bf62006-09-27 18:40:00 -07001033int fib_sync_down(__be32 local, struct net_device *dev, int force)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001034{
1035 int ret = 0;
1036 int scope = RT_SCOPE_NOWHERE;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001037
Linus Torvalds1da177e2005-04-16 15:20:36 -07001038 if (force)
1039 scope = -1;
1040
1041 if (local && fib_info_laddrhash) {
1042 unsigned int hash = fib_laddr_hashfn(local);
1043 struct hlist_head *head = &fib_info_laddrhash[hash];
1044 struct hlist_node *node;
1045 struct fib_info *fi;
1046
1047 hlist_for_each_entry(fi, node, head, fib_lhash) {
1048 if (fi->fib_prefsrc == local) {
1049 fi->fib_flags |= RTNH_F_DEAD;
1050 ret++;
1051 }
1052 }
1053 }
1054
1055 if (dev) {
1056 struct fib_info *prev_fi = NULL;
1057 unsigned int hash = fib_devindex_hashfn(dev->ifindex);
1058 struct hlist_head *head = &fib_info_devhash[hash];
1059 struct hlist_node *node;
1060 struct fib_nh *nh;
1061
1062 hlist_for_each_entry(nh, node, head, nh_hash) {
1063 struct fib_info *fi = nh->nh_parent;
1064 int dead;
1065
1066 BUG_ON(!fi->fib_nhs);
1067 if (nh->nh_dev != dev || fi == prev_fi)
1068 continue;
1069 prev_fi = fi;
1070 dead = 0;
1071 change_nexthops(fi) {
1072 if (nh->nh_flags&RTNH_F_DEAD)
1073 dead++;
1074 else if (nh->nh_dev == dev &&
1075 nh->nh_scope != scope) {
1076 nh->nh_flags |= RTNH_F_DEAD;
1077#ifdef CONFIG_IP_ROUTE_MULTIPATH
1078 spin_lock_bh(&fib_multipath_lock);
1079 fi->fib_power -= nh->nh_power;
1080 nh->nh_power = 0;
1081 spin_unlock_bh(&fib_multipath_lock);
1082#endif
1083 dead++;
1084 }
1085#ifdef CONFIG_IP_ROUTE_MULTIPATH
1086 if (force > 1 && nh->nh_dev == dev) {
1087 dead = fi->fib_nhs;
1088 break;
1089 }
1090#endif
1091 } endfor_nexthops(fi)
1092 if (dead == fi->fib_nhs) {
1093 fi->fib_flags |= RTNH_F_DEAD;
1094 ret++;
1095 }
1096 }
1097 }
1098
1099 return ret;
1100}
1101
1102#ifdef CONFIG_IP_ROUTE_MULTIPATH
1103
1104/*
1105 Dead device goes up. We wake up dead nexthops.
1106 It takes sense only on multipath routes.
1107 */
1108
1109int fib_sync_up(struct net_device *dev)
1110{
1111 struct fib_info *prev_fi;
1112 unsigned int hash;
1113 struct hlist_head *head;
1114 struct hlist_node *node;
1115 struct fib_nh *nh;
1116 int ret;
1117
1118 if (!(dev->flags&IFF_UP))
1119 return 0;
1120
1121 prev_fi = NULL;
1122 hash = fib_devindex_hashfn(dev->ifindex);
1123 head = &fib_info_devhash[hash];
1124 ret = 0;
1125
1126 hlist_for_each_entry(nh, node, head, nh_hash) {
1127 struct fib_info *fi = nh->nh_parent;
1128 int alive;
1129
1130 BUG_ON(!fi->fib_nhs);
1131 if (nh->nh_dev != dev || fi == prev_fi)
1132 continue;
1133
1134 prev_fi = fi;
1135 alive = 0;
1136 change_nexthops(fi) {
1137 if (!(nh->nh_flags&RTNH_F_DEAD)) {
1138 alive++;
1139 continue;
1140 }
1141 if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
1142 continue;
Herbert Xue5ed6392005-10-03 14:35:55 -07001143 if (nh->nh_dev != dev || !__in_dev_get_rtnl(dev))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001144 continue;
1145 alive++;
1146 spin_lock_bh(&fib_multipath_lock);
1147 nh->nh_power = 0;
1148 nh->nh_flags &= ~RTNH_F_DEAD;
1149 spin_unlock_bh(&fib_multipath_lock);
1150 } endfor_nexthops(fi)
1151
1152 if (alive > 0) {
1153 fi->fib_flags &= ~RTNH_F_DEAD;
1154 ret++;
1155 }
1156 }
1157
1158 return ret;
1159}
1160
1161/*
1162 The algorithm is suboptimal, but it provides really
1163 fair weighted route distribution.
1164 */
1165
1166void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
1167{
1168 struct fib_info *fi = res->fi;
1169 int w;
1170
1171 spin_lock_bh(&fib_multipath_lock);
1172 if (fi->fib_power <= 0) {
1173 int power = 0;
1174 change_nexthops(fi) {
1175 if (!(nh->nh_flags&RTNH_F_DEAD)) {
1176 power += nh->nh_weight;
1177 nh->nh_power = nh->nh_weight;
1178 }
1179 } endfor_nexthops(fi);
1180 fi->fib_power = power;
1181 if (power <= 0) {
1182 spin_unlock_bh(&fib_multipath_lock);
1183 /* Race condition: route has just become dead. */
1184 res->nh_sel = 0;
1185 return;
1186 }
1187 }
1188
1189
1190 /* w should be random number [0..fi->fib_power-1],
1191 it is pretty bad approximation.
1192 */
1193
1194 w = jiffies % fi->fib_power;
1195
1196 change_nexthops(fi) {
1197 if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
1198 if ((w -= nh->nh_power) <= 0) {
1199 nh->nh_power--;
1200 fi->fib_power--;
1201 res->nh_sel = nhsel;
1202 spin_unlock_bh(&fib_multipath_lock);
1203 return;
1204 }
1205 }
1206 } endfor_nexthops(fi);
1207
1208 /* Race condition: route has just become dead. */
1209 res->nh_sel = 0;
1210 spin_unlock_bh(&fib_multipath_lock);
1211}
1212#endif