blob: a52334d30cf8ac6fc615cbe844c6b9bb3dc1bcae [file] [log] [blame]
Robert Olsson19baf832005-06-21 12:43:18 -07001/*
2 * This program is free software; you can redistribute it and/or
3 * modify it under the terms of the GNU General Public License
4 * as published by the Free Software Foundation; either version
5 * 2 of the License, or (at your option) any later version.
6 *
7 * Robert Olsson <robert.olsson@its.uu.se> Uppsala Universitet
8 * & Swedish University of Agricultural Sciences.
9 *
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090010 * Jens Laas <jens.laas@data.slu.se> Swedish University of
Robert Olsson19baf832005-06-21 12:43:18 -070011 * Agricultural Sciences.
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090012 *
Robert Olsson19baf832005-06-21 12:43:18 -070013 * Hans Liss <hans.liss@its.uu.se> Uppsala Universitet
14 *
15 * This work is based on the LPC-trie which is originally descibed in:
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090016 *
Robert Olsson19baf832005-06-21 12:43:18 -070017 * An experimental study of compression methods for dynamic tries
18 * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002.
19 * http://www.nada.kth.se/~snilsson/public/papers/dyntrie2/
20 *
21 *
22 * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson
23 * IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999
24 *
25 * Version: $Id: fib_trie.c,v 1.3 2005/06/08 14:20:01 robert Exp $
26 *
27 *
28 * Code from fib_hash has been reused which includes the following header:
29 *
30 *
31 * INET An implementation of the TCP/IP protocol suite for the LINUX
32 * operating system. INET is implemented using the BSD Socket
33 * interface as the means of communication with the user level.
34 *
35 * IPv4 FIB: lookup engine and maintenance routines.
36 *
37 *
38 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
39 *
40 * This program is free software; you can redistribute it and/or
41 * modify it under the terms of the GNU General Public License
42 * as published by the Free Software Foundation; either version
43 * 2 of the License, or (at your option) any later version.
Robert Olssonfd966252005-12-22 11:25:10 -080044 *
45 * Substantial contributions to this work comes from:
46 *
47 * David S. Miller, <davem@davemloft.net>
48 * Stephen Hemminger <shemminger@osdl.org>
49 * Paul E. McKenney <paulmck@us.ibm.com>
50 * Patrick McHardy <kaber@trash.net>
Robert Olsson19baf832005-06-21 12:43:18 -070051 */
52
Robert Olsson05eee482007-03-19 16:27:37 -070053#define VERSION "0.408"
Robert Olsson19baf832005-06-21 12:43:18 -070054
Robert Olsson19baf832005-06-21 12:43:18 -070055#include <asm/uaccess.h>
56#include <asm/system.h>
Jiri Slaby1977f032007-10-18 23:40:25 -070057#include <linux/bitops.h>
Robert Olsson19baf832005-06-21 12:43:18 -070058#include <linux/types.h>
59#include <linux/kernel.h>
Robert Olsson19baf832005-06-21 12:43:18 -070060#include <linux/mm.h>
61#include <linux/string.h>
62#include <linux/socket.h>
63#include <linux/sockios.h>
64#include <linux/errno.h>
65#include <linux/in.h>
66#include <linux/inet.h>
Stephen Hemmingercd8787a2006-01-03 14:38:34 -080067#include <linux/inetdevice.h>
Robert Olsson19baf832005-06-21 12:43:18 -070068#include <linux/netdevice.h>
69#include <linux/if_arp.h>
70#include <linux/proc_fs.h>
Robert Olsson2373ce12005-08-25 13:01:29 -070071#include <linux/rcupdate.h>
Robert Olsson19baf832005-06-21 12:43:18 -070072#include <linux/skbuff.h>
73#include <linux/netlink.h>
74#include <linux/init.h>
75#include <linux/list.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020076#include <net/net_namespace.h>
Robert Olsson19baf832005-06-21 12:43:18 -070077#include <net/ip.h>
78#include <net/protocol.h>
79#include <net/route.h>
80#include <net/tcp.h>
81#include <net/sock.h>
82#include <net/ip_fib.h>
83#include "fib_lookup.h"
84
Robert Olsson06ef9212006-03-20 21:35:01 -080085#define MAX_STAT_DEPTH 32
Robert Olsson19baf832005-06-21 12:43:18 -070086
Robert Olsson19baf832005-06-21 12:43:18 -070087#define KEYLENGTH (8*sizeof(t_key))
Robert Olsson19baf832005-06-21 12:43:18 -070088
Robert Olsson19baf832005-06-21 12:43:18 -070089typedef unsigned int t_key;
90
91#define T_TNODE 0
92#define T_LEAF 1
93#define NODE_TYPE_MASK 0x1UL
Robert Olsson2373ce12005-08-25 13:01:29 -070094#define NODE_TYPE(node) ((node)->parent & NODE_TYPE_MASK)
95
Olof Johansson91b9a272005-08-09 20:24:39 -070096#define IS_TNODE(n) (!(n->parent & T_LEAF))
97#define IS_LEAF(n) (n->parent & T_LEAF)
Robert Olsson19baf832005-06-21 12:43:18 -070098
99struct node {
Olof Johansson91b9a272005-08-09 20:24:39 -0700100 unsigned long parent;
Eric Dumazet8d9654442008-01-13 22:31:44 -0800101 t_key key;
Robert Olsson19baf832005-06-21 12:43:18 -0700102};
103
104struct leaf {
Olof Johansson91b9a272005-08-09 20:24:39 -0700105 unsigned long parent;
Eric Dumazet8d9654442008-01-13 22:31:44 -0800106 t_key key;
Robert Olsson19baf832005-06-21 12:43:18 -0700107 struct hlist_head list;
Robert Olsson2373ce12005-08-25 13:01:29 -0700108 struct rcu_head rcu;
Robert Olsson19baf832005-06-21 12:43:18 -0700109};
110
111struct leaf_info {
112 struct hlist_node hlist;
Robert Olsson2373ce12005-08-25 13:01:29 -0700113 struct rcu_head rcu;
Robert Olsson19baf832005-06-21 12:43:18 -0700114 int plen;
115 struct list_head falh;
116};
117
118struct tnode {
Olof Johansson91b9a272005-08-09 20:24:39 -0700119 unsigned long parent;
Eric Dumazet8d9654442008-01-13 22:31:44 -0800120 t_key key;
Eric Dumazet112d8cf2008-01-12 21:27:41 -0800121 unsigned char pos; /* 2log(KEYLENGTH) bits needed */
122 unsigned char bits; /* 2log(KEYLENGTH) bits needed */
Eric Dumazet8d9654442008-01-13 22:31:44 -0800123 unsigned int full_children; /* KEYLENGTH bits needed */
124 unsigned int empty_children; /* KEYLENGTH bits needed */
Robert Olsson2373ce12005-08-25 13:01:29 -0700125 struct rcu_head rcu;
Olof Johansson91b9a272005-08-09 20:24:39 -0700126 struct node *child[0];
Robert Olsson19baf832005-06-21 12:43:18 -0700127};
128
129#ifdef CONFIG_IP_FIB_TRIE_STATS
130struct trie_use_stats {
131 unsigned int gets;
132 unsigned int backtrack;
133 unsigned int semantic_match_passed;
134 unsigned int semantic_match_miss;
135 unsigned int null_node_hit;
Robert Olsson2f368952005-07-05 15:02:40 -0700136 unsigned int resize_node_skipped;
Robert Olsson19baf832005-06-21 12:43:18 -0700137};
138#endif
139
140struct trie_stat {
141 unsigned int totdepth;
142 unsigned int maxdepth;
143 unsigned int tnodes;
144 unsigned int leaves;
145 unsigned int nullpointers;
Robert Olsson06ef9212006-03-20 21:35:01 -0800146 unsigned int nodesizes[MAX_STAT_DEPTH];
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700147};
Robert Olsson19baf832005-06-21 12:43:18 -0700148
149struct trie {
Olof Johansson91b9a272005-08-09 20:24:39 -0700150 struct node *trie;
Stephen Hemmingerd717a9a2008-01-14 23:11:54 -0800151 unsigned int size;
Robert Olsson19baf832005-06-21 12:43:18 -0700152#ifdef CONFIG_IP_FIB_TRIE_STATS
153 struct trie_use_stats stats;
154#endif
Robert Olsson19baf832005-06-21 12:43:18 -0700155};
156
Robert Olsson19baf832005-06-21 12:43:18 -0700157static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n);
158static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull);
Robert Olsson19baf832005-06-21 12:43:18 -0700159static struct node *resize(struct trie *t, struct tnode *tn);
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700160static struct tnode *inflate(struct trie *t, struct tnode *tn);
161static struct tnode *halve(struct trie *t, struct tnode *tn);
Robert Olsson19baf832005-06-21 12:43:18 -0700162static void tnode_free(struct tnode *tn);
Robert Olsson19baf832005-06-21 12:43:18 -0700163
Christoph Lametere18b8902006-12-06 20:33:20 -0800164static struct kmem_cache *fn_alias_kmem __read_mostly;
Robert Olsson19baf832005-06-21 12:43:18 -0700165
Stephen Hemminger06801912007-08-10 15:22:13 -0700166static inline struct tnode *node_parent(struct node *node)
167{
Eric Dumazetb59cfbf2008-01-18 03:31:36 -0800168 return (struct tnode *)(node->parent & ~NODE_TYPE_MASK);
169}
Stephen Hemminger06801912007-08-10 15:22:13 -0700170
Eric Dumazetb59cfbf2008-01-18 03:31:36 -0800171static inline struct tnode *node_parent_rcu(struct node *node)
172{
173 struct tnode *ret = node_parent(node);
174
Stephen Hemminger06801912007-08-10 15:22:13 -0700175 return rcu_dereference(ret);
176}
177
178static inline void node_set_parent(struct node *node, struct tnode *ptr)
179{
180 rcu_assign_pointer(node->parent,
181 (unsigned long)ptr | NODE_TYPE(node));
182}
Robert Olsson2373ce12005-08-25 13:01:29 -0700183
Eric Dumazetb59cfbf2008-01-18 03:31:36 -0800184static inline struct node *tnode_get_child(struct tnode *tn, unsigned int i)
Robert Olsson19baf832005-06-21 12:43:18 -0700185{
Eric Dumazetb59cfbf2008-01-18 03:31:36 -0800186 BUG_ON(i >= 1U << tn->bits);
Robert Olsson19baf832005-06-21 12:43:18 -0700187
Eric Dumazetb59cfbf2008-01-18 03:31:36 -0800188 return tn->child[i];
189}
190
191static inline struct node *tnode_get_child_rcu(struct tnode *tn, unsigned int i)
192{
193 struct node *ret = tnode_get_child(tn, i);
194
195 return rcu_dereference(ret);
Robert Olsson19baf832005-06-21 12:43:18 -0700196}
197
Stephen Hemmignerbb435b82005-08-09 20:25:39 -0700198static inline int tnode_child_length(const struct tnode *tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700199{
Olof Johansson91b9a272005-08-09 20:24:39 -0700200 return 1 << tn->bits;
Robert Olsson19baf832005-06-21 12:43:18 -0700201}
202
Stephen Hemmingerab66b4a2007-08-10 15:22:58 -0700203static inline t_key mask_pfx(t_key k, unsigned short l)
204{
205 return (l == 0) ? 0 : k >> (KEYLENGTH-l) << (KEYLENGTH-l);
206}
207
Robert Olsson19baf832005-06-21 12:43:18 -0700208static inline t_key tkey_extract_bits(t_key a, int offset, int bits)
209{
Olof Johansson91b9a272005-08-09 20:24:39 -0700210 if (offset < KEYLENGTH)
Robert Olsson19baf832005-06-21 12:43:18 -0700211 return ((t_key)(a << offset)) >> (KEYLENGTH - bits);
Olof Johansson91b9a272005-08-09 20:24:39 -0700212 else
Robert Olsson19baf832005-06-21 12:43:18 -0700213 return 0;
214}
215
216static inline int tkey_equals(t_key a, t_key b)
217{
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700218 return a == b;
Robert Olsson19baf832005-06-21 12:43:18 -0700219}
220
221static inline int tkey_sub_equals(t_key a, int offset, int bits, t_key b)
222{
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700223 if (bits == 0 || offset >= KEYLENGTH)
224 return 1;
Olof Johansson91b9a272005-08-09 20:24:39 -0700225 bits = bits > KEYLENGTH ? KEYLENGTH : bits;
226 return ((a ^ b) << offset) >> (KEYLENGTH - bits) == 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700227}
Robert Olsson19baf832005-06-21 12:43:18 -0700228
229static inline int tkey_mismatch(t_key a, int offset, t_key b)
230{
231 t_key diff = a ^ b;
232 int i = offset;
233
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700234 if (!diff)
235 return 0;
236 while ((diff << i) >> (KEYLENGTH-1) == 0)
Robert Olsson19baf832005-06-21 12:43:18 -0700237 i++;
238 return i;
239}
240
Robert Olsson19baf832005-06-21 12:43:18 -0700241/*
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900242 To understand this stuff, an understanding of keys and all their bits is
243 necessary. Every node in the trie has a key associated with it, but not
Robert Olsson19baf832005-06-21 12:43:18 -0700244 all of the bits in that key are significant.
245
246 Consider a node 'n' and its parent 'tp'.
247
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900248 If n is a leaf, every bit in its key is significant. Its presence is
249 necessitated by path compression, since during a tree traversal (when
250 searching for a leaf - unless we are doing an insertion) we will completely
251 ignore all skipped bits we encounter. Thus we need to verify, at the end of
252 a potentially successful search, that we have indeed been walking the
Robert Olsson19baf832005-06-21 12:43:18 -0700253 correct key path.
254
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900255 Note that we can never "miss" the correct key in the tree if present by
256 following the wrong path. Path compression ensures that segments of the key
257 that are the same for all keys with a given prefix are skipped, but the
258 skipped part *is* identical for each node in the subtrie below the skipped
259 bit! trie_insert() in this implementation takes care of that - note the
Robert Olsson19baf832005-06-21 12:43:18 -0700260 call to tkey_sub_equals() in trie_insert().
261
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900262 if n is an internal node - a 'tnode' here, the various parts of its key
Robert Olsson19baf832005-06-21 12:43:18 -0700263 have many different meanings.
264
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900265 Example:
Robert Olsson19baf832005-06-21 12:43:18 -0700266 _________________________________________________________________
267 | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C |
268 -----------------------------------------------------------------
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900269 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
Robert Olsson19baf832005-06-21 12:43:18 -0700270
271 _________________________________________________________________
272 | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u |
273 -----------------------------------------------------------------
274 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
275
276 tp->pos = 7
277 tp->bits = 3
278 n->pos = 15
Olof Johansson91b9a272005-08-09 20:24:39 -0700279 n->bits = 4
Robert Olsson19baf832005-06-21 12:43:18 -0700280
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900281 First, let's just ignore the bits that come before the parent tp, that is
282 the bits from 0 to (tp->pos-1). They are *known* but at this point we do
Robert Olsson19baf832005-06-21 12:43:18 -0700283 not use them for anything.
284
285 The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900286 index into the parent's child array. That is, they will be used to find
Robert Olsson19baf832005-06-21 12:43:18 -0700287 'n' among tp's children.
288
289 The bits from (tp->pos + tp->bits) to (n->pos - 1) - "S" - are skipped bits
290 for the node n.
291
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900292 All the bits we have seen so far are significant to the node n. The rest
Robert Olsson19baf832005-06-21 12:43:18 -0700293 of the bits are really not needed or indeed known in n->key.
294
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900295 The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into
Robert Olsson19baf832005-06-21 12:43:18 -0700296 n's child array, and will of course be different for each child.
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900297
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700298
Robert Olsson19baf832005-06-21 12:43:18 -0700299 The rest of the bits, from (n->pos + n->bits) onward, are completely unknown
300 at this point.
301
302*/
303
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700304static inline void check_tnode(const struct tnode *tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700305{
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700306 WARN_ON(tn && tn->pos+tn->bits > 32);
Robert Olsson19baf832005-06-21 12:43:18 -0700307}
308
Denis V. Lunevf5026fa2007-12-13 09:47:57 -0800309static const int halve_threshold = 25;
310static const int inflate_threshold = 50;
311static const int halve_threshold_root = 8;
312static const int inflate_threshold_root = 15;
Robert Olsson19baf832005-06-21 12:43:18 -0700313
Robert Olsson2373ce12005-08-25 13:01:29 -0700314
315static void __alias_free_mem(struct rcu_head *head)
316{
317 struct fib_alias *fa = container_of(head, struct fib_alias, rcu);
318 kmem_cache_free(fn_alias_kmem, fa);
319}
320
321static inline void alias_free_mem_rcu(struct fib_alias *fa)
322{
323 call_rcu(&fa->rcu, __alias_free_mem);
324}
325
326static void __leaf_free_rcu(struct rcu_head *head)
327{
328 kfree(container_of(head, struct leaf, rcu));
329}
330
Robert Olsson2373ce12005-08-25 13:01:29 -0700331static void __leaf_info_free_rcu(struct rcu_head *head)
332{
333 kfree(container_of(head, struct leaf_info, rcu));
334}
335
336static inline void free_leaf_info(struct leaf_info *leaf)
337{
338 call_rcu(&leaf->rcu, __leaf_info_free_rcu);
339}
340
Eric Dumazet8d9654442008-01-13 22:31:44 -0800341static struct tnode *tnode_alloc(size_t size)
Robert Olsson2373ce12005-08-25 13:01:29 -0700342{
343 struct page *pages;
344
345 if (size <= PAGE_SIZE)
Eric Dumazet8d9654442008-01-13 22:31:44 -0800346 return kzalloc(size, GFP_KERNEL);
Robert Olsson2373ce12005-08-25 13:01:29 -0700347
348 pages = alloc_pages(GFP_KERNEL|__GFP_ZERO, get_order(size));
349 if (!pages)
350 return NULL;
351
352 return page_address(pages);
353}
354
355static void __tnode_free_rcu(struct rcu_head *head)
356{
357 struct tnode *tn = container_of(head, struct tnode, rcu);
Eric Dumazet8d9654442008-01-13 22:31:44 -0800358 size_t size = sizeof(struct tnode) +
359 (sizeof(struct node *) << tn->bits);
Robert Olsson2373ce12005-08-25 13:01:29 -0700360
361 if (size <= PAGE_SIZE)
362 kfree(tn);
363 else
364 free_pages((unsigned long)tn, get_order(size));
365}
366
367static inline void tnode_free(struct tnode *tn)
368{
Stephen Hemminger132adf52007-03-08 20:44:43 -0800369 if (IS_LEAF(tn)) {
Robert Olsson550e29b2006-04-04 12:53:35 -0700370 struct leaf *l = (struct leaf *) tn;
371 call_rcu_bh(&l->rcu, __leaf_free_rcu);
Stephen Hemminger132adf52007-03-08 20:44:43 -0800372 } else
Robert Olsson550e29b2006-04-04 12:53:35 -0700373 call_rcu(&tn->rcu, __tnode_free_rcu);
Robert Olsson2373ce12005-08-25 13:01:29 -0700374}
375
Robert Olsson19baf832005-06-21 12:43:18 -0700376static struct leaf *leaf_new(void)
377{
378 struct leaf *l = kmalloc(sizeof(struct leaf), GFP_KERNEL);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700379 if (l) {
Robert Olsson2373ce12005-08-25 13:01:29 -0700380 l->parent = T_LEAF;
Robert Olsson19baf832005-06-21 12:43:18 -0700381 INIT_HLIST_HEAD(&l->list);
382 }
383 return l;
384}
385
386static struct leaf_info *leaf_info_new(int plen)
387{
388 struct leaf_info *li = kmalloc(sizeof(struct leaf_info), GFP_KERNEL);
Robert Olsson2373ce12005-08-25 13:01:29 -0700389 if (li) {
390 li->plen = plen;
391 INIT_LIST_HEAD(&li->falh);
Patrick McHardyf0e36f82005-07-05 14:44:55 -0700392 }
Robert Olsson2373ce12005-08-25 13:01:29 -0700393 return li;
Patrick McHardyf0e36f82005-07-05 14:44:55 -0700394}
395
Robert Olsson19baf832005-06-21 12:43:18 -0700396static struct tnode* tnode_new(t_key key, int pos, int bits)
397{
Eric Dumazet8d9654442008-01-13 22:31:44 -0800398 size_t sz = sizeof(struct tnode) + (sizeof(struct node *) << bits);
Patrick McHardyf0e36f82005-07-05 14:44:55 -0700399 struct tnode *tn = tnode_alloc(sz);
Robert Olsson19baf832005-06-21 12:43:18 -0700400
Olof Johansson91b9a272005-08-09 20:24:39 -0700401 if (tn) {
Robert Olsson2373ce12005-08-25 13:01:29 -0700402 tn->parent = T_TNODE;
Robert Olsson19baf832005-06-21 12:43:18 -0700403 tn->pos = pos;
404 tn->bits = bits;
405 tn->key = key;
406 tn->full_children = 0;
407 tn->empty_children = 1<<bits;
408 }
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700409
Eric Dumazet8d9654442008-01-13 22:31:44 -0800410 pr_debug("AT %p s=%u %lu\n", tn, (unsigned int) sizeof(struct tnode),
411 (unsigned long) (sizeof(struct node) << bits));
Robert Olsson19baf832005-06-21 12:43:18 -0700412 return tn;
413}
414
Robert Olsson19baf832005-06-21 12:43:18 -0700415/*
416 * Check whether a tnode 'n' is "full", i.e. it is an internal node
417 * and no bits are skipped. See discussion in dyntree paper p. 6
418 */
419
Stephen Hemmignerbb435b82005-08-09 20:25:39 -0700420static inline int tnode_full(const struct tnode *tn, const struct node *n)
Robert Olsson19baf832005-06-21 12:43:18 -0700421{
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700422 if (n == NULL || IS_LEAF(n))
Robert Olsson19baf832005-06-21 12:43:18 -0700423 return 0;
424
425 return ((struct tnode *) n)->pos == tn->pos + tn->bits;
426}
427
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700428static inline void put_child(struct trie *t, struct tnode *tn, int i, struct node *n)
Robert Olsson19baf832005-06-21 12:43:18 -0700429{
430 tnode_put_child_reorg(tn, i, n, -1);
431}
432
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700433 /*
Robert Olsson19baf832005-06-21 12:43:18 -0700434 * Add a child at position i overwriting the old value.
435 * Update the value of full_children and empty_children.
436 */
437
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700438static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull)
Robert Olsson19baf832005-06-21 12:43:18 -0700439{
Robert Olsson2373ce12005-08-25 13:01:29 -0700440 struct node *chi = tn->child[i];
Robert Olsson19baf832005-06-21 12:43:18 -0700441 int isfull;
442
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700443 BUG_ON(i >= 1<<tn->bits);
444
Robert Olsson19baf832005-06-21 12:43:18 -0700445
446 /* update emptyChildren */
447 if (n == NULL && chi != NULL)
448 tn->empty_children++;
449 else if (n != NULL && chi == NULL)
450 tn->empty_children--;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700451
Robert Olsson19baf832005-06-21 12:43:18 -0700452 /* update fullChildren */
Olof Johansson91b9a272005-08-09 20:24:39 -0700453 if (wasfull == -1)
Robert Olsson19baf832005-06-21 12:43:18 -0700454 wasfull = tnode_full(tn, chi);
455
456 isfull = tnode_full(tn, n);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700457 if (wasfull && !isfull)
Robert Olsson19baf832005-06-21 12:43:18 -0700458 tn->full_children--;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700459 else if (!wasfull && isfull)
Robert Olsson19baf832005-06-21 12:43:18 -0700460 tn->full_children++;
Olof Johansson91b9a272005-08-09 20:24:39 -0700461
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700462 if (n)
Stephen Hemminger06801912007-08-10 15:22:13 -0700463 node_set_parent(n, tn);
Robert Olsson19baf832005-06-21 12:43:18 -0700464
Robert Olsson2373ce12005-08-25 13:01:29 -0700465 rcu_assign_pointer(tn->child[i], n);
Robert Olsson19baf832005-06-21 12:43:18 -0700466}
467
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700468static struct node *resize(struct trie *t, struct tnode *tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700469{
470 int i;
Robert Olsson2f368952005-07-05 15:02:40 -0700471 int err = 0;
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700472 struct tnode *old_tn;
Robert Olssone6308be2005-10-04 13:01:58 -0700473 int inflate_threshold_use;
474 int halve_threshold_use;
Robert Olsson05eee482007-03-19 16:27:37 -0700475 int max_resize;
Robert Olsson19baf832005-06-21 12:43:18 -0700476
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900477 if (!tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700478 return NULL;
479
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700480 pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n",
481 tn, inflate_threshold, halve_threshold);
Robert Olsson19baf832005-06-21 12:43:18 -0700482
483 /* No children */
484 if (tn->empty_children == tnode_child_length(tn)) {
485 tnode_free(tn);
486 return NULL;
487 }
488 /* One child */
489 if (tn->empty_children == tnode_child_length(tn) - 1)
490 for (i = 0; i < tnode_child_length(tn); i++) {
Olof Johansson91b9a272005-08-09 20:24:39 -0700491 struct node *n;
Robert Olsson19baf832005-06-21 12:43:18 -0700492
Olof Johansson91b9a272005-08-09 20:24:39 -0700493 n = tn->child[i];
Robert Olsson2373ce12005-08-25 13:01:29 -0700494 if (!n)
Olof Johansson91b9a272005-08-09 20:24:39 -0700495 continue;
Olof Johansson91b9a272005-08-09 20:24:39 -0700496
497 /* compress one level */
Stephen Hemminger06801912007-08-10 15:22:13 -0700498 node_set_parent(n, NULL);
Olof Johansson91b9a272005-08-09 20:24:39 -0700499 tnode_free(tn);
500 return n;
Robert Olsson19baf832005-06-21 12:43:18 -0700501 }
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700502 /*
Robert Olsson19baf832005-06-21 12:43:18 -0700503 * Double as long as the resulting node has a number of
504 * nonempty nodes that are above the threshold.
505 */
506
507 /*
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700508 * From "Implementing a dynamic compressed trie" by Stefan Nilsson of
509 * the Helsinki University of Technology and Matti Tikkanen of Nokia
Robert Olsson19baf832005-06-21 12:43:18 -0700510 * Telecommunications, page 6:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700511 * "A node is doubled if the ratio of non-empty children to all
Robert Olsson19baf832005-06-21 12:43:18 -0700512 * children in the *doubled* node is at least 'high'."
513 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700514 * 'high' in this instance is the variable 'inflate_threshold'. It
515 * is expressed as a percentage, so we multiply it with
516 * tnode_child_length() and instead of multiplying by 2 (since the
517 * child array will be doubled by inflate()) and multiplying
518 * the left-hand side by 100 (to handle the percentage thing) we
Robert Olsson19baf832005-06-21 12:43:18 -0700519 * multiply the left-hand side by 50.
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700520 *
521 * The left-hand side may look a bit weird: tnode_child_length(tn)
522 * - tn->empty_children is of course the number of non-null children
523 * in the current node. tn->full_children is the number of "full"
Robert Olsson19baf832005-06-21 12:43:18 -0700524 * children, that is non-null tnodes with a skip value of 0.
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700525 * All of those will be doubled in the resulting inflated tnode, so
Robert Olsson19baf832005-06-21 12:43:18 -0700526 * we just count them one extra time here.
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700527 *
Robert Olsson19baf832005-06-21 12:43:18 -0700528 * A clearer way to write this would be:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700529 *
Robert Olsson19baf832005-06-21 12:43:18 -0700530 * to_be_doubled = tn->full_children;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700531 * not_to_be_doubled = tnode_child_length(tn) - tn->empty_children -
Robert Olsson19baf832005-06-21 12:43:18 -0700532 * tn->full_children;
533 *
534 * new_child_length = tnode_child_length(tn) * 2;
535 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700536 * new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) /
Robert Olsson19baf832005-06-21 12:43:18 -0700537 * new_child_length;
538 * if (new_fill_factor >= inflate_threshold)
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700539 *
540 * ...and so on, tho it would mess up the while () loop.
541 *
Robert Olsson19baf832005-06-21 12:43:18 -0700542 * anyway,
543 * 100 * (not_to_be_doubled + 2*to_be_doubled) / new_child_length >=
544 * inflate_threshold
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700545 *
Robert Olsson19baf832005-06-21 12:43:18 -0700546 * avoid a division:
547 * 100 * (not_to_be_doubled + 2*to_be_doubled) >=
548 * inflate_threshold * new_child_length
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700549 *
Robert Olsson19baf832005-06-21 12:43:18 -0700550 * expand not_to_be_doubled and to_be_doubled, and shorten:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700551 * 100 * (tnode_child_length(tn) - tn->empty_children +
Olof Johansson91b9a272005-08-09 20:24:39 -0700552 * tn->full_children) >= inflate_threshold * new_child_length
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700553 *
Robert Olsson19baf832005-06-21 12:43:18 -0700554 * expand new_child_length:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700555 * 100 * (tnode_child_length(tn) - tn->empty_children +
Olof Johansson91b9a272005-08-09 20:24:39 -0700556 * tn->full_children) >=
Robert Olsson19baf832005-06-21 12:43:18 -0700557 * inflate_threshold * tnode_child_length(tn) * 2
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700558 *
Robert Olsson19baf832005-06-21 12:43:18 -0700559 * shorten again:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700560 * 50 * (tn->full_children + tnode_child_length(tn) -
Olof Johansson91b9a272005-08-09 20:24:39 -0700561 * tn->empty_children) >= inflate_threshold *
Robert Olsson19baf832005-06-21 12:43:18 -0700562 * tnode_child_length(tn)
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700563 *
Robert Olsson19baf832005-06-21 12:43:18 -0700564 */
565
566 check_tnode(tn);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700567
Robert Olssone6308be2005-10-04 13:01:58 -0700568 /* Keep root node larger */
569
Stephen Hemminger132adf52007-03-08 20:44:43 -0800570 if (!tn->parent)
Robert Olssone6308be2005-10-04 13:01:58 -0700571 inflate_threshold_use = inflate_threshold_root;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900572 else
Robert Olssone6308be2005-10-04 13:01:58 -0700573 inflate_threshold_use = inflate_threshold;
574
Robert Olsson2f368952005-07-05 15:02:40 -0700575 err = 0;
Robert Olsson05eee482007-03-19 16:27:37 -0700576 max_resize = 10;
577 while ((tn->full_children > 0 && max_resize-- &&
Robert Olsson19baf832005-06-21 12:43:18 -0700578 50 * (tn->full_children + tnode_child_length(tn) - tn->empty_children) >=
Robert Olssone6308be2005-10-04 13:01:58 -0700579 inflate_threshold_use * tnode_child_length(tn))) {
Robert Olsson19baf832005-06-21 12:43:18 -0700580
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700581 old_tn = tn;
582 tn = inflate(t, tn);
583 if (IS_ERR(tn)) {
584 tn = old_tn;
Robert Olsson2f368952005-07-05 15:02:40 -0700585#ifdef CONFIG_IP_FIB_TRIE_STATS
586 t->stats.resize_node_skipped++;
587#endif
588 break;
589 }
Robert Olsson19baf832005-06-21 12:43:18 -0700590 }
591
Robert Olsson05eee482007-03-19 16:27:37 -0700592 if (max_resize < 0) {
593 if (!tn->parent)
594 printk(KERN_WARNING "Fix inflate_threshold_root. Now=%d size=%d bits\n",
595 inflate_threshold_root, tn->bits);
596 else
597 printk(KERN_WARNING "Fix inflate_threshold. Now=%d size=%d bits\n",
598 inflate_threshold, tn->bits);
599 }
600
Robert Olsson19baf832005-06-21 12:43:18 -0700601 check_tnode(tn);
602
603 /*
604 * Halve as long as the number of empty children in this
605 * node is above threshold.
606 */
Robert Olsson2f368952005-07-05 15:02:40 -0700607
Robert Olssone6308be2005-10-04 13:01:58 -0700608
609 /* Keep root node larger */
610
Stephen Hemminger132adf52007-03-08 20:44:43 -0800611 if (!tn->parent)
Robert Olssone6308be2005-10-04 13:01:58 -0700612 halve_threshold_use = halve_threshold_root;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900613 else
Robert Olssone6308be2005-10-04 13:01:58 -0700614 halve_threshold_use = halve_threshold;
615
Robert Olsson2f368952005-07-05 15:02:40 -0700616 err = 0;
Robert Olsson05eee482007-03-19 16:27:37 -0700617 max_resize = 10;
618 while (tn->bits > 1 && max_resize-- &&
Robert Olsson19baf832005-06-21 12:43:18 -0700619 100 * (tnode_child_length(tn) - tn->empty_children) <
Robert Olssone6308be2005-10-04 13:01:58 -0700620 halve_threshold_use * tnode_child_length(tn)) {
Robert Olsson19baf832005-06-21 12:43:18 -0700621
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700622 old_tn = tn;
623 tn = halve(t, tn);
624 if (IS_ERR(tn)) {
625 tn = old_tn;
Robert Olsson2f368952005-07-05 15:02:40 -0700626#ifdef CONFIG_IP_FIB_TRIE_STATS
627 t->stats.resize_node_skipped++;
628#endif
629 break;
630 }
631 }
632
Robert Olsson05eee482007-03-19 16:27:37 -0700633 if (max_resize < 0) {
634 if (!tn->parent)
635 printk(KERN_WARNING "Fix halve_threshold_root. Now=%d size=%d bits\n",
636 halve_threshold_root, tn->bits);
637 else
638 printk(KERN_WARNING "Fix halve_threshold. Now=%d size=%d bits\n",
639 halve_threshold, tn->bits);
640 }
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700641
Robert Olsson19baf832005-06-21 12:43:18 -0700642 /* Only one child remains */
Robert Olsson19baf832005-06-21 12:43:18 -0700643 if (tn->empty_children == tnode_child_length(tn) - 1)
644 for (i = 0; i < tnode_child_length(tn); i++) {
Olof Johansson91b9a272005-08-09 20:24:39 -0700645 struct node *n;
646
Olof Johansson91b9a272005-08-09 20:24:39 -0700647 n = tn->child[i];
Robert Olsson2373ce12005-08-25 13:01:29 -0700648 if (!n)
Olof Johansson91b9a272005-08-09 20:24:39 -0700649 continue;
Olof Johansson91b9a272005-08-09 20:24:39 -0700650
651 /* compress one level */
652
Stephen Hemminger06801912007-08-10 15:22:13 -0700653 node_set_parent(n, NULL);
Olof Johansson91b9a272005-08-09 20:24:39 -0700654 tnode_free(tn);
655 return n;
Robert Olsson19baf832005-06-21 12:43:18 -0700656 }
657
658 return (struct node *) tn;
659}
660
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700661static struct tnode *inflate(struct trie *t, struct tnode *tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700662{
Robert Olsson19baf832005-06-21 12:43:18 -0700663 struct tnode *oldtnode = tn;
664 int olen = tnode_child_length(tn);
665 int i;
666
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700667 pr_debug("In inflate\n");
Robert Olsson19baf832005-06-21 12:43:18 -0700668
669 tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1);
670
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700671 if (!tn)
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700672 return ERR_PTR(-ENOMEM);
Robert Olsson2f368952005-07-05 15:02:40 -0700673
674 /*
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700675 * Preallocate and store tnodes before the actual work so we
676 * don't get into an inconsistent state if memory allocation
677 * fails. In case of failure we return the oldnode and inflate
Robert Olsson2f368952005-07-05 15:02:40 -0700678 * of tnode is ignored.
679 */
Olof Johansson91b9a272005-08-09 20:24:39 -0700680
681 for (i = 0; i < olen; i++) {
Robert Olsson2f368952005-07-05 15:02:40 -0700682 struct tnode *inode = (struct tnode *) tnode_get_child(oldtnode, i);
683
684 if (inode &&
685 IS_TNODE(inode) &&
686 inode->pos == oldtnode->pos + oldtnode->bits &&
687 inode->bits > 1) {
688 struct tnode *left, *right;
Stephen Hemmingerab66b4a2007-08-10 15:22:58 -0700689 t_key m = ~0U << (KEYLENGTH - 1) >> inode->pos;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700690
Robert Olsson2f368952005-07-05 15:02:40 -0700691 left = tnode_new(inode->key&(~m), inode->pos + 1,
692 inode->bits - 1);
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700693 if (!left)
694 goto nomem;
Olof Johansson91b9a272005-08-09 20:24:39 -0700695
Robert Olsson2f368952005-07-05 15:02:40 -0700696 right = tnode_new(inode->key|m, inode->pos + 1,
697 inode->bits - 1);
698
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900699 if (!right) {
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700700 tnode_free(left);
701 goto nomem;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900702 }
Robert Olsson2f368952005-07-05 15:02:40 -0700703
704 put_child(t, tn, 2*i, (struct node *) left);
705 put_child(t, tn, 2*i+1, (struct node *) right);
706 }
707 }
708
Olof Johansson91b9a272005-08-09 20:24:39 -0700709 for (i = 0; i < olen; i++) {
Stephen Hemmingerc95aaf92008-01-12 21:25:02 -0800710 struct tnode *inode;
Robert Olsson19baf832005-06-21 12:43:18 -0700711 struct node *node = tnode_get_child(oldtnode, i);
Olof Johansson91b9a272005-08-09 20:24:39 -0700712 struct tnode *left, *right;
713 int size, j;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700714
Robert Olsson19baf832005-06-21 12:43:18 -0700715 /* An empty child */
716 if (node == NULL)
717 continue;
718
719 /* A leaf or an internal node with skipped bits */
720
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700721 if (IS_LEAF(node) || ((struct tnode *) node)->pos >
Robert Olsson19baf832005-06-21 12:43:18 -0700722 tn->pos + tn->bits - 1) {
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700723 if (tkey_extract_bits(node->key, oldtnode->pos + oldtnode->bits,
Robert Olsson19baf832005-06-21 12:43:18 -0700724 1) == 0)
725 put_child(t, tn, 2*i, node);
726 else
727 put_child(t, tn, 2*i+1, node);
728 continue;
729 }
730
731 /* An internal node with two children */
732 inode = (struct tnode *) node;
733
734 if (inode->bits == 1) {
735 put_child(t, tn, 2*i, inode->child[0]);
736 put_child(t, tn, 2*i+1, inode->child[1]);
737
738 tnode_free(inode);
Olof Johansson91b9a272005-08-09 20:24:39 -0700739 continue;
Robert Olsson19baf832005-06-21 12:43:18 -0700740 }
741
Olof Johansson91b9a272005-08-09 20:24:39 -0700742 /* An internal node with more than two children */
Robert Olsson19baf832005-06-21 12:43:18 -0700743
Olof Johansson91b9a272005-08-09 20:24:39 -0700744 /* We will replace this node 'inode' with two new
745 * ones, 'left' and 'right', each with half of the
746 * original children. The two new nodes will have
747 * a position one bit further down the key and this
748 * means that the "significant" part of their keys
749 * (see the discussion near the top of this file)
750 * will differ by one bit, which will be "0" in
751 * left's key and "1" in right's key. Since we are
752 * moving the key position by one step, the bit that
753 * we are moving away from - the bit at position
754 * (inode->pos) - is the one that will differ between
755 * left and right. So... we synthesize that bit in the
756 * two new keys.
757 * The mask 'm' below will be a single "one" bit at
758 * the position (inode->pos)
759 */
Robert Olsson19baf832005-06-21 12:43:18 -0700760
Olof Johansson91b9a272005-08-09 20:24:39 -0700761 /* Use the old key, but set the new significant
762 * bit to zero.
763 */
Robert Olsson19baf832005-06-21 12:43:18 -0700764
Olof Johansson91b9a272005-08-09 20:24:39 -0700765 left = (struct tnode *) tnode_get_child(tn, 2*i);
766 put_child(t, tn, 2*i, NULL);
Robert Olsson19baf832005-06-21 12:43:18 -0700767
Olof Johansson91b9a272005-08-09 20:24:39 -0700768 BUG_ON(!left);
Robert Olsson2f368952005-07-05 15:02:40 -0700769
Olof Johansson91b9a272005-08-09 20:24:39 -0700770 right = (struct tnode *) tnode_get_child(tn, 2*i+1);
771 put_child(t, tn, 2*i+1, NULL);
Robert Olsson2f368952005-07-05 15:02:40 -0700772
Olof Johansson91b9a272005-08-09 20:24:39 -0700773 BUG_ON(!right);
Robert Olsson2f368952005-07-05 15:02:40 -0700774
Olof Johansson91b9a272005-08-09 20:24:39 -0700775 size = tnode_child_length(left);
776 for (j = 0; j < size; j++) {
777 put_child(t, left, j, inode->child[j]);
778 put_child(t, right, j, inode->child[j + size]);
Robert Olsson19baf832005-06-21 12:43:18 -0700779 }
Olof Johansson91b9a272005-08-09 20:24:39 -0700780 put_child(t, tn, 2*i, resize(t, left));
781 put_child(t, tn, 2*i+1, resize(t, right));
782
783 tnode_free(inode);
Robert Olsson19baf832005-06-21 12:43:18 -0700784 }
785 tnode_free(oldtnode);
786 return tn;
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700787nomem:
788 {
789 int size = tnode_child_length(tn);
790 int j;
791
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700792 for (j = 0; j < size; j++)
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700793 if (tn->child[j])
794 tnode_free((struct tnode *)tn->child[j]);
795
796 tnode_free(tn);
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700797
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700798 return ERR_PTR(-ENOMEM);
799 }
Robert Olsson19baf832005-06-21 12:43:18 -0700800}
801
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700802static struct tnode *halve(struct trie *t, struct tnode *tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700803{
804 struct tnode *oldtnode = tn;
805 struct node *left, *right;
806 int i;
807 int olen = tnode_child_length(tn);
808
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700809 pr_debug("In halve\n");
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700810
811 tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1);
Robert Olsson19baf832005-06-21 12:43:18 -0700812
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700813 if (!tn)
814 return ERR_PTR(-ENOMEM);
Robert Olsson2f368952005-07-05 15:02:40 -0700815
816 /*
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700817 * Preallocate and store tnodes before the actual work so we
818 * don't get into an inconsistent state if memory allocation
819 * fails. In case of failure we return the oldnode and halve
Robert Olsson2f368952005-07-05 15:02:40 -0700820 * of tnode is ignored.
821 */
822
Olof Johansson91b9a272005-08-09 20:24:39 -0700823 for (i = 0; i < olen; i += 2) {
Robert Olsson2f368952005-07-05 15:02:40 -0700824 left = tnode_get_child(oldtnode, i);
825 right = tnode_get_child(oldtnode, i+1);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700826
Robert Olsson2f368952005-07-05 15:02:40 -0700827 /* Two nonempty children */
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700828 if (left && right) {
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700829 struct tnode *newn;
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700830
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700831 newn = tnode_new(left->key, tn->pos + tn->bits, 1);
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700832
833 if (!newn)
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700834 goto nomem;
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700835
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700836 put_child(t, tn, i/2, (struct node *)newn);
Robert Olsson2f368952005-07-05 15:02:40 -0700837 }
Robert Olsson2f368952005-07-05 15:02:40 -0700838
Robert Olsson2f368952005-07-05 15:02:40 -0700839 }
Robert Olsson19baf832005-06-21 12:43:18 -0700840
Olof Johansson91b9a272005-08-09 20:24:39 -0700841 for (i = 0; i < olen; i += 2) {
842 struct tnode *newBinNode;
843
Robert Olsson19baf832005-06-21 12:43:18 -0700844 left = tnode_get_child(oldtnode, i);
845 right = tnode_get_child(oldtnode, i+1);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700846
Robert Olsson19baf832005-06-21 12:43:18 -0700847 /* At least one of the children is empty */
848 if (left == NULL) {
849 if (right == NULL) /* Both are empty */
850 continue;
851 put_child(t, tn, i/2, right);
Olof Johansson91b9a272005-08-09 20:24:39 -0700852 continue;
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700853 }
Olof Johansson91b9a272005-08-09 20:24:39 -0700854
855 if (right == NULL) {
Robert Olsson19baf832005-06-21 12:43:18 -0700856 put_child(t, tn, i/2, left);
Olof Johansson91b9a272005-08-09 20:24:39 -0700857 continue;
858 }
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700859
Robert Olsson19baf832005-06-21 12:43:18 -0700860 /* Two nonempty children */
Olof Johansson91b9a272005-08-09 20:24:39 -0700861 newBinNode = (struct tnode *) tnode_get_child(tn, i/2);
862 put_child(t, tn, i/2, NULL);
Olof Johansson91b9a272005-08-09 20:24:39 -0700863 put_child(t, newBinNode, 0, left);
864 put_child(t, newBinNode, 1, right);
865 put_child(t, tn, i/2, resize(t, newBinNode));
Robert Olsson19baf832005-06-21 12:43:18 -0700866 }
867 tnode_free(oldtnode);
868 return tn;
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700869nomem:
870 {
871 int size = tnode_child_length(tn);
872 int j;
873
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700874 for (j = 0; j < size; j++)
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700875 if (tn->child[j])
876 tnode_free((struct tnode *)tn->child[j]);
877
878 tnode_free(tn);
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700879
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700880 return ERR_PTR(-ENOMEM);
881 }
Robert Olsson19baf832005-06-21 12:43:18 -0700882}
883
Robert Olsson772cb712005-09-19 15:31:18 -0700884/* readside must use rcu_read_lock currently dump routines
Robert Olsson2373ce12005-08-25 13:01:29 -0700885 via get_fa_head and dump */
886
Robert Olsson772cb712005-09-19 15:31:18 -0700887static struct leaf_info *find_leaf_info(struct leaf *l, int plen)
Robert Olsson19baf832005-06-21 12:43:18 -0700888{
Robert Olsson772cb712005-09-19 15:31:18 -0700889 struct hlist_head *head = &l->list;
Robert Olsson19baf832005-06-21 12:43:18 -0700890 struct hlist_node *node;
891 struct leaf_info *li;
892
Robert Olsson2373ce12005-08-25 13:01:29 -0700893 hlist_for_each_entry_rcu(li, node, head, hlist)
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700894 if (li->plen == plen)
Robert Olsson19baf832005-06-21 12:43:18 -0700895 return li;
Olof Johansson91b9a272005-08-09 20:24:39 -0700896
Robert Olsson19baf832005-06-21 12:43:18 -0700897 return NULL;
898}
899
900static inline struct list_head * get_fa_head(struct leaf *l, int plen)
901{
Robert Olsson772cb712005-09-19 15:31:18 -0700902 struct leaf_info *li = find_leaf_info(l, plen);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700903
Olof Johansson91b9a272005-08-09 20:24:39 -0700904 if (!li)
905 return NULL;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700906
Olof Johansson91b9a272005-08-09 20:24:39 -0700907 return &li->falh;
Robert Olsson19baf832005-06-21 12:43:18 -0700908}
909
910static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new)
911{
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900912 struct leaf_info *li = NULL, *last = NULL;
913 struct hlist_node *node;
Robert Olsson19baf832005-06-21 12:43:18 -0700914
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900915 if (hlist_empty(head)) {
916 hlist_add_head_rcu(&new->hlist, head);
917 } else {
918 hlist_for_each_entry(li, node, head, hlist) {
919 if (new->plen > li->plen)
920 break;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700921
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900922 last = li;
923 }
924 if (last)
925 hlist_add_after_rcu(&last->hlist, &new->hlist);
926 else
927 hlist_add_before_rcu(&new->hlist, &li->hlist);
928 }
Robert Olsson19baf832005-06-21 12:43:18 -0700929}
930
Robert Olsson2373ce12005-08-25 13:01:29 -0700931/* rcu_read_lock needs to be hold by caller from readside */
932
Robert Olsson19baf832005-06-21 12:43:18 -0700933static struct leaf *
934fib_find_node(struct trie *t, u32 key)
935{
936 int pos;
937 struct tnode *tn;
938 struct node *n;
939
940 pos = 0;
Robert Olsson2373ce12005-08-25 13:01:29 -0700941 n = rcu_dereference(t->trie);
Robert Olsson19baf832005-06-21 12:43:18 -0700942
943 while (n != NULL && NODE_TYPE(n) == T_TNODE) {
944 tn = (struct tnode *) n;
Olof Johansson91b9a272005-08-09 20:24:39 -0700945
Robert Olsson19baf832005-06-21 12:43:18 -0700946 check_tnode(tn);
Olof Johansson91b9a272005-08-09 20:24:39 -0700947
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700948 if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
Olof Johansson91b9a272005-08-09 20:24:39 -0700949 pos = tn->pos + tn->bits;
Eric Dumazetb59cfbf2008-01-18 03:31:36 -0800950 n = tnode_get_child_rcu(tn, tkey_extract_bits(key, tn->pos, tn->bits));
Olof Johansson91b9a272005-08-09 20:24:39 -0700951 } else
Robert Olsson19baf832005-06-21 12:43:18 -0700952 break;
953 }
954 /* Case we have found a leaf. Compare prefixes */
955
Olof Johansson91b9a272005-08-09 20:24:39 -0700956 if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key))
957 return (struct leaf *)n;
958
Robert Olsson19baf832005-06-21 12:43:18 -0700959 return NULL;
960}
961
962static struct node *trie_rebalance(struct trie *t, struct tnode *tn)
963{
Robert Olsson19baf832005-06-21 12:43:18 -0700964 int wasfull;
Stephen Hemminger06801912007-08-10 15:22:13 -0700965 t_key cindex, key = tn->key;
966 struct tnode *tp;
Robert Olsson19baf832005-06-21 12:43:18 -0700967
Stephen Hemminger06801912007-08-10 15:22:13 -0700968 while (tn != NULL && (tp = node_parent((struct node *)tn)) != NULL) {
Robert Olsson19baf832005-06-21 12:43:18 -0700969 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
970 wasfull = tnode_full(tp, tnode_get_child(tp, cindex));
971 tn = (struct tnode *) resize (t, (struct tnode *)tn);
972 tnode_put_child_reorg((struct tnode *)tp, cindex,(struct node*)tn, wasfull);
Olof Johansson91b9a272005-08-09 20:24:39 -0700973
Stephen Hemminger06801912007-08-10 15:22:13 -0700974 tp = node_parent((struct node *) tn);
975 if (!tp)
Robert Olsson19baf832005-06-21 12:43:18 -0700976 break;
Stephen Hemminger06801912007-08-10 15:22:13 -0700977 tn = tp;
Robert Olsson19baf832005-06-21 12:43:18 -0700978 }
Stephen Hemminger06801912007-08-10 15:22:13 -0700979
Robert Olsson19baf832005-06-21 12:43:18 -0700980 /* Handle last (top) tnode */
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700981 if (IS_TNODE(tn))
Robert Olsson19baf832005-06-21 12:43:18 -0700982 tn = (struct tnode*) resize(t, (struct tnode *)tn);
983
984 return (struct node*) tn;
985}
986
Robert Olsson2373ce12005-08-25 13:01:29 -0700987/* only used from updater-side */
988
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -0800989static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
Robert Olsson19baf832005-06-21 12:43:18 -0700990{
991 int pos, newpos;
992 struct tnode *tp = NULL, *tn = NULL;
993 struct node *n;
994 struct leaf *l;
995 int missbit;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700996 struct list_head *fa_head = NULL;
Robert Olsson19baf832005-06-21 12:43:18 -0700997 struct leaf_info *li;
998 t_key cindex;
999
1000 pos = 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001001 n = t->trie;
Robert Olsson19baf832005-06-21 12:43:18 -07001002
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001003 /* If we point to NULL, stop. Either the tree is empty and we should
1004 * just put a new leaf in if, or we have reached an empty child slot,
Robert Olsson19baf832005-06-21 12:43:18 -07001005 * and we should just put our new leaf in that.
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001006 * If we point to a T_TNODE, check if it matches our key. Note that
1007 * a T_TNODE might be skipping any number of bits - its 'pos' need
Robert Olsson19baf832005-06-21 12:43:18 -07001008 * not be the parent's 'pos'+'bits'!
1009 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001010 * If it does match the current key, get pos/bits from it, extract
Robert Olsson19baf832005-06-21 12:43:18 -07001011 * the index from our key, push the T_TNODE and walk the tree.
1012 *
1013 * If it doesn't, we have to replace it with a new T_TNODE.
1014 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001015 * If we point to a T_LEAF, it might or might not have the same key
1016 * as we do. If it does, just change the value, update the T_LEAF's
1017 * value, and return it.
Robert Olsson19baf832005-06-21 12:43:18 -07001018 * If it doesn't, we need to replace it with a T_TNODE.
1019 */
1020
1021 while (n != NULL && NODE_TYPE(n) == T_TNODE) {
1022 tn = (struct tnode *) n;
Olof Johansson91b9a272005-08-09 20:24:39 -07001023
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001024 check_tnode(tn);
Olof Johansson91b9a272005-08-09 20:24:39 -07001025
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001026 if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
Robert Olsson19baf832005-06-21 12:43:18 -07001027 tp = tn;
Olof Johansson91b9a272005-08-09 20:24:39 -07001028 pos = tn->pos + tn->bits;
Robert Olsson19baf832005-06-21 12:43:18 -07001029 n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits));
1030
Stephen Hemminger06801912007-08-10 15:22:13 -07001031 BUG_ON(n && node_parent(n) != tn);
Olof Johansson91b9a272005-08-09 20:24:39 -07001032 } else
Robert Olsson19baf832005-06-21 12:43:18 -07001033 break;
1034 }
1035
1036 /*
1037 * n ----> NULL, LEAF or TNODE
1038 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001039 * tp is n's (parent) ----> NULL or TNODE
Robert Olsson19baf832005-06-21 12:43:18 -07001040 */
1041
Olof Johansson91b9a272005-08-09 20:24:39 -07001042 BUG_ON(tp && IS_LEAF(tp));
Robert Olsson19baf832005-06-21 12:43:18 -07001043
1044 /* Case 1: n is a leaf. Compare prefixes */
1045
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001046 if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) {
Stephen Hemmingerc95aaf92008-01-12 21:25:02 -08001047 l = (struct leaf *) n;
Robert Olsson19baf832005-06-21 12:43:18 -07001048 li = leaf_info_new(plen);
Olof Johansson91b9a272005-08-09 20:24:39 -07001049
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001050 if (!li)
1051 return NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07001052
1053 fa_head = &li->falh;
1054 insert_leaf_info(&l->list, li);
1055 goto done;
1056 }
Robert Olsson19baf832005-06-21 12:43:18 -07001057 l = leaf_new();
1058
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001059 if (!l)
1060 return NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07001061
1062 l->key = key;
1063 li = leaf_info_new(plen);
1064
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001065 if (!li) {
Robert Olssonf835e472005-06-28 15:00:39 -07001066 tnode_free((struct tnode *) l);
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001067 return NULL;
Robert Olssonf835e472005-06-28 15:00:39 -07001068 }
Robert Olsson19baf832005-06-21 12:43:18 -07001069
1070 fa_head = &li->falh;
1071 insert_leaf_info(&l->list, li);
1072
Robert Olsson19baf832005-06-21 12:43:18 -07001073 if (t->trie && n == NULL) {
Olof Johansson91b9a272005-08-09 20:24:39 -07001074 /* Case 2: n is NULL, and will just insert a new leaf */
Robert Olsson19baf832005-06-21 12:43:18 -07001075
Stephen Hemminger06801912007-08-10 15:22:13 -07001076 node_set_parent((struct node *)l, tp);
Robert Olsson19baf832005-06-21 12:43:18 -07001077
Olof Johansson91b9a272005-08-09 20:24:39 -07001078 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1079 put_child(t, (struct tnode *)tp, cindex, (struct node *)l);
1080 } else {
1081 /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001082 /*
1083 * Add a new tnode here
Robert Olsson19baf832005-06-21 12:43:18 -07001084 * first tnode need some special handling
1085 */
1086
1087 if (tp)
Olof Johansson91b9a272005-08-09 20:24:39 -07001088 pos = tp->pos+tp->bits;
Robert Olsson19baf832005-06-21 12:43:18 -07001089 else
Olof Johansson91b9a272005-08-09 20:24:39 -07001090 pos = 0;
1091
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001092 if (n) {
Robert Olsson19baf832005-06-21 12:43:18 -07001093 newpos = tkey_mismatch(key, pos, n->key);
1094 tn = tnode_new(n->key, newpos, 1);
Olof Johansson91b9a272005-08-09 20:24:39 -07001095 } else {
Robert Olsson19baf832005-06-21 12:43:18 -07001096 newpos = 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001097 tn = tnode_new(key, newpos, 1); /* First tnode */
Robert Olsson19baf832005-06-21 12:43:18 -07001098 }
Robert Olsson19baf832005-06-21 12:43:18 -07001099
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001100 if (!tn) {
Robert Olssonf835e472005-06-28 15:00:39 -07001101 free_leaf_info(li);
1102 tnode_free((struct tnode *) l);
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001103 return NULL;
Olof Johansson91b9a272005-08-09 20:24:39 -07001104 }
1105
Stephen Hemminger06801912007-08-10 15:22:13 -07001106 node_set_parent((struct node *)tn, tp);
Robert Olsson19baf832005-06-21 12:43:18 -07001107
Olof Johansson91b9a272005-08-09 20:24:39 -07001108 missbit = tkey_extract_bits(key, newpos, 1);
Robert Olsson19baf832005-06-21 12:43:18 -07001109 put_child(t, tn, missbit, (struct node *)l);
1110 put_child(t, tn, 1-missbit, n);
1111
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001112 if (tp) {
Robert Olsson19baf832005-06-21 12:43:18 -07001113 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1114 put_child(t, (struct tnode *)tp, cindex, (struct node *)tn);
Olof Johansson91b9a272005-08-09 20:24:39 -07001115 } else {
Robert Olsson2373ce12005-08-25 13:01:29 -07001116 rcu_assign_pointer(t->trie, (struct node *)tn); /* First tnode */
Robert Olsson19baf832005-06-21 12:43:18 -07001117 tp = tn;
1118 }
1119 }
Olof Johansson91b9a272005-08-09 20:24:39 -07001120
1121 if (tp && tp->pos + tp->bits > 32)
Stephen Hemminger78c66712005-09-21 00:15:39 -07001122 printk(KERN_WARNING "fib_trie tp=%p pos=%d, bits=%d, key=%0x plen=%d\n",
Robert Olsson19baf832005-06-21 12:43:18 -07001123 tp, tp->pos, tp->bits, key, plen);
Olof Johansson91b9a272005-08-09 20:24:39 -07001124
Robert Olsson19baf832005-06-21 12:43:18 -07001125 /* Rebalance the trie */
Robert Olsson2373ce12005-08-25 13:01:29 -07001126
1127 rcu_assign_pointer(t->trie, trie_rebalance(t, tp));
Robert Olssonf835e472005-06-28 15:00:39 -07001128done:
Robert Olsson19baf832005-06-21 12:43:18 -07001129 return fa_head;
1130}
1131
Robert Olssond562f1f2007-03-26 14:22:22 -07001132/*
1133 * Caller must hold RTNL.
1134 */
Thomas Graf4e902c52006-08-17 18:14:52 -07001135static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg)
Robert Olsson19baf832005-06-21 12:43:18 -07001136{
1137 struct trie *t = (struct trie *) tb->tb_data;
1138 struct fib_alias *fa, *new_fa;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001139 struct list_head *fa_head = NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07001140 struct fib_info *fi;
Thomas Graf4e902c52006-08-17 18:14:52 -07001141 int plen = cfg->fc_dst_len;
1142 u8 tos = cfg->fc_tos;
Robert Olsson19baf832005-06-21 12:43:18 -07001143 u32 key, mask;
1144 int err;
1145 struct leaf *l;
1146
1147 if (plen > 32)
1148 return -EINVAL;
1149
Thomas Graf4e902c52006-08-17 18:14:52 -07001150 key = ntohl(cfg->fc_dst);
Robert Olsson19baf832005-06-21 12:43:18 -07001151
Patrick McHardy2dfe55b2006-08-10 23:08:33 -07001152 pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen);
Robert Olsson19baf832005-06-21 12:43:18 -07001153
Olof Johansson91b9a272005-08-09 20:24:39 -07001154 mask = ntohl(inet_make_mask(plen));
Robert Olsson19baf832005-06-21 12:43:18 -07001155
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001156 if (key & ~mask)
Robert Olsson19baf832005-06-21 12:43:18 -07001157 return -EINVAL;
1158
1159 key = key & mask;
1160
Thomas Graf4e902c52006-08-17 18:14:52 -07001161 fi = fib_create_info(cfg);
1162 if (IS_ERR(fi)) {
1163 err = PTR_ERR(fi);
Robert Olsson19baf832005-06-21 12:43:18 -07001164 goto err;
Thomas Graf4e902c52006-08-17 18:14:52 -07001165 }
Robert Olsson19baf832005-06-21 12:43:18 -07001166
1167 l = fib_find_node(t, key);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001168 fa = NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07001169
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001170 if (l) {
Robert Olsson19baf832005-06-21 12:43:18 -07001171 fa_head = get_fa_head(l, plen);
1172 fa = fib_find_alias(fa_head, tos, fi->fib_priority);
1173 }
1174
1175 /* Now fa, if non-NULL, points to the first fib alias
1176 * with the same keys [prefix,tos,priority], if such key already
1177 * exists or to the node before which we will insert new one.
1178 *
1179 * If fa is NULL, we will need to allocate a new one and
1180 * insert to the head of f.
1181 *
1182 * If f is NULL, no fib node matched the destination key
1183 * and we need to allocate a new one of those as well.
1184 */
1185
Olof Johansson91b9a272005-08-09 20:24:39 -07001186 if (fa && fa->fa_info->fib_priority == fi->fib_priority) {
Robert Olsson19baf832005-06-21 12:43:18 -07001187 struct fib_alias *fa_orig;
1188
1189 err = -EEXIST;
Thomas Graf4e902c52006-08-17 18:14:52 -07001190 if (cfg->fc_nlflags & NLM_F_EXCL)
Robert Olsson19baf832005-06-21 12:43:18 -07001191 goto out;
1192
Thomas Graf4e902c52006-08-17 18:14:52 -07001193 if (cfg->fc_nlflags & NLM_F_REPLACE) {
Robert Olsson19baf832005-06-21 12:43:18 -07001194 struct fib_info *fi_drop;
1195 u8 state;
1196
Joonwoo Park67250332008-01-18 03:45:18 -08001197 if (fi->fib_treeref > 1)
1198 goto out;
1199
Robert Olsson2373ce12005-08-25 13:01:29 -07001200 err = -ENOBUFS;
Christoph Lametere94b1762006-12-06 20:33:17 -08001201 new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
Robert Olsson2373ce12005-08-25 13:01:29 -07001202 if (new_fa == NULL)
1203 goto out;
Robert Olsson19baf832005-06-21 12:43:18 -07001204
1205 fi_drop = fa->fa_info;
Robert Olsson2373ce12005-08-25 13:01:29 -07001206 new_fa->fa_tos = fa->fa_tos;
1207 new_fa->fa_info = fi;
Thomas Graf4e902c52006-08-17 18:14:52 -07001208 new_fa->fa_type = cfg->fc_type;
1209 new_fa->fa_scope = cfg->fc_scope;
Robert Olsson19baf832005-06-21 12:43:18 -07001210 state = fa->fa_state;
Robert Olsson2373ce12005-08-25 13:01:29 -07001211 new_fa->fa_state &= ~FA_S_ACCESSED;
Robert Olsson19baf832005-06-21 12:43:18 -07001212
Robert Olsson2373ce12005-08-25 13:01:29 -07001213 list_replace_rcu(&fa->fa_list, &new_fa->fa_list);
1214 alias_free_mem_rcu(fa);
Robert Olsson19baf832005-06-21 12:43:18 -07001215
1216 fib_release_info(fi_drop);
1217 if (state & FA_S_ACCESSED)
Olof Johansson91b9a272005-08-09 20:24:39 -07001218 rt_cache_flush(-1);
Milan Kocianb8f55832007-05-23 14:55:06 -07001219 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen,
1220 tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE);
Robert Olsson19baf832005-06-21 12:43:18 -07001221
Olof Johansson91b9a272005-08-09 20:24:39 -07001222 goto succeeded;
Robert Olsson19baf832005-06-21 12:43:18 -07001223 }
1224 /* Error if we find a perfect match which
1225 * uses the same scope, type, and nexthop
1226 * information.
1227 */
1228 fa_orig = fa;
1229 list_for_each_entry(fa, fa_orig->fa_list.prev, fa_list) {
1230 if (fa->fa_tos != tos)
1231 break;
1232 if (fa->fa_info->fib_priority != fi->fib_priority)
1233 break;
Thomas Graf4e902c52006-08-17 18:14:52 -07001234 if (fa->fa_type == cfg->fc_type &&
1235 fa->fa_scope == cfg->fc_scope &&
Robert Olsson19baf832005-06-21 12:43:18 -07001236 fa->fa_info == fi) {
1237 goto out;
1238 }
1239 }
Thomas Graf4e902c52006-08-17 18:14:52 -07001240 if (!(cfg->fc_nlflags & NLM_F_APPEND))
Robert Olsson19baf832005-06-21 12:43:18 -07001241 fa = fa_orig;
1242 }
1243 err = -ENOENT;
Thomas Graf4e902c52006-08-17 18:14:52 -07001244 if (!(cfg->fc_nlflags & NLM_F_CREATE))
Robert Olsson19baf832005-06-21 12:43:18 -07001245 goto out;
1246
1247 err = -ENOBUFS;
Christoph Lametere94b1762006-12-06 20:33:17 -08001248 new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
Robert Olsson19baf832005-06-21 12:43:18 -07001249 if (new_fa == NULL)
1250 goto out;
1251
1252 new_fa->fa_info = fi;
1253 new_fa->fa_tos = tos;
Thomas Graf4e902c52006-08-17 18:14:52 -07001254 new_fa->fa_type = cfg->fc_type;
1255 new_fa->fa_scope = cfg->fc_scope;
Robert Olsson19baf832005-06-21 12:43:18 -07001256 new_fa->fa_state = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001257 /*
1258 * Insert new entry to the list.
1259 */
1260
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001261 if (!fa_head) {
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001262 fa_head = fib_insert_node(t, key, plen);
1263 if (unlikely(!fa_head)) {
1264 err = -ENOMEM;
Robert Olssonf835e472005-06-28 15:00:39 -07001265 goto out_free_new_fa;
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001266 }
Robert Olssonf835e472005-06-28 15:00:39 -07001267 }
Robert Olsson19baf832005-06-21 12:43:18 -07001268
Robert Olsson2373ce12005-08-25 13:01:29 -07001269 list_add_tail_rcu(&new_fa->fa_list,
1270 (fa ? &fa->fa_list : fa_head));
Robert Olsson19baf832005-06-21 12:43:18 -07001271
Stephen Hemmingerd717a9a2008-01-14 23:11:54 -08001272 t->size++;
1273
Robert Olsson19baf832005-06-21 12:43:18 -07001274 rt_cache_flush(-1);
Thomas Graf4e902c52006-08-17 18:14:52 -07001275 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id,
Milan Kocianb8f55832007-05-23 14:55:06 -07001276 &cfg->fc_nlinfo, 0);
Robert Olsson19baf832005-06-21 12:43:18 -07001277succeeded:
1278 return 0;
Robert Olssonf835e472005-06-28 15:00:39 -07001279
1280out_free_new_fa:
1281 kmem_cache_free(fn_alias_kmem, new_fa);
Robert Olsson19baf832005-06-21 12:43:18 -07001282out:
1283 fib_release_info(fi);
Olof Johansson91b9a272005-08-09 20:24:39 -07001284err:
Robert Olsson19baf832005-06-21 12:43:18 -07001285 return err;
1286}
1287
Robert Olsson2373ce12005-08-25 13:01:29 -07001288
Robert Olsson772cb712005-09-19 15:31:18 -07001289/* should be called with rcu_read_lock */
Stephen Hemminger0c7770c2005-08-23 21:59:41 -07001290static inline int check_leaf(struct trie *t, struct leaf *l,
1291 t_key key, int *plen, const struct flowi *flp,
Patrick McHardy06c74272005-08-23 22:06:09 -07001292 struct fib_result *res)
Robert Olsson19baf832005-06-21 12:43:18 -07001293{
Patrick McHardy06c74272005-08-23 22:06:09 -07001294 int err, i;
Al Viro888454c2006-09-19 13:42:46 -07001295 __be32 mask;
Robert Olsson19baf832005-06-21 12:43:18 -07001296 struct leaf_info *li;
1297 struct hlist_head *hhead = &l->list;
1298 struct hlist_node *node;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001299
Robert Olsson2373ce12005-08-25 13:01:29 -07001300 hlist_for_each_entry_rcu(li, node, hhead, hlist) {
Robert Olsson19baf832005-06-21 12:43:18 -07001301 i = li->plen;
Al Viro888454c2006-09-19 13:42:46 -07001302 mask = inet_make_mask(i);
1303 if (l->key != (key & ntohl(mask)))
Robert Olsson19baf832005-06-21 12:43:18 -07001304 continue;
1305
Al Viro888454c2006-09-19 13:42:46 -07001306 if ((err = fib_semantic_match(&li->falh, flp, res, htonl(l->key), mask, i)) <= 0) {
Robert Olsson19baf832005-06-21 12:43:18 -07001307 *plen = i;
1308#ifdef CONFIG_IP_FIB_TRIE_STATS
1309 t->stats.semantic_match_passed++;
1310#endif
Patrick McHardy06c74272005-08-23 22:06:09 -07001311 return err;
Robert Olsson19baf832005-06-21 12:43:18 -07001312 }
1313#ifdef CONFIG_IP_FIB_TRIE_STATS
1314 t->stats.semantic_match_miss++;
1315#endif
1316 }
Patrick McHardy06c74272005-08-23 22:06:09 -07001317 return 1;
Robert Olsson19baf832005-06-21 12:43:18 -07001318}
1319
1320static int
1321fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
1322{
1323 struct trie *t = (struct trie *) tb->tb_data;
1324 int plen, ret = 0;
1325 struct node *n;
1326 struct tnode *pn;
1327 int pos, bits;
Olof Johansson91b9a272005-08-09 20:24:39 -07001328 t_key key = ntohl(flp->fl4_dst);
Robert Olsson19baf832005-06-21 12:43:18 -07001329 int chopped_off;
1330 t_key cindex = 0;
1331 int current_prefix_length = KEYLENGTH;
Olof Johansson91b9a272005-08-09 20:24:39 -07001332 struct tnode *cn;
1333 t_key node_prefix, key_prefix, pref_mismatch;
1334 int mp;
1335
Robert Olsson2373ce12005-08-25 13:01:29 -07001336 rcu_read_lock();
Robert Olsson19baf832005-06-21 12:43:18 -07001337
Robert Olsson2373ce12005-08-25 13:01:29 -07001338 n = rcu_dereference(t->trie);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001339 if (!n)
Robert Olsson19baf832005-06-21 12:43:18 -07001340 goto failed;
1341
1342#ifdef CONFIG_IP_FIB_TRIE_STATS
1343 t->stats.gets++;
1344#endif
1345
1346 /* Just a leaf? */
1347 if (IS_LEAF(n)) {
Patrick McHardy06c74272005-08-23 22:06:09 -07001348 if ((ret = check_leaf(t, (struct leaf *)n, key, &plen, flp, res)) <= 0)
Robert Olsson19baf832005-06-21 12:43:18 -07001349 goto found;
1350 goto failed;
1351 }
1352 pn = (struct tnode *) n;
1353 chopped_off = 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001354
Olof Johansson91b9a272005-08-09 20:24:39 -07001355 while (pn) {
Robert Olsson19baf832005-06-21 12:43:18 -07001356 pos = pn->pos;
1357 bits = pn->bits;
1358
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001359 if (!chopped_off)
Stephen Hemmingerab66b4a2007-08-10 15:22:58 -07001360 cindex = tkey_extract_bits(mask_pfx(key, current_prefix_length),
1361 pos, bits);
Robert Olsson19baf832005-06-21 12:43:18 -07001362
1363 n = tnode_get_child(pn, cindex);
1364
1365 if (n == NULL) {
1366#ifdef CONFIG_IP_FIB_TRIE_STATS
1367 t->stats.null_node_hit++;
1368#endif
1369 goto backtrace;
1370 }
1371
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001372 if (IS_LEAF(n)) {
Patrick McHardy06c74272005-08-23 22:06:09 -07001373 if ((ret = check_leaf(t, (struct leaf *)n, key, &plen, flp, res)) <= 0)
Robert Olsson19baf832005-06-21 12:43:18 -07001374 goto found;
Olof Johansson91b9a272005-08-09 20:24:39 -07001375 else
1376 goto backtrace;
1377 }
1378
1379#define HL_OPTIMIZE
1380#ifdef HL_OPTIMIZE
1381 cn = (struct tnode *)n;
1382
1383 /*
1384 * It's a tnode, and we can do some extra checks here if we
1385 * like, to avoid descending into a dead-end branch.
1386 * This tnode is in the parent's child array at index
1387 * key[p_pos..p_pos+p_bits] but potentially with some bits
1388 * chopped off, so in reality the index may be just a
1389 * subprefix, padded with zero at the end.
1390 * We can also take a look at any skipped bits in this
1391 * tnode - everything up to p_pos is supposed to be ok,
1392 * and the non-chopped bits of the index (se previous
1393 * paragraph) are also guaranteed ok, but the rest is
1394 * considered unknown.
1395 *
1396 * The skipped bits are key[pos+bits..cn->pos].
1397 */
1398
1399 /* If current_prefix_length < pos+bits, we are already doing
1400 * actual prefix matching, which means everything from
1401 * pos+(bits-chopped_off) onward must be zero along some
1402 * branch of this subtree - otherwise there is *no* valid
1403 * prefix present. Here we can only check the skipped
1404 * bits. Remember, since we have already indexed into the
1405 * parent's child array, we know that the bits we chopped of
1406 * *are* zero.
1407 */
1408
1409 /* NOTA BENE: CHECKING ONLY SKIPPED BITS FOR THE NEW NODE HERE */
1410
1411 if (current_prefix_length < pos+bits) {
1412 if (tkey_extract_bits(cn->key, current_prefix_length,
1413 cn->pos - current_prefix_length) != 0 ||
1414 !(cn->child[0]))
1415 goto backtrace;
1416 }
1417
1418 /*
1419 * If chopped_off=0, the index is fully validated and we
1420 * only need to look at the skipped bits for this, the new,
1421 * tnode. What we actually want to do is to find out if
1422 * these skipped bits match our key perfectly, or if we will
1423 * have to count on finding a matching prefix further down,
1424 * because if we do, we would like to have some way of
1425 * verifying the existence of such a prefix at this point.
1426 */
1427
1428 /* The only thing we can do at this point is to verify that
1429 * any such matching prefix can indeed be a prefix to our
1430 * key, and if the bits in the node we are inspecting that
1431 * do not match our key are not ZERO, this cannot be true.
1432 * Thus, find out where there is a mismatch (before cn->pos)
1433 * and verify that all the mismatching bits are zero in the
1434 * new tnode's key.
1435 */
1436
1437 /* Note: We aren't very concerned about the piece of the key
1438 * that precede pn->pos+pn->bits, since these have already been
1439 * checked. The bits after cn->pos aren't checked since these are
1440 * by definition "unknown" at this point. Thus, what we want to
1441 * see is if we are about to enter the "prefix matching" state,
1442 * and in that case verify that the skipped bits that will prevail
1443 * throughout this subtree are zero, as they have to be if we are
1444 * to find a matching prefix.
1445 */
1446
Stephen Hemmingerab66b4a2007-08-10 15:22:58 -07001447 node_prefix = mask_pfx(cn->key, cn->pos);
1448 key_prefix = mask_pfx(key, cn->pos);
Olof Johansson91b9a272005-08-09 20:24:39 -07001449 pref_mismatch = key_prefix^node_prefix;
1450 mp = 0;
1451
1452 /* In short: If skipped bits in this node do not match the search
1453 * key, enter the "prefix matching" state.directly.
1454 */
1455 if (pref_mismatch) {
1456 while (!(pref_mismatch & (1<<(KEYLENGTH-1)))) {
1457 mp++;
1458 pref_mismatch = pref_mismatch <<1;
1459 }
1460 key_prefix = tkey_extract_bits(cn->key, mp, cn->pos-mp);
1461
1462 if (key_prefix != 0)
1463 goto backtrace;
1464
1465 if (current_prefix_length >= cn->pos)
1466 current_prefix_length = mp;
1467 }
1468#endif
1469 pn = (struct tnode *)n; /* Descend */
1470 chopped_off = 0;
1471 continue;
1472
Robert Olsson19baf832005-06-21 12:43:18 -07001473backtrace:
1474 chopped_off++;
1475
1476 /* As zero don't change the child key (cindex) */
Olof Johansson91b9a272005-08-09 20:24:39 -07001477 while ((chopped_off <= pn->bits) && !(cindex & (1<<(chopped_off-1))))
Robert Olsson19baf832005-06-21 12:43:18 -07001478 chopped_off++;
Robert Olsson19baf832005-06-21 12:43:18 -07001479
1480 /* Decrease current_... with bits chopped off */
1481 if (current_prefix_length > pn->pos + pn->bits - chopped_off)
1482 current_prefix_length = pn->pos + pn->bits - chopped_off;
Olof Johansson91b9a272005-08-09 20:24:39 -07001483
Robert Olsson19baf832005-06-21 12:43:18 -07001484 /*
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001485 * Either we do the actual chop off according or if we have
Robert Olsson19baf832005-06-21 12:43:18 -07001486 * chopped off all bits in this tnode walk up to our parent.
1487 */
1488
Olof Johansson91b9a272005-08-09 20:24:39 -07001489 if (chopped_off <= pn->bits) {
Robert Olsson19baf832005-06-21 12:43:18 -07001490 cindex &= ~(1 << (chopped_off-1));
Olof Johansson91b9a272005-08-09 20:24:39 -07001491 } else {
Stephen Hemminger06801912007-08-10 15:22:13 -07001492 struct tnode *parent = node_parent((struct node *) pn);
1493 if (!parent)
Robert Olsson19baf832005-06-21 12:43:18 -07001494 goto failed;
Olof Johansson91b9a272005-08-09 20:24:39 -07001495
Robert Olsson19baf832005-06-21 12:43:18 -07001496 /* Get Child's index */
Stephen Hemminger06801912007-08-10 15:22:13 -07001497 cindex = tkey_extract_bits(pn->key, parent->pos, parent->bits);
1498 pn = parent;
Robert Olsson19baf832005-06-21 12:43:18 -07001499 chopped_off = 0;
1500
1501#ifdef CONFIG_IP_FIB_TRIE_STATS
1502 t->stats.backtrack++;
1503#endif
1504 goto backtrace;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001505 }
Robert Olsson19baf832005-06-21 12:43:18 -07001506 }
1507failed:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001508 ret = 1;
Robert Olsson19baf832005-06-21 12:43:18 -07001509found:
Robert Olsson2373ce12005-08-25 13:01:29 -07001510 rcu_read_unlock();
Robert Olsson19baf832005-06-21 12:43:18 -07001511 return ret;
1512}
1513
Robert Olsson2373ce12005-08-25 13:01:29 -07001514/* only called from updater side */
Robert Olsson19baf832005-06-21 12:43:18 -07001515static int trie_leaf_remove(struct trie *t, t_key key)
1516{
1517 t_key cindex;
1518 struct tnode *tp = NULL;
1519 struct node *n = t->trie;
1520 struct leaf *l;
1521
Stephen Hemminger0c7770c2005-08-23 21:59:41 -07001522 pr_debug("entering trie_leaf_remove(%p)\n", n);
Robert Olsson19baf832005-06-21 12:43:18 -07001523
1524 /* Note that in the case skipped bits, those bits are *not* checked!
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001525 * When we finish this, we will have NULL or a T_LEAF, and the
Robert Olsson19baf832005-06-21 12:43:18 -07001526 * T_LEAF may or may not match our key.
1527 */
1528
Olof Johansson91b9a272005-08-09 20:24:39 -07001529 while (n != NULL && IS_TNODE(n)) {
Robert Olsson19baf832005-06-21 12:43:18 -07001530 struct tnode *tn = (struct tnode *) n;
1531 check_tnode(tn);
1532 n = tnode_get_child(tn ,tkey_extract_bits(key, tn->pos, tn->bits));
1533
Stephen Hemminger06801912007-08-10 15:22:13 -07001534 BUG_ON(n && node_parent(n) != tn);
Olof Johansson91b9a272005-08-09 20:24:39 -07001535 }
Robert Olsson19baf832005-06-21 12:43:18 -07001536 l = (struct leaf *) n;
1537
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001538 if (!n || !tkey_equals(l->key, key))
Robert Olsson19baf832005-06-21 12:43:18 -07001539 return 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001540
1541 /*
1542 * Key found.
1543 * Remove the leaf and rebalance the tree
Robert Olsson19baf832005-06-21 12:43:18 -07001544 */
1545
Robert Olsson19baf832005-06-21 12:43:18 -07001546 t->size--;
1547
Stephen Hemminger06801912007-08-10 15:22:13 -07001548 tp = node_parent(n);
Robert Olsson19baf832005-06-21 12:43:18 -07001549 tnode_free((struct tnode *) n);
1550
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001551 if (tp) {
Robert Olsson19baf832005-06-21 12:43:18 -07001552 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1553 put_child(t, (struct tnode *)tp, cindex, NULL);
Robert Olsson2373ce12005-08-25 13:01:29 -07001554 rcu_assign_pointer(t->trie, trie_rebalance(t, tp));
Olof Johansson91b9a272005-08-09 20:24:39 -07001555 } else
Robert Olsson2373ce12005-08-25 13:01:29 -07001556 rcu_assign_pointer(t->trie, NULL);
Robert Olsson19baf832005-06-21 12:43:18 -07001557
1558 return 1;
1559}
1560
Robert Olssond562f1f2007-03-26 14:22:22 -07001561/*
1562 * Caller must hold RTNL.
1563 */
Thomas Graf4e902c52006-08-17 18:14:52 -07001564static int fn_trie_delete(struct fib_table *tb, struct fib_config *cfg)
Robert Olsson19baf832005-06-21 12:43:18 -07001565{
1566 struct trie *t = (struct trie *) tb->tb_data;
1567 u32 key, mask;
Thomas Graf4e902c52006-08-17 18:14:52 -07001568 int plen = cfg->fc_dst_len;
1569 u8 tos = cfg->fc_tos;
Robert Olsson19baf832005-06-21 12:43:18 -07001570 struct fib_alias *fa, *fa_to_delete;
1571 struct list_head *fa_head;
1572 struct leaf *l;
Olof Johansson91b9a272005-08-09 20:24:39 -07001573 struct leaf_info *li;
1574
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001575 if (plen > 32)
Robert Olsson19baf832005-06-21 12:43:18 -07001576 return -EINVAL;
1577
Thomas Graf4e902c52006-08-17 18:14:52 -07001578 key = ntohl(cfg->fc_dst);
Olof Johansson91b9a272005-08-09 20:24:39 -07001579 mask = ntohl(inet_make_mask(plen));
Robert Olsson19baf832005-06-21 12:43:18 -07001580
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001581 if (key & ~mask)
Robert Olsson19baf832005-06-21 12:43:18 -07001582 return -EINVAL;
1583
1584 key = key & mask;
1585 l = fib_find_node(t, key);
1586
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001587 if (!l)
Robert Olsson19baf832005-06-21 12:43:18 -07001588 return -ESRCH;
1589
1590 fa_head = get_fa_head(l, plen);
1591 fa = fib_find_alias(fa_head, tos, 0);
1592
1593 if (!fa)
1594 return -ESRCH;
1595
Stephen Hemminger0c7770c2005-08-23 21:59:41 -07001596 pr_debug("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t);
Robert Olsson19baf832005-06-21 12:43:18 -07001597
1598 fa_to_delete = NULL;
1599 fa_head = fa->fa_list.prev;
Robert Olsson2373ce12005-08-25 13:01:29 -07001600
Robert Olsson19baf832005-06-21 12:43:18 -07001601 list_for_each_entry(fa, fa_head, fa_list) {
1602 struct fib_info *fi = fa->fa_info;
1603
1604 if (fa->fa_tos != tos)
1605 break;
1606
Thomas Graf4e902c52006-08-17 18:14:52 -07001607 if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) &&
1608 (cfg->fc_scope == RT_SCOPE_NOWHERE ||
1609 fa->fa_scope == cfg->fc_scope) &&
1610 (!cfg->fc_protocol ||
1611 fi->fib_protocol == cfg->fc_protocol) &&
1612 fib_nh_match(cfg, fi) == 0) {
Robert Olsson19baf832005-06-21 12:43:18 -07001613 fa_to_delete = fa;
1614 break;
1615 }
1616 }
1617
Olof Johansson91b9a272005-08-09 20:24:39 -07001618 if (!fa_to_delete)
1619 return -ESRCH;
Robert Olsson19baf832005-06-21 12:43:18 -07001620
Olof Johansson91b9a272005-08-09 20:24:39 -07001621 fa = fa_to_delete;
Thomas Graf4e902c52006-08-17 18:14:52 -07001622 rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id,
Milan Kocianb8f55832007-05-23 14:55:06 -07001623 &cfg->fc_nlinfo, 0);
Robert Olsson19baf832005-06-21 12:43:18 -07001624
Olof Johansson91b9a272005-08-09 20:24:39 -07001625 l = fib_find_node(t, key);
Robert Olsson772cb712005-09-19 15:31:18 -07001626 li = find_leaf_info(l, plen);
Robert Olsson19baf832005-06-21 12:43:18 -07001627
Robert Olsson2373ce12005-08-25 13:01:29 -07001628 list_del_rcu(&fa->fa_list);
Robert Olsson19baf832005-06-21 12:43:18 -07001629
Olof Johansson91b9a272005-08-09 20:24:39 -07001630 if (list_empty(fa_head)) {
Robert Olsson2373ce12005-08-25 13:01:29 -07001631 hlist_del_rcu(&li->hlist);
Olof Johansson91b9a272005-08-09 20:24:39 -07001632 free_leaf_info(li);
Robert Olsson2373ce12005-08-25 13:01:29 -07001633 }
Olof Johansson91b9a272005-08-09 20:24:39 -07001634
1635 if (hlist_empty(&l->list))
1636 trie_leaf_remove(t, key);
1637
1638 if (fa->fa_state & FA_S_ACCESSED)
1639 rt_cache_flush(-1);
1640
Robert Olsson2373ce12005-08-25 13:01:29 -07001641 fib_release_info(fa->fa_info);
1642 alias_free_mem_rcu(fa);
Olof Johansson91b9a272005-08-09 20:24:39 -07001643 return 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001644}
1645
1646static int trie_flush_list(struct trie *t, struct list_head *head)
1647{
1648 struct fib_alias *fa, *fa_node;
1649 int found = 0;
1650
1651 list_for_each_entry_safe(fa, fa_node, head, fa_list) {
1652 struct fib_info *fi = fa->fa_info;
Robert Olsson19baf832005-06-21 12:43:18 -07001653
Robert Olsson2373ce12005-08-25 13:01:29 -07001654 if (fi && (fi->fib_flags & RTNH_F_DEAD)) {
1655 list_del_rcu(&fa->fa_list);
1656 fib_release_info(fa->fa_info);
1657 alias_free_mem_rcu(fa);
Robert Olsson19baf832005-06-21 12:43:18 -07001658 found++;
1659 }
1660 }
1661 return found;
1662}
1663
1664static int trie_flush_leaf(struct trie *t, struct leaf *l)
1665{
1666 int found = 0;
1667 struct hlist_head *lih = &l->list;
1668 struct hlist_node *node, *tmp;
1669 struct leaf_info *li = NULL;
1670
1671 hlist_for_each_entry_safe(li, node, tmp, lih, hlist) {
Robert Olsson19baf832005-06-21 12:43:18 -07001672 found += trie_flush_list(t, &li->falh);
1673
1674 if (list_empty(&li->falh)) {
Robert Olsson2373ce12005-08-25 13:01:29 -07001675 hlist_del_rcu(&li->hlist);
Robert Olsson19baf832005-06-21 12:43:18 -07001676 free_leaf_info(li);
1677 }
1678 }
1679 return found;
1680}
1681
Robert Olsson2373ce12005-08-25 13:01:29 -07001682/* rcu_read_lock needs to be hold by caller from readside */
1683
Robert Olsson19baf832005-06-21 12:43:18 -07001684static struct leaf *nextleaf(struct trie *t, struct leaf *thisleaf)
1685{
1686 struct node *c = (struct node *) thisleaf;
1687 struct tnode *p;
1688 int idx;
Robert Olsson2373ce12005-08-25 13:01:29 -07001689 struct node *trie = rcu_dereference(t->trie);
Robert Olsson19baf832005-06-21 12:43:18 -07001690
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001691 if (c == NULL) {
Robert Olsson2373ce12005-08-25 13:01:29 -07001692 if (trie == NULL)
Robert Olsson19baf832005-06-21 12:43:18 -07001693 return NULL;
1694
Robert Olsson2373ce12005-08-25 13:01:29 -07001695 if (IS_LEAF(trie)) /* trie w. just a leaf */
1696 return (struct leaf *) trie;
Robert Olsson19baf832005-06-21 12:43:18 -07001697
Robert Olsson2373ce12005-08-25 13:01:29 -07001698 p = (struct tnode*) trie; /* Start */
Olof Johansson91b9a272005-08-09 20:24:39 -07001699 } else
Eric Dumazetb59cfbf2008-01-18 03:31:36 -08001700 p = node_parent_rcu(c);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001701
Robert Olsson19baf832005-06-21 12:43:18 -07001702 while (p) {
1703 int pos, last;
1704
1705 /* Find the next child of the parent */
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001706 if (c)
1707 pos = 1 + tkey_extract_bits(c->key, p->pos, p->bits);
1708 else
Robert Olsson19baf832005-06-21 12:43:18 -07001709 pos = 0;
1710
1711 last = 1 << p->bits;
Olof Johansson91b9a272005-08-09 20:24:39 -07001712 for (idx = pos; idx < last ; idx++) {
Robert Olsson2373ce12005-08-25 13:01:29 -07001713 c = rcu_dereference(p->child[idx]);
1714
1715 if (!c)
Olof Johansson91b9a272005-08-09 20:24:39 -07001716 continue;
Robert Olsson19baf832005-06-21 12:43:18 -07001717
Olof Johansson91b9a272005-08-09 20:24:39 -07001718 /* Decend if tnode */
Robert Olsson2373ce12005-08-25 13:01:29 -07001719 while (IS_TNODE(c)) {
1720 p = (struct tnode *) c;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001721 idx = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001722
Olof Johansson91b9a272005-08-09 20:24:39 -07001723 /* Rightmost non-NULL branch */
1724 if (p && IS_TNODE(p))
Robert Olsson2373ce12005-08-25 13:01:29 -07001725 while (!(c = rcu_dereference(p->child[idx]))
1726 && idx < (1<<p->bits)) idx++;
Robert Olsson19baf832005-06-21 12:43:18 -07001727
Olof Johansson91b9a272005-08-09 20:24:39 -07001728 /* Done with this tnode? */
Robert Olsson2373ce12005-08-25 13:01:29 -07001729 if (idx >= (1 << p->bits) || !c)
Olof Johansson91b9a272005-08-09 20:24:39 -07001730 goto up;
Robert Olsson19baf832005-06-21 12:43:18 -07001731 }
Robert Olsson2373ce12005-08-25 13:01:29 -07001732 return (struct leaf *) c;
Robert Olsson19baf832005-06-21 12:43:18 -07001733 }
1734up:
1735 /* No more children go up one step */
Olof Johansson91b9a272005-08-09 20:24:39 -07001736 c = (struct node *) p;
Eric Dumazetb59cfbf2008-01-18 03:31:36 -08001737 p = node_parent_rcu(c);
Robert Olsson19baf832005-06-21 12:43:18 -07001738 }
1739 return NULL; /* Ready. Root of trie */
1740}
1741
Robert Olssond562f1f2007-03-26 14:22:22 -07001742/*
1743 * Caller must hold RTNL.
1744 */
Robert Olsson19baf832005-06-21 12:43:18 -07001745static int fn_trie_flush(struct fib_table *tb)
1746{
1747 struct trie *t = (struct trie *) tb->tb_data;
1748 struct leaf *ll = NULL, *l = NULL;
1749 int found = 0, h;
1750
Olof Johansson91b9a272005-08-09 20:24:39 -07001751 for (h = 0; (l = nextleaf(t, l)) != NULL; h++) {
Robert Olsson19baf832005-06-21 12:43:18 -07001752 found += trie_flush_leaf(t, l);
1753
1754 if (ll && hlist_empty(&ll->list))
1755 trie_leaf_remove(t, ll->key);
1756 ll = l;
1757 }
1758
1759 if (ll && hlist_empty(&ll->list))
1760 trie_leaf_remove(t, ll->key);
1761
Stephen Hemminger0c7770c2005-08-23 21:59:41 -07001762 pr_debug("trie_flush found=%d\n", found);
Robert Olsson19baf832005-06-21 12:43:18 -07001763 return found;
1764}
1765
Robert Olsson19baf832005-06-21 12:43:18 -07001766static void
1767fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
1768{
1769 struct trie *t = (struct trie *) tb->tb_data;
1770 int order, last_idx;
1771 struct fib_info *fi = NULL;
1772 struct fib_info *last_resort;
1773 struct fib_alias *fa = NULL;
1774 struct list_head *fa_head;
1775 struct leaf *l;
1776
1777 last_idx = -1;
1778 last_resort = NULL;
1779 order = -1;
1780
Robert Olsson2373ce12005-08-25 13:01:29 -07001781 rcu_read_lock();
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001782
Robert Olsson19baf832005-06-21 12:43:18 -07001783 l = fib_find_node(t, 0);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001784 if (!l)
Robert Olsson19baf832005-06-21 12:43:18 -07001785 goto out;
1786
1787 fa_head = get_fa_head(l, 0);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001788 if (!fa_head)
Robert Olsson19baf832005-06-21 12:43:18 -07001789 goto out;
1790
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001791 if (list_empty(fa_head))
Robert Olsson19baf832005-06-21 12:43:18 -07001792 goto out;
1793
Robert Olsson2373ce12005-08-25 13:01:29 -07001794 list_for_each_entry_rcu(fa, fa_head, fa_list) {
Robert Olsson19baf832005-06-21 12:43:18 -07001795 struct fib_info *next_fi = fa->fa_info;
Olof Johansson91b9a272005-08-09 20:24:39 -07001796
Robert Olsson19baf832005-06-21 12:43:18 -07001797 if (fa->fa_scope != res->scope ||
1798 fa->fa_type != RTN_UNICAST)
1799 continue;
Olof Johansson91b9a272005-08-09 20:24:39 -07001800
Robert Olsson19baf832005-06-21 12:43:18 -07001801 if (next_fi->fib_priority > res->fi->fib_priority)
1802 break;
1803 if (!next_fi->fib_nh[0].nh_gw ||
1804 next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
1805 continue;
1806 fa->fa_state |= FA_S_ACCESSED;
Olof Johansson91b9a272005-08-09 20:24:39 -07001807
Robert Olsson19baf832005-06-21 12:43:18 -07001808 if (fi == NULL) {
1809 if (next_fi != res->fi)
1810 break;
1811 } else if (!fib_detect_death(fi, order, &last_resort,
Denis V. Lunev971b8932007-12-08 00:32:23 -08001812 &last_idx, tb->tb_default)) {
Denis V. Luneva2bbe682007-12-08 00:31:44 -08001813 fib_result_assign(res, fi);
Denis V. Lunev971b8932007-12-08 00:32:23 -08001814 tb->tb_default = order;
Robert Olsson19baf832005-06-21 12:43:18 -07001815 goto out;
1816 }
1817 fi = next_fi;
1818 order++;
1819 }
1820 if (order <= 0 || fi == NULL) {
Denis V. Lunev971b8932007-12-08 00:32:23 -08001821 tb->tb_default = -1;
Robert Olsson19baf832005-06-21 12:43:18 -07001822 goto out;
1823 }
1824
Denis V. Lunev971b8932007-12-08 00:32:23 -08001825 if (!fib_detect_death(fi, order, &last_resort, &last_idx,
1826 tb->tb_default)) {
Denis V. Luneva2bbe682007-12-08 00:31:44 -08001827 fib_result_assign(res, fi);
Denis V. Lunev971b8932007-12-08 00:32:23 -08001828 tb->tb_default = order;
Robert Olsson19baf832005-06-21 12:43:18 -07001829 goto out;
1830 }
Denis V. Luneva2bbe682007-12-08 00:31:44 -08001831 if (last_idx >= 0)
1832 fib_result_assign(res, last_resort);
Denis V. Lunev971b8932007-12-08 00:32:23 -08001833 tb->tb_default = last_idx;
1834out:
Robert Olsson2373ce12005-08-25 13:01:29 -07001835 rcu_read_unlock();
Robert Olsson19baf832005-06-21 12:43:18 -07001836}
1837
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001838static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fib_table *tb,
Robert Olsson19baf832005-06-21 12:43:18 -07001839 struct sk_buff *skb, struct netlink_callback *cb)
1840{
1841 int i, s_i;
1842 struct fib_alias *fa;
1843
Al Viro32ab5f82006-09-26 22:21:45 -07001844 __be32 xkey = htonl(key);
Robert Olsson19baf832005-06-21 12:43:18 -07001845
Patrick McHardy1af5a8c2006-08-10 23:10:46 -07001846 s_i = cb->args[4];
Robert Olsson19baf832005-06-21 12:43:18 -07001847 i = 0;
1848
Robert Olsson2373ce12005-08-25 13:01:29 -07001849 /* rcu_read_lock is hold by caller */
1850
1851 list_for_each_entry_rcu(fa, fah, fa_list) {
Robert Olsson19baf832005-06-21 12:43:18 -07001852 if (i < s_i) {
1853 i++;
1854 continue;
1855 }
Stephen Hemminger78c66712005-09-21 00:15:39 -07001856 BUG_ON(!fa->fa_info);
Robert Olsson19baf832005-06-21 12:43:18 -07001857
1858 if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid,
1859 cb->nlh->nlmsg_seq,
1860 RTM_NEWROUTE,
1861 tb->tb_id,
1862 fa->fa_type,
1863 fa->fa_scope,
Thomas Grafbe403ea2006-08-17 18:15:17 -07001864 xkey,
Robert Olsson19baf832005-06-21 12:43:18 -07001865 plen,
1866 fa->fa_tos,
David S. Miller90f66912005-06-21 14:43:28 -07001867 fa->fa_info, 0) < 0) {
Patrick McHardy1af5a8c2006-08-10 23:10:46 -07001868 cb->args[4] = i;
Robert Olsson19baf832005-06-21 12:43:18 -07001869 return -1;
Olof Johansson91b9a272005-08-09 20:24:39 -07001870 }
Robert Olsson19baf832005-06-21 12:43:18 -07001871 i++;
1872 }
Patrick McHardy1af5a8c2006-08-10 23:10:46 -07001873 cb->args[4] = i;
Robert Olsson19baf832005-06-21 12:43:18 -07001874 return skb->len;
1875}
1876
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001877static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, struct sk_buff *skb,
Robert Olsson19baf832005-06-21 12:43:18 -07001878 struct netlink_callback *cb)
1879{
1880 int h, s_h;
1881 struct list_head *fa_head;
1882 struct leaf *l = NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07001883
Patrick McHardy1af5a8c2006-08-10 23:10:46 -07001884 s_h = cb->args[3];
Robert Olsson19baf832005-06-21 12:43:18 -07001885
Olof Johansson91b9a272005-08-09 20:24:39 -07001886 for (h = 0; (l = nextleaf(t, l)) != NULL; h++) {
Robert Olsson19baf832005-06-21 12:43:18 -07001887 if (h < s_h)
1888 continue;
1889 if (h > s_h)
Patrick McHardy1af5a8c2006-08-10 23:10:46 -07001890 memset(&cb->args[4], 0,
1891 sizeof(cb->args) - 4*sizeof(cb->args[0]));
Robert Olsson19baf832005-06-21 12:43:18 -07001892
1893 fa_head = get_fa_head(l, plen);
Olof Johansson91b9a272005-08-09 20:24:39 -07001894
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001895 if (!fa_head)
Robert Olsson19baf832005-06-21 12:43:18 -07001896 continue;
1897
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001898 if (list_empty(fa_head))
Robert Olsson19baf832005-06-21 12:43:18 -07001899 continue;
1900
1901 if (fn_trie_dump_fa(l->key, plen, fa_head, tb, skb, cb)<0) {
Patrick McHardy1af5a8c2006-08-10 23:10:46 -07001902 cb->args[3] = h;
Robert Olsson19baf832005-06-21 12:43:18 -07001903 return -1;
1904 }
1905 }
Patrick McHardy1af5a8c2006-08-10 23:10:46 -07001906 cb->args[3] = h;
Robert Olsson19baf832005-06-21 12:43:18 -07001907 return skb->len;
1908}
1909
1910static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb)
1911{
1912 int m, s_m;
1913 struct trie *t = (struct trie *) tb->tb_data;
1914
Patrick McHardy1af5a8c2006-08-10 23:10:46 -07001915 s_m = cb->args[2];
Robert Olsson19baf832005-06-21 12:43:18 -07001916
Robert Olsson2373ce12005-08-25 13:01:29 -07001917 rcu_read_lock();
Olof Johansson91b9a272005-08-09 20:24:39 -07001918 for (m = 0; m <= 32; m++) {
Robert Olsson19baf832005-06-21 12:43:18 -07001919 if (m < s_m)
1920 continue;
1921 if (m > s_m)
Patrick McHardy1af5a8c2006-08-10 23:10:46 -07001922 memset(&cb->args[3], 0,
1923 sizeof(cb->args) - 3*sizeof(cb->args[0]));
Robert Olsson19baf832005-06-21 12:43:18 -07001924
1925 if (fn_trie_dump_plen(t, 32-m, tb, skb, cb)<0) {
Patrick McHardy1af5a8c2006-08-10 23:10:46 -07001926 cb->args[2] = m;
Robert Olsson19baf832005-06-21 12:43:18 -07001927 goto out;
1928 }
1929 }
Robert Olsson2373ce12005-08-25 13:01:29 -07001930 rcu_read_unlock();
Patrick McHardy1af5a8c2006-08-10 23:10:46 -07001931 cb->args[2] = m;
Robert Olsson19baf832005-06-21 12:43:18 -07001932 return skb->len;
Olof Johansson91b9a272005-08-09 20:24:39 -07001933out:
Robert Olsson2373ce12005-08-25 13:01:29 -07001934 rcu_read_unlock();
Robert Olsson19baf832005-06-21 12:43:18 -07001935 return -1;
1936}
1937
Stephen Hemminger7f9b8052008-01-14 23:14:20 -08001938void __init fib_hash_init(void)
1939{
1940 fn_alias_kmem = kmem_cache_create("ip_fib_alias", sizeof(struct fib_alias),
1941 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
1942}
Robert Olsson19baf832005-06-21 12:43:18 -07001943
Stephen Hemminger7f9b8052008-01-14 23:14:20 -08001944
1945/* Fix more generic FIB names for init later */
1946struct fib_table *fib_hash_table(u32 id)
Robert Olsson19baf832005-06-21 12:43:18 -07001947{
1948 struct fib_table *tb;
1949 struct trie *t;
1950
Robert Olsson19baf832005-06-21 12:43:18 -07001951 tb = kmalloc(sizeof(struct fib_table) + sizeof(struct trie),
1952 GFP_KERNEL);
1953 if (tb == NULL)
1954 return NULL;
1955
1956 tb->tb_id = id;
Denis V. Lunev971b8932007-12-08 00:32:23 -08001957 tb->tb_default = -1;
Robert Olsson19baf832005-06-21 12:43:18 -07001958 tb->tb_lookup = fn_trie_lookup;
1959 tb->tb_insert = fn_trie_insert;
1960 tb->tb_delete = fn_trie_delete;
1961 tb->tb_flush = fn_trie_flush;
1962 tb->tb_select_default = fn_trie_select_default;
1963 tb->tb_dump = fn_trie_dump;
Robert Olsson19baf832005-06-21 12:43:18 -07001964
1965 t = (struct trie *) tb->tb_data;
Stephen Hemmingerc28a1cf2008-01-12 20:49:13 -08001966 memset(t, 0, sizeof(*t));
Robert Olsson19baf832005-06-21 12:43:18 -07001967
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001968 if (id == RT_TABLE_LOCAL)
Stephen Hemminger78c66712005-09-21 00:15:39 -07001969 printk(KERN_INFO "IPv4 FIB: Using LC-trie version %s\n", VERSION);
Robert Olsson19baf832005-06-21 12:43:18 -07001970
1971 return tb;
1972}
1973
Robert Olsson19baf832005-06-21 12:43:18 -07001974#ifdef CONFIG_PROC_FS
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001975/* Depth first Trie walk iterator */
1976struct fib_trie_iter {
Denis V. Lunev1c340b22008-01-10 03:27:17 -08001977 struct seq_net_private p;
Eric W. Biederman877a9bf2007-12-07 00:47:47 -08001978 struct trie *trie_local, *trie_main;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001979 struct tnode *tnode;
1980 struct trie *trie;
1981 unsigned index;
1982 unsigned depth;
1983};
Robert Olsson19baf832005-06-21 12:43:18 -07001984
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001985static struct node *fib_trie_get_next(struct fib_trie_iter *iter)
Robert Olsson19baf832005-06-21 12:43:18 -07001986{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001987 struct tnode *tn = iter->tnode;
1988 unsigned cindex = iter->index;
1989 struct tnode *p;
1990
Eric W. Biederman6640e692007-01-24 14:42:04 -08001991 /* A single entry routing table */
1992 if (!tn)
1993 return NULL;
1994
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001995 pr_debug("get_next iter={node=%p index=%d depth=%d}\n",
1996 iter->tnode, iter->index, iter->depth);
1997rescan:
1998 while (cindex < (1<<tn->bits)) {
Eric Dumazetb59cfbf2008-01-18 03:31:36 -08001999 struct node *n = tnode_get_child_rcu(tn, cindex);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002000
2001 if (n) {
2002 if (IS_LEAF(n)) {
2003 iter->tnode = tn;
2004 iter->index = cindex + 1;
2005 } else {
2006 /* push down one level */
2007 iter->tnode = (struct tnode *) n;
2008 iter->index = 0;
2009 ++iter->depth;
2010 }
2011 return n;
2012 }
2013
2014 ++cindex;
2015 }
2016
2017 /* Current node exhausted, pop back up */
Eric Dumazetb59cfbf2008-01-18 03:31:36 -08002018 p = node_parent_rcu((struct node *)tn);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002019 if (p) {
2020 cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1;
2021 tn = p;
2022 --iter->depth;
2023 goto rescan;
2024 }
2025
2026 /* got root? */
Robert Olsson19baf832005-06-21 12:43:18 -07002027 return NULL;
2028}
2029
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002030static struct node *fib_trie_get_first(struct fib_trie_iter *iter,
2031 struct trie *t)
Robert Olsson19baf832005-06-21 12:43:18 -07002032{
Robert Olsson5ddf0eb2006-03-20 21:34:12 -08002033 struct node *n ;
2034
Stephen Hemminger132adf52007-03-08 20:44:43 -08002035 if (!t)
Robert Olsson5ddf0eb2006-03-20 21:34:12 -08002036 return NULL;
2037
2038 n = rcu_dereference(t->trie);
2039
Stephen Hemminger132adf52007-03-08 20:44:43 -08002040 if (!iter)
Robert Olsson5ddf0eb2006-03-20 21:34:12 -08002041 return NULL;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002042
Eric W. Biederman6640e692007-01-24 14:42:04 -08002043 if (n) {
2044 if (IS_TNODE(n)) {
2045 iter->tnode = (struct tnode *) n;
2046 iter->trie = t;
2047 iter->index = 0;
2048 iter->depth = 1;
2049 } else {
2050 iter->tnode = NULL;
2051 iter->trie = t;
2052 iter->index = 0;
2053 iter->depth = 0;
2054 }
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002055 return n;
2056 }
Robert Olsson19baf832005-06-21 12:43:18 -07002057 return NULL;
2058}
2059
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002060static void trie_collect_stats(struct trie *t, struct trie_stat *s)
Robert Olsson19baf832005-06-21 12:43:18 -07002061{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002062 struct node *n;
2063 struct fib_trie_iter iter;
Robert Olsson19baf832005-06-21 12:43:18 -07002064
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002065 memset(s, 0, sizeof(*s));
Robert Olsson19baf832005-06-21 12:43:18 -07002066
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002067 rcu_read_lock();
2068 for (n = fib_trie_get_first(&iter, t); n;
2069 n = fib_trie_get_next(&iter)) {
2070 if (IS_LEAF(n)) {
2071 s->leaves++;
2072 s->totdepth += iter.depth;
2073 if (iter.depth > s->maxdepth)
2074 s->maxdepth = iter.depth;
2075 } else {
2076 const struct tnode *tn = (const struct tnode *) n;
2077 int i;
Robert Olsson19baf832005-06-21 12:43:18 -07002078
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002079 s->tnodes++;
Stephen Hemminger132adf52007-03-08 20:44:43 -08002080 if (tn->bits < MAX_STAT_DEPTH)
Robert Olsson06ef9212006-03-20 21:35:01 -08002081 s->nodesizes[tn->bits]++;
2082
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002083 for (i = 0; i < (1<<tn->bits); i++)
2084 if (!tn->child[i])
2085 s->nullpointers++;
2086 }
2087 }
2088 rcu_read_unlock();
Robert Olsson19baf832005-06-21 12:43:18 -07002089}
2090
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002091/*
Robert Olsson19baf832005-06-21 12:43:18 -07002092 * This outputs /proc/net/fib_triestats
Robert Olsson19baf832005-06-21 12:43:18 -07002093 */
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002094static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat)
Robert Olsson19baf832005-06-21 12:43:18 -07002095{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002096 unsigned i, max, pointers, bytes, avdepth;
Robert Olsson19baf832005-06-21 12:43:18 -07002097
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002098 if (stat->leaves)
2099 avdepth = stat->totdepth*100 / stat->leaves;
2100 else
2101 avdepth = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07002102
Stephen Hemminger187b5182008-01-12 20:55:55 -08002103 seq_printf(seq, "\tAver depth: %u.%02d\n", avdepth / 100, avdepth % 100 );
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002104 seq_printf(seq, "\tMax depth: %u\n", stat->maxdepth);
Robert Olsson19baf832005-06-21 12:43:18 -07002105
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002106 seq_printf(seq, "\tLeaves: %u\n", stat->leaves);
Olof Johansson91b9a272005-08-09 20:24:39 -07002107
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002108 bytes = sizeof(struct leaf) * stat->leaves;
Stephen Hemminger187b5182008-01-12 20:55:55 -08002109 seq_printf(seq, "\tInternal nodes: %u\n\t", stat->tnodes);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002110 bytes += sizeof(struct tnode) * stat->tnodes;
Robert Olsson19baf832005-06-21 12:43:18 -07002111
Robert Olsson06ef9212006-03-20 21:35:01 -08002112 max = MAX_STAT_DEPTH;
2113 while (max > 0 && stat->nodesizes[max-1] == 0)
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002114 max--;
Robert Olsson19baf832005-06-21 12:43:18 -07002115
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002116 pointers = 0;
2117 for (i = 1; i <= max; i++)
2118 if (stat->nodesizes[i] != 0) {
Stephen Hemminger187b5182008-01-12 20:55:55 -08002119 seq_printf(seq, " %u: %u", i, stat->nodesizes[i]);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002120 pointers += (1<<i) * stat->nodesizes[i];
2121 }
2122 seq_putc(seq, '\n');
Stephen Hemminger187b5182008-01-12 20:55:55 -08002123 seq_printf(seq, "\tPointers: %u\n", pointers);
Robert Olsson19baf832005-06-21 12:43:18 -07002124
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002125 bytes += sizeof(struct node *) * pointers;
Stephen Hemminger187b5182008-01-12 20:55:55 -08002126 seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers);
2127 seq_printf(seq, "Total size: %u kB\n", (bytes + 1023) / 1024);
Stephen Hemminger66a2f7f2008-01-12 21:23:17 -08002128}
Robert Olsson19baf832005-06-21 12:43:18 -07002129
2130#ifdef CONFIG_IP_FIB_TRIE_STATS
Stephen Hemminger66a2f7f2008-01-12 21:23:17 -08002131static void trie_show_usage(struct seq_file *seq,
2132 const struct trie_use_stats *stats)
2133{
2134 seq_printf(seq, "\nCounters:\n---------\n");
2135 seq_printf(seq,"gets = %u\n", stats->gets);
2136 seq_printf(seq,"backtracks = %u\n", stats->backtrack);
2137 seq_printf(seq,"semantic match passed = %u\n", stats->semantic_match_passed);
2138 seq_printf(seq,"semantic match miss = %u\n", stats->semantic_match_miss);
2139 seq_printf(seq,"null node hit= %u\n", stats->null_node_hit);
2140 seq_printf(seq,"skipped node resize = %u\n\n", stats->resize_node_skipped);
Robert Olsson19baf832005-06-21 12:43:18 -07002141}
Stephen Hemminger66a2f7f2008-01-12 21:23:17 -08002142#endif /* CONFIG_IP_FIB_TRIE_STATS */
2143
Stephen Hemmingerd717a9a2008-01-14 23:11:54 -08002144static void fib_trie_show(struct seq_file *seq, const char *name, struct trie *trie)
2145{
2146 struct trie_stat stat;
2147
2148 seq_printf(seq, "%s: %d\n", name, trie->size);
2149 trie_collect_stats(trie, &stat);
2150 trie_show_stats(seq, &stat);
2151#ifdef CONFIG_IP_FIB_TRIE_STATS
2152 trie_show_usage(seq, &trie->stats);
2153#endif
2154}
Robert Olsson19baf832005-06-21 12:43:18 -07002155
2156static int fib_triestat_seq_show(struct seq_file *seq, void *v)
2157{
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002158 struct net *net = (struct net *)seq->private;
Eric W. Biederman877a9bf2007-12-07 00:47:47 -08002159 struct fib_table *tb;
2160
Stephen Hemmingerd717a9a2008-01-14 23:11:54 -08002161 seq_printf(seq,
2162 "Basic info: size of leaf: %Zd bytes, size of tnode: %Zd bytes.\n",
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002163 sizeof(struct leaf), sizeof(struct tnode));
Olof Johansson91b9a272005-08-09 20:24:39 -07002164
Stephen Hemmingerd717a9a2008-01-14 23:11:54 -08002165 tb = fib_get_table(net, RT_TABLE_LOCAL);
2166 if (tb)
2167 fib_trie_show(seq, "Local", (struct trie *) tb->tb_data);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002168
Stephen Hemmingerd717a9a2008-01-14 23:11:54 -08002169 tb = fib_get_table(net, RT_TABLE_MAIN);
2170 if (tb)
2171 fib_trie_show(seq, "Main", (struct trie *) tb->tb_data);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002172
Robert Olsson19baf832005-06-21 12:43:18 -07002173 return 0;
2174}
2175
Robert Olsson19baf832005-06-21 12:43:18 -07002176static int fib_triestat_seq_open(struct inode *inode, struct file *file)
2177{
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002178 int err;
2179 struct net *net;
2180
2181 net = get_proc_net(inode);
2182 if (net == NULL)
2183 return -ENXIO;
2184 err = single_open(file, fib_triestat_seq_show, net);
2185 if (err < 0) {
2186 put_net(net);
2187 return err;
2188 }
2189 return 0;
2190}
2191
2192static int fib_triestat_seq_release(struct inode *ino, struct file *f)
2193{
2194 struct seq_file *seq = f->private_data;
2195 put_net(seq->private);
2196 return single_release(ino, f);
Robert Olsson19baf832005-06-21 12:43:18 -07002197}
2198
Arjan van de Ven9a321442007-02-12 00:55:35 -08002199static const struct file_operations fib_triestat_fops = {
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002200 .owner = THIS_MODULE,
2201 .open = fib_triestat_seq_open,
2202 .read = seq_read,
2203 .llseek = seq_lseek,
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002204 .release = fib_triestat_seq_release,
Robert Olsson19baf832005-06-21 12:43:18 -07002205};
2206
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002207static struct node *fib_trie_get_idx(struct fib_trie_iter *iter,
2208 loff_t pos)
Robert Olsson19baf832005-06-21 12:43:18 -07002209{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002210 loff_t idx = 0;
2211 struct node *n;
Robert Olsson19baf832005-06-21 12:43:18 -07002212
Eric W. Biederman877a9bf2007-12-07 00:47:47 -08002213 for (n = fib_trie_get_first(iter, iter->trie_local);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002214 n; ++idx, n = fib_trie_get_next(iter)) {
2215 if (pos == idx)
2216 return n;
2217 }
Robert Olsson19baf832005-06-21 12:43:18 -07002218
Eric W. Biederman877a9bf2007-12-07 00:47:47 -08002219 for (n = fib_trie_get_first(iter, iter->trie_main);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002220 n; ++idx, n = fib_trie_get_next(iter)) {
2221 if (pos == idx)
2222 return n;
2223 }
Robert Olsson19baf832005-06-21 12:43:18 -07002224 return NULL;
2225}
2226
2227static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos)
Stephen Hemmingerc95aaf92008-01-12 21:25:02 -08002228 __acquires(RCU)
Robert Olsson19baf832005-06-21 12:43:18 -07002229{
Eric W. Biederman877a9bf2007-12-07 00:47:47 -08002230 struct fib_trie_iter *iter = seq->private;
2231 struct fib_table *tb;
2232
2233 if (!iter->trie_local) {
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002234 tb = fib_get_table(iter->p.net, RT_TABLE_LOCAL);
Eric W. Biederman877a9bf2007-12-07 00:47:47 -08002235 if (tb)
2236 iter->trie_local = (struct trie *) tb->tb_data;
2237 }
2238 if (!iter->trie_main) {
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002239 tb = fib_get_table(iter->p.net, RT_TABLE_MAIN);
Eric W. Biederman877a9bf2007-12-07 00:47:47 -08002240 if (tb)
2241 iter->trie_main = (struct trie *) tb->tb_data;
2242 }
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002243 rcu_read_lock();
2244 if (*pos == 0)
Olof Johansson91b9a272005-08-09 20:24:39 -07002245 return SEQ_START_TOKEN;
Eric W. Biederman877a9bf2007-12-07 00:47:47 -08002246 return fib_trie_get_idx(iter, *pos - 1);
Robert Olsson19baf832005-06-21 12:43:18 -07002247}
2248
2249static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2250{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002251 struct fib_trie_iter *iter = seq->private;
2252 void *l = v;
2253
Robert Olsson19baf832005-06-21 12:43:18 -07002254 ++*pos;
Olof Johansson91b9a272005-08-09 20:24:39 -07002255 if (v == SEQ_START_TOKEN)
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002256 return fib_trie_get_idx(iter, 0);
Olof Johansson91b9a272005-08-09 20:24:39 -07002257
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002258 v = fib_trie_get_next(iter);
2259 BUG_ON(v == l);
2260 if (v)
2261 return v;
2262
2263 /* continue scan in next trie */
Eric W. Biederman877a9bf2007-12-07 00:47:47 -08002264 if (iter->trie == iter->trie_local)
2265 return fib_trie_get_first(iter, iter->trie_main);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002266
2267 return NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07002268}
2269
2270static void fib_trie_seq_stop(struct seq_file *seq, void *v)
Stephen Hemmingerc95aaf92008-01-12 21:25:02 -08002271 __releases(RCU)
Robert Olsson19baf832005-06-21 12:43:18 -07002272{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002273 rcu_read_unlock();
Robert Olsson19baf832005-06-21 12:43:18 -07002274}
2275
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002276static void seq_indent(struct seq_file *seq, int n)
2277{
2278 while (n-- > 0) seq_puts(seq, " ");
2279}
Robert Olsson19baf832005-06-21 12:43:18 -07002280
Eric Dumazet28d36e32008-01-14 23:09:56 -08002281static inline const char *rtn_scope(char *buf, size_t len, enum rt_scope_t s)
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002282{
Stephen Hemminger132adf52007-03-08 20:44:43 -08002283 switch (s) {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002284 case RT_SCOPE_UNIVERSE: return "universe";
2285 case RT_SCOPE_SITE: return "site";
2286 case RT_SCOPE_LINK: return "link";
2287 case RT_SCOPE_HOST: return "host";
2288 case RT_SCOPE_NOWHERE: return "nowhere";
2289 default:
Eric Dumazet28d36e32008-01-14 23:09:56 -08002290 snprintf(buf, len, "scope=%d", s);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002291 return buf;
2292 }
2293}
2294
2295static const char *rtn_type_names[__RTN_MAX] = {
2296 [RTN_UNSPEC] = "UNSPEC",
2297 [RTN_UNICAST] = "UNICAST",
2298 [RTN_LOCAL] = "LOCAL",
2299 [RTN_BROADCAST] = "BROADCAST",
2300 [RTN_ANYCAST] = "ANYCAST",
2301 [RTN_MULTICAST] = "MULTICAST",
2302 [RTN_BLACKHOLE] = "BLACKHOLE",
2303 [RTN_UNREACHABLE] = "UNREACHABLE",
2304 [RTN_PROHIBIT] = "PROHIBIT",
2305 [RTN_THROW] = "THROW",
2306 [RTN_NAT] = "NAT",
2307 [RTN_XRESOLVE] = "XRESOLVE",
2308};
2309
Eric Dumazet28d36e32008-01-14 23:09:56 -08002310static inline const char *rtn_type(char *buf, size_t len, unsigned t)
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002311{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002312 if (t < __RTN_MAX && rtn_type_names[t])
2313 return rtn_type_names[t];
Eric Dumazet28d36e32008-01-14 23:09:56 -08002314 snprintf(buf, len, "type %u", t);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002315 return buf;
2316}
2317
2318/* Pretty print the trie */
Robert Olsson19baf832005-06-21 12:43:18 -07002319static int fib_trie_seq_show(struct seq_file *seq, void *v)
2320{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002321 const struct fib_trie_iter *iter = seq->private;
2322 struct node *n = v;
Robert Olsson19baf832005-06-21 12:43:18 -07002323
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002324 if (v == SEQ_START_TOKEN)
2325 return 0;
Robert Olsson19baf832005-06-21 12:43:18 -07002326
Eric Dumazetb59cfbf2008-01-18 03:31:36 -08002327 if (!node_parent_rcu(n)) {
Eric W. Biederman877a9bf2007-12-07 00:47:47 -08002328 if (iter->trie == iter->trie_local)
Robert Olsson095b8502007-01-26 19:06:01 -08002329 seq_puts(seq, "<local>:\n");
2330 else
2331 seq_puts(seq, "<main>:\n");
2332 }
2333
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002334 if (IS_TNODE(n)) {
2335 struct tnode *tn = (struct tnode *) n;
Stephen Hemmingerab66b4a2007-08-10 15:22:58 -07002336 __be32 prf = htonl(mask_pfx(tn->key, tn->pos));
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002337
Robert Olsson1d25cd62005-09-19 15:29:52 -07002338 seq_indent(seq, iter->depth-1);
2339 seq_printf(seq, " +-- %d.%d.%d.%d/%d %d %d %d\n",
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002340 NIPQUAD(prf), tn->pos, tn->bits, tn->full_children,
Robert Olsson1d25cd62005-09-19 15:29:52 -07002341 tn->empty_children);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002342
Olof Johansson91b9a272005-08-09 20:24:39 -07002343 } else {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002344 struct leaf *l = (struct leaf *) n;
2345 int i;
Al Viro32ab5f82006-09-26 22:21:45 -07002346 __be32 val = htonl(l->key);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002347
2348 seq_indent(seq, iter->depth);
2349 seq_printf(seq, " |-- %d.%d.%d.%d\n", NIPQUAD(val));
2350 for (i = 32; i >= 0; i--) {
Robert Olsson772cb712005-09-19 15:31:18 -07002351 struct leaf_info *li = find_leaf_info(l, i);
Eric Dumazet28d36e32008-01-14 23:09:56 -08002352
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002353 if (li) {
2354 struct fib_alias *fa;
Eric Dumazet28d36e32008-01-14 23:09:56 -08002355
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002356 list_for_each_entry_rcu(fa, &li->falh, fa_list) {
Eric Dumazet28d36e32008-01-14 23:09:56 -08002357 char buf1[32], buf2[32];
2358
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002359 seq_indent(seq, iter->depth+1);
2360 seq_printf(seq, " /%d %s %s", i,
Eric Dumazet28d36e32008-01-14 23:09:56 -08002361 rtn_scope(buf1, sizeof(buf1),
2362 fa->fa_scope),
2363 rtn_type(buf2, sizeof(buf2),
2364 fa->fa_type));
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002365 if (fa->fa_tos)
2366 seq_printf(seq, "tos =%d\n",
2367 fa->fa_tos);
2368 seq_putc(seq, '\n');
2369 }
2370 }
2371 }
Robert Olsson19baf832005-06-21 12:43:18 -07002372 }
2373
2374 return 0;
2375}
2376
Stephen Hemmingerf6908082007-03-12 14:34:29 -07002377static const struct seq_operations fib_trie_seq_ops = {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002378 .start = fib_trie_seq_start,
2379 .next = fib_trie_seq_next,
2380 .stop = fib_trie_seq_stop,
2381 .show = fib_trie_seq_show,
Robert Olsson19baf832005-06-21 12:43:18 -07002382};
2383
2384static int fib_trie_seq_open(struct inode *inode, struct file *file)
2385{
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002386 return seq_open_net(inode, file, &fib_trie_seq_ops,
2387 sizeof(struct fib_trie_iter));
Robert Olsson19baf832005-06-21 12:43:18 -07002388}
2389
Arjan van de Ven9a321442007-02-12 00:55:35 -08002390static const struct file_operations fib_trie_fops = {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002391 .owner = THIS_MODULE,
2392 .open = fib_trie_seq_open,
2393 .read = seq_read,
2394 .llseek = seq_lseek,
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002395 .release = seq_release_net,
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002396};
2397
Al Viro32ab5f82006-09-26 22:21:45 -07002398static unsigned fib_flag_trans(int type, __be32 mask, const struct fib_info *fi)
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002399{
2400 static unsigned type2flags[RTN_MAX + 1] = {
2401 [7] = RTF_REJECT, [8] = RTF_REJECT,
2402 };
2403 unsigned flags = type2flags[type];
2404
2405 if (fi && fi->fib_nh->nh_gw)
2406 flags |= RTF_GATEWAY;
Al Viro32ab5f82006-09-26 22:21:45 -07002407 if (mask == htonl(0xFFFFFFFF))
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002408 flags |= RTF_HOST;
2409 flags |= RTF_UP;
2410 return flags;
2411}
2412
2413/*
2414 * This outputs /proc/net/route.
2415 * The format of the file is not supposed to be changed
2416 * and needs to be same as fib_hash output to avoid breaking
2417 * legacy utilities
2418 */
2419static int fib_route_seq_show(struct seq_file *seq, void *v)
2420{
Patrick McHardyc9e53cb2005-11-20 21:09:00 -08002421 const struct fib_trie_iter *iter = seq->private;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002422 struct leaf *l = v;
2423 int i;
2424 char bf[128];
2425
2426 if (v == SEQ_START_TOKEN) {
2427 seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway "
2428 "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU"
2429 "\tWindow\tIRTT");
2430 return 0;
2431 }
2432
Eric W. Biederman877a9bf2007-12-07 00:47:47 -08002433 if (iter->trie == iter->trie_local)
Patrick McHardyc9e53cb2005-11-20 21:09:00 -08002434 return 0;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002435 if (IS_TNODE(l))
2436 return 0;
2437
2438 for (i=32; i>=0; i--) {
Robert Olsson772cb712005-09-19 15:31:18 -07002439 struct leaf_info *li = find_leaf_info(l, i);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002440 struct fib_alias *fa;
Al Viro32ab5f82006-09-26 22:21:45 -07002441 __be32 mask, prefix;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002442
2443 if (!li)
2444 continue;
2445
2446 mask = inet_make_mask(li->plen);
2447 prefix = htonl(l->key);
2448
2449 list_for_each_entry_rcu(fa, &li->falh, fa_list) {
Herbert Xu1371e372005-10-15 09:42:39 +10002450 const struct fib_info *fi = fa->fa_info;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002451 unsigned flags = fib_flag_trans(fa->fa_type, mask, fi);
2452
2453 if (fa->fa_type == RTN_BROADCAST
2454 || fa->fa_type == RTN_MULTICAST)
2455 continue;
2456
2457 if (fi)
2458 snprintf(bf, sizeof(bf),
2459 "%s\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u",
2460 fi->fib_dev ? fi->fib_dev->name : "*",
2461 prefix,
2462 fi->fib_nh->nh_gw, flags, 0, 0,
2463 fi->fib_priority,
2464 mask,
2465 (fi->fib_advmss ? fi->fib_advmss + 40 : 0),
2466 fi->fib_window,
2467 fi->fib_rtt >> 3);
2468 else
2469 snprintf(bf, sizeof(bf),
2470 "*\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u",
2471 prefix, 0, flags, 0, 0, 0,
2472 mask, 0, 0, 0);
2473
2474 seq_printf(seq, "%-127s\n", bf);
2475 }
2476 }
2477
2478 return 0;
2479}
2480
Stephen Hemmingerf6908082007-03-12 14:34:29 -07002481static const struct seq_operations fib_route_seq_ops = {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002482 .start = fib_trie_seq_start,
2483 .next = fib_trie_seq_next,
2484 .stop = fib_trie_seq_stop,
2485 .show = fib_route_seq_show,
2486};
2487
2488static int fib_route_seq_open(struct inode *inode, struct file *file)
2489{
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002490 return seq_open_net(inode, file, &fib_route_seq_ops,
2491 sizeof(struct fib_trie_iter));
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002492}
2493
Arjan van de Ven9a321442007-02-12 00:55:35 -08002494static const struct file_operations fib_route_fops = {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002495 .owner = THIS_MODULE,
2496 .open = fib_route_seq_open,
2497 .read = seq_read,
2498 .llseek = seq_lseek,
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002499 .release = seq_release_net,
Robert Olsson19baf832005-06-21 12:43:18 -07002500};
2501
Denis V. Lunev61a02652008-01-10 03:21:09 -08002502int __net_init fib_proc_init(struct net *net)
Robert Olsson19baf832005-06-21 12:43:18 -07002503{
Denis V. Lunev61a02652008-01-10 03:21:09 -08002504 if (!proc_net_fops_create(net, "fib_trie", S_IRUGO, &fib_trie_fops))
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002505 goto out1;
2506
Denis V. Lunev61a02652008-01-10 03:21:09 -08002507 if (!proc_net_fops_create(net, "fib_triestat", S_IRUGO,
2508 &fib_triestat_fops))
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002509 goto out2;
2510
Denis V. Lunev61a02652008-01-10 03:21:09 -08002511 if (!proc_net_fops_create(net, "route", S_IRUGO, &fib_route_fops))
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002512 goto out3;
2513
Robert Olsson19baf832005-06-21 12:43:18 -07002514 return 0;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002515
2516out3:
Denis V. Lunev61a02652008-01-10 03:21:09 -08002517 proc_net_remove(net, "fib_triestat");
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002518out2:
Denis V. Lunev61a02652008-01-10 03:21:09 -08002519 proc_net_remove(net, "fib_trie");
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002520out1:
2521 return -ENOMEM;
Robert Olsson19baf832005-06-21 12:43:18 -07002522}
2523
Denis V. Lunev61a02652008-01-10 03:21:09 -08002524void __net_exit fib_proc_exit(struct net *net)
Robert Olsson19baf832005-06-21 12:43:18 -07002525{
Denis V. Lunev61a02652008-01-10 03:21:09 -08002526 proc_net_remove(net, "fib_trie");
2527 proc_net_remove(net, "fib_triestat");
2528 proc_net_remove(net, "route");
Robert Olsson19baf832005-06-21 12:43:18 -07002529}
2530
2531#endif /* CONFIG_PROC_FS */