blob: b56e88edf1b351a10a8d83a0abf0a20f4c663e29 [file] [log] [blame]
Robert Olsson19baf832005-06-21 12:43:18 -07001/*
2 * This program is free software; you can redistribute it and/or
3 * modify it under the terms of the GNU General Public License
4 * as published by the Free Software Foundation; either version
5 * 2 of the License, or (at your option) any later version.
6 *
7 * Robert Olsson <robert.olsson@its.uu.se> Uppsala Universitet
8 * & Swedish University of Agricultural Sciences.
9 *
10 * Jens Laas <jens.laas@data.slu.se> Swedish University of
11 * Agricultural Sciences.
12 *
13 * Hans Liss <hans.liss@its.uu.se> Uppsala Universitet
14 *
15 * This work is based on the LPC-trie which is originally descibed in:
16 *
17 * An experimental study of compression methods for dynamic tries
18 * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002.
19 * http://www.nada.kth.se/~snilsson/public/papers/dyntrie2/
20 *
21 *
22 * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson
23 * IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999
24 *
25 * Version: $Id: fib_trie.c,v 1.3 2005/06/08 14:20:01 robert Exp $
26 *
27 *
28 * Code from fib_hash has been reused which includes the following header:
29 *
30 *
31 * INET An implementation of the TCP/IP protocol suite for the LINUX
32 * operating system. INET is implemented using the BSD Socket
33 * interface as the means of communication with the user level.
34 *
35 * IPv4 FIB: lookup engine and maintenance routines.
36 *
37 *
38 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
39 *
40 * This program is free software; you can redistribute it and/or
41 * modify it under the terms of the GNU General Public License
42 * as published by the Free Software Foundation; either version
43 * 2 of the License, or (at your option) any later version.
44 */
45
Robert Olssonf835e472005-06-28 15:00:39 -070046#define VERSION "0.324"
Robert Olsson19baf832005-06-21 12:43:18 -070047
48#include <linux/config.h>
49#include <asm/uaccess.h>
50#include <asm/system.h>
51#include <asm/bitops.h>
52#include <linux/types.h>
53#include <linux/kernel.h>
54#include <linux/sched.h>
55#include <linux/mm.h>
56#include <linux/string.h>
57#include <linux/socket.h>
58#include <linux/sockios.h>
59#include <linux/errno.h>
60#include <linux/in.h>
61#include <linux/inet.h>
62#include <linux/netdevice.h>
63#include <linux/if_arp.h>
64#include <linux/proc_fs.h>
65#include <linux/skbuff.h>
66#include <linux/netlink.h>
67#include <linux/init.h>
68#include <linux/list.h>
69#include <net/ip.h>
70#include <net/protocol.h>
71#include <net/route.h>
72#include <net/tcp.h>
73#include <net/sock.h>
74#include <net/ip_fib.h>
75#include "fib_lookup.h"
76
77#undef CONFIG_IP_FIB_TRIE_STATS
78#define MAX_CHILDS 16384
79
80#define EXTRACT(p, n, str) ((str)<<(p)>>(32-(n)))
81#define KEYLENGTH (8*sizeof(t_key))
82#define MASK_PFX(k, l) (((l)==0)?0:(k >> (KEYLENGTH-l)) << (KEYLENGTH-l))
83#define TKEY_GET_MASK(offset, bits) (((bits)==0)?0:((t_key)(-1) << (KEYLENGTH - bits) >> offset))
84
85static DEFINE_RWLOCK(fib_lock);
86
87typedef unsigned int t_key;
88
89#define T_TNODE 0
90#define T_LEAF 1
91#define NODE_TYPE_MASK 0x1UL
92#define NODE_PARENT(_node) \
93((struct tnode *)((_node)->_parent & ~NODE_TYPE_MASK))
94#define NODE_SET_PARENT(_node, _ptr) \
95((_node)->_parent = (((unsigned long)(_ptr)) | \
96 ((_node)->_parent & NODE_TYPE_MASK)))
97#define NODE_INIT_PARENT(_node, _type) \
98((_node)->_parent = (_type))
99#define NODE_TYPE(_node) \
100((_node)->_parent & NODE_TYPE_MASK)
101
102#define IS_TNODE(n) (!(n->_parent & T_LEAF))
103#define IS_LEAF(n) (n->_parent & T_LEAF)
104
105struct node {
106 t_key key;
107 unsigned long _parent;
108};
109
110struct leaf {
111 t_key key;
112 unsigned long _parent;
113 struct hlist_head list;
114};
115
116struct leaf_info {
117 struct hlist_node hlist;
118 int plen;
119 struct list_head falh;
120};
121
122struct tnode {
123 t_key key;
124 unsigned long _parent;
125 unsigned short pos:5; /* 2log(KEYLENGTH) bits needed */
126 unsigned short bits:5; /* 2log(KEYLENGTH) bits needed */
127 unsigned short full_children; /* KEYLENGTH bits needed */
128 unsigned short empty_children; /* KEYLENGTH bits needed */
129 struct node *child[0];
130};
131
132#ifdef CONFIG_IP_FIB_TRIE_STATS
133struct trie_use_stats {
134 unsigned int gets;
135 unsigned int backtrack;
136 unsigned int semantic_match_passed;
137 unsigned int semantic_match_miss;
138 unsigned int null_node_hit;
139};
140#endif
141
142struct trie_stat {
143 unsigned int totdepth;
144 unsigned int maxdepth;
145 unsigned int tnodes;
146 unsigned int leaves;
147 unsigned int nullpointers;
148 unsigned int nodesizes[MAX_CHILDS];
149};
150
151struct trie {
152 struct node *trie;
153#ifdef CONFIG_IP_FIB_TRIE_STATS
154 struct trie_use_stats stats;
155#endif
156 int size;
157 unsigned int revision;
158};
159
160static int trie_debug = 0;
161
162static int tnode_full(struct tnode *tn, struct node *n);
163static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n);
164static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull);
165static int tnode_child_length(struct tnode *tn);
166static struct node *resize(struct trie *t, struct tnode *tn);
167static struct tnode *inflate(struct trie *t, struct tnode *tn);
168static struct tnode *halve(struct trie *t, struct tnode *tn);
169static void tnode_free(struct tnode *tn);
170static void trie_dump_seq(struct seq_file *seq, struct trie *t);
171extern struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio);
172extern int fib_detect_death(struct fib_info *fi, int order,
173 struct fib_info **last_resort, int *last_idx, int *dflt);
174
175extern void rtmsg_fib(int event, u32 key, struct fib_alias *fa, int z, int tb_id,
176 struct nlmsghdr *n, struct netlink_skb_parms *req);
177
178static kmem_cache_t *fn_alias_kmem;
179static struct trie *trie_local = NULL, *trie_main = NULL;
180
181static void trie_bug(char *err)
182{
183 printk("Trie Bug: %s\n", err);
184 BUG();
185}
186
187static inline struct node *tnode_get_child(struct tnode *tn, int i)
188{
189 if (i >= 1<<tn->bits)
190 trie_bug("tnode_get_child");
191
192 return tn->child[i];
193}
194
195static inline int tnode_child_length(struct tnode *tn)
196{
197 return 1<<tn->bits;
198}
199
200/*
201 _________________________________________________________________
202 | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C |
203 ----------------------------------------------------------------
204 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
205
206 _________________________________________________________________
207 | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u |
208 -----------------------------------------------------------------
209 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
210
211 tp->pos = 7
212 tp->bits = 3
213 n->pos = 15
214 n->bits=4
215 KEYLENGTH=32
216*/
217
218static inline t_key tkey_extract_bits(t_key a, int offset, int bits)
219{
220 if (offset < KEYLENGTH)
221 return ((t_key)(a << offset)) >> (KEYLENGTH - bits);
222 else
223 return 0;
224}
225
226static inline int tkey_equals(t_key a, t_key b)
227{
228 return a == b;
229}
230
231static inline int tkey_sub_equals(t_key a, int offset, int bits, t_key b)
232{
233 if (bits == 0 || offset >= KEYLENGTH)
234 return 1;
235 bits = bits > KEYLENGTH ? KEYLENGTH : bits;
236 return ((a ^ b) << offset) >> (KEYLENGTH - bits) == 0;
237}
238
239static inline int tkey_mismatch(t_key a, int offset, t_key b)
240{
241 t_key diff = a ^ b;
242 int i = offset;
243
244 if(!diff)
245 return 0;
246 while((diff << i) >> (KEYLENGTH-1) == 0)
247 i++;
248 return i;
249}
250
251/* Candiate for fib_semantics */
252
253static void fn_free_alias(struct fib_alias *fa)
254{
255 fib_release_info(fa->fa_info);
256 kmem_cache_free(fn_alias_kmem, fa);
257}
258
259/*
260 To understand this stuff, an understanding of keys and all their bits is
261 necessary. Every node in the trie has a key associated with it, but not
262 all of the bits in that key are significant.
263
264 Consider a node 'n' and its parent 'tp'.
265
266 If n is a leaf, every bit in its key is significant. Its presence is
267 necessitaded by path compression, since during a tree traversal (when
268 searching for a leaf - unless we are doing an insertion) we will completely
269 ignore all skipped bits we encounter. Thus we need to verify, at the end of
270 a potentially successful search, that we have indeed been walking the
271 correct key path.
272
273 Note that we can never "miss" the correct key in the tree if present by
274 following the wrong path. Path compression ensures that segments of the key
275 that are the same for all keys with a given prefix are skipped, but the
276 skipped part *is* identical for each node in the subtrie below the skipped
277 bit! trie_insert() in this implementation takes care of that - note the
278 call to tkey_sub_equals() in trie_insert().
279
280 if n is an internal node - a 'tnode' here, the various parts of its key
281 have many different meanings.
282
283 Example:
284 _________________________________________________________________
285 | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C |
286 -----------------------------------------------------------------
287 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
288
289 _________________________________________________________________
290 | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u |
291 -----------------------------------------------------------------
292 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
293
294 tp->pos = 7
295 tp->bits = 3
296 n->pos = 15
297 n->bits=4
298
299 First, let's just ignore the bits that come before the parent tp, that is
300 the bits from 0 to (tp->pos-1). They are *known* but at this point we do
301 not use them for anything.
302
303 The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the
304 index into the parent's child array. That is, they will be used to find
305 'n' among tp's children.
306
307 The bits from (tp->pos + tp->bits) to (n->pos - 1) - "S" - are skipped bits
308 for the node n.
309
310 All the bits we have seen so far are significant to the node n. The rest
311 of the bits are really not needed or indeed known in n->key.
312
313 The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into
314 n's child array, and will of course be different for each child.
315
316 The rest of the bits, from (n->pos + n->bits) onward, are completely unknown
317 at this point.
318
319*/
320
321static void check_tnode(struct tnode *tn)
322{
323 if(tn && tn->pos+tn->bits > 32) {
324 printk("TNODE ERROR tn=%p, pos=%d, bits=%d\n", tn, tn->pos, tn->bits);
325 }
326}
327
328static int halve_threshold = 25;
329static int inflate_threshold = 50;
330
331static struct leaf *leaf_new(void)
332{
333 struct leaf *l = kmalloc(sizeof(struct leaf), GFP_KERNEL);
334 if(l) {
335 NODE_INIT_PARENT(l, T_LEAF);
336 INIT_HLIST_HEAD(&l->list);
337 }
338 return l;
339}
340
341static struct leaf_info *leaf_info_new(int plen)
342{
343 struct leaf_info *li = kmalloc(sizeof(struct leaf_info), GFP_KERNEL);
Robert Olssonf835e472005-06-28 15:00:39 -0700344 if(li) {
345 li->plen = plen;
346 INIT_LIST_HEAD(&li->falh);
347 }
Robert Olsson19baf832005-06-21 12:43:18 -0700348 return li;
349}
350
351static inline void free_leaf(struct leaf *l)
352{
353 kfree(l);
354}
355
356static inline void free_leaf_info(struct leaf_info *li)
357{
358 kfree(li);
359}
360
361static struct tnode* tnode_new(t_key key, int pos, int bits)
362{
363 int nchildren = 1<<bits;
364 int sz = sizeof(struct tnode) + nchildren * sizeof(struct node *);
365 struct tnode *tn = kmalloc(sz, GFP_KERNEL);
366
367 if(tn) {
368 memset(tn, 0, sz);
369 NODE_INIT_PARENT(tn, T_TNODE);
370 tn->pos = pos;
371 tn->bits = bits;
372 tn->key = key;
373 tn->full_children = 0;
374 tn->empty_children = 1<<bits;
375 }
376 if(trie_debug > 0)
377 printk("AT %p s=%u %u\n", tn, (unsigned int) sizeof(struct tnode),
378 (unsigned int) (sizeof(struct node) * 1<<bits));
379 return tn;
380}
381
382static void tnode_free(struct tnode *tn)
383{
384 if(!tn) {
385 trie_bug("tnode_free\n");
386 }
387 if(IS_LEAF(tn)) {
388 free_leaf((struct leaf *)tn);
389 if(trie_debug > 0 )
390 printk("FL %p \n", tn);
391 }
392 else if(IS_TNODE(tn)) {
393 kfree(tn);
394 if(trie_debug > 0 )
395 printk("FT %p \n", tn);
396 }
397 else {
398 trie_bug("tnode_free\n");
399 }
400}
401
402/*
403 * Check whether a tnode 'n' is "full", i.e. it is an internal node
404 * and no bits are skipped. See discussion in dyntree paper p. 6
405 */
406
407static inline int tnode_full(struct tnode *tn, struct node *n)
408{
409 if(n == NULL || IS_LEAF(n))
410 return 0;
411
412 return ((struct tnode *) n)->pos == tn->pos + tn->bits;
413}
414
415static inline void put_child(struct trie *t, struct tnode *tn, int i, struct node *n)
416{
417 tnode_put_child_reorg(tn, i, n, -1);
418}
419
420 /*
421 * Add a child at position i overwriting the old value.
422 * Update the value of full_children and empty_children.
423 */
424
425static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull)
426{
427 struct node *chi;
428 int isfull;
429
430 if(i >= 1<<tn->bits) {
431 printk("bits=%d, i=%d\n", tn->bits, i);
432 trie_bug("tnode_put_child_reorg bits");
433 }
434 write_lock_bh(&fib_lock);
435 chi = tn->child[i];
436
437 /* update emptyChildren */
438 if (n == NULL && chi != NULL)
439 tn->empty_children++;
440 else if (n != NULL && chi == NULL)
441 tn->empty_children--;
442
443 /* update fullChildren */
444 if (wasfull == -1)
445 wasfull = tnode_full(tn, chi);
446
447 isfull = tnode_full(tn, n);
448 if (wasfull && !isfull)
449 tn->full_children--;
450
451 else if (!wasfull && isfull)
452 tn->full_children++;
453 if(n)
454 NODE_SET_PARENT(n, tn);
455
456 tn->child[i] = n;
457 write_unlock_bh(&fib_lock);
458}
459
460static struct node *resize(struct trie *t, struct tnode *tn)
461{
462 int i;
463
464 if (!tn)
465 return NULL;
466
467 if(trie_debug)
468 printk("In tnode_resize %p inflate_threshold=%d threshold=%d\n",
469 tn, inflate_threshold, halve_threshold);
470
471 /* No children */
472 if (tn->empty_children == tnode_child_length(tn)) {
473 tnode_free(tn);
474 return NULL;
475 }
476 /* One child */
477 if (tn->empty_children == tnode_child_length(tn) - 1)
478 for (i = 0; i < tnode_child_length(tn); i++) {
479
480 write_lock_bh(&fib_lock);
481 if (tn->child[i] != NULL) {
482
483 /* compress one level */
484 struct node *n = tn->child[i];
485 if(n)
486 NODE_INIT_PARENT(n, NODE_TYPE(n));
487
488 write_unlock_bh(&fib_lock);
489 tnode_free(tn);
490 return n;
491 }
492 write_unlock_bh(&fib_lock);
493 }
494 /*
495 * Double as long as the resulting node has a number of
496 * nonempty nodes that are above the threshold.
497 */
498
499 /*
500 * From "Implementing a dynamic compressed trie" by Stefan Nilsson of
501 * the Helsinki University of Technology and Matti Tikkanen of Nokia
502 * Telecommunications, page 6:
503 * "A node is doubled if the ratio of non-empty children to all
504 * children in the *doubled* node is at least 'high'."
505 *
506 * 'high' in this instance is the variable 'inflate_threshold'. It
507 * is expressed as a percentage, so we multiply it with
508 * tnode_child_length() and instead of multiplying by 2 (since the
509 * child array will be doubled by inflate()) and multiplying
510 * the left-hand side by 100 (to handle the percentage thing) we
511 * multiply the left-hand side by 50.
512 *
513 * The left-hand side may look a bit weird: tnode_child_length(tn)
514 * - tn->empty_children is of course the number of non-null children
515 * in the current node. tn->full_children is the number of "full"
516 * children, that is non-null tnodes with a skip value of 0.
517 * All of those will be doubled in the resulting inflated tnode, so
518 * we just count them one extra time here.
519 *
520 * A clearer way to write this would be:
521 *
522 * to_be_doubled = tn->full_children;
523 * not_to_be_doubled = tnode_child_length(tn) - tn->empty_children -
524 * tn->full_children;
525 *
526 * new_child_length = tnode_child_length(tn) * 2;
527 *
528 * new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) /
529 * new_child_length;
530 * if (new_fill_factor >= inflate_threshold)
531 *
532 * ...and so on, tho it would mess up the while() loop.
533 *
534 * anyway,
535 * 100 * (not_to_be_doubled + 2*to_be_doubled) / new_child_length >=
536 * inflate_threshold
537 *
538 * avoid a division:
539 * 100 * (not_to_be_doubled + 2*to_be_doubled) >=
540 * inflate_threshold * new_child_length
541 *
542 * expand not_to_be_doubled and to_be_doubled, and shorten:
543 * 100 * (tnode_child_length(tn) - tn->empty_children +
544 * tn->full_children ) >= inflate_threshold * new_child_length
545 *
546 * expand new_child_length:
547 * 100 * (tnode_child_length(tn) - tn->empty_children +
548 * tn->full_children ) >=
549 * inflate_threshold * tnode_child_length(tn) * 2
550 *
551 * shorten again:
552 * 50 * (tn->full_children + tnode_child_length(tn) -
553 * tn->empty_children ) >= inflate_threshold *
554 * tnode_child_length(tn)
555 *
556 */
557
558 check_tnode(tn);
559
560 while ((tn->full_children > 0 &&
561 50 * (tn->full_children + tnode_child_length(tn) - tn->empty_children) >=
562 inflate_threshold * tnode_child_length(tn))) {
563
564 tn = inflate(t, tn);
565 }
566
567 check_tnode(tn);
568
569 /*
570 * Halve as long as the number of empty children in this
571 * node is above threshold.
572 */
573 while (tn->bits > 1 &&
574 100 * (tnode_child_length(tn) - tn->empty_children) <
575 halve_threshold * tnode_child_length(tn))
576
577 tn = halve(t, tn);
578
579 /* Only one child remains */
580
581 if (tn->empty_children == tnode_child_length(tn) - 1)
582 for (i = 0; i < tnode_child_length(tn); i++) {
583
584 write_lock_bh(&fib_lock);
585 if (tn->child[i] != NULL) {
586 /* compress one level */
587 struct node *n = tn->child[i];
588
589 if(n)
590 NODE_INIT_PARENT(n, NODE_TYPE(n));
591
592 write_unlock_bh(&fib_lock);
593 tnode_free(tn);
594 return n;
595 }
596 write_unlock_bh(&fib_lock);
597 }
598
599 return (struct node *) tn;
600}
601
602static struct tnode *inflate(struct trie *t, struct tnode *tn)
603{
604 struct tnode *inode;
605 struct tnode *oldtnode = tn;
606 int olen = tnode_child_length(tn);
607 int i;
608
609 if(trie_debug)
610 printk("In inflate\n");
611
612 tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1);
613
614 if (!tn)
615 trie_bug("tnode_new failed");
616
617 for(i = 0; i < olen; i++) {
618 struct node *node = tnode_get_child(oldtnode, i);
619
620 /* An empty child */
621 if (node == NULL)
622 continue;
623
624 /* A leaf or an internal node with skipped bits */
625
626 if(IS_LEAF(node) || ((struct tnode *) node)->pos >
627 tn->pos + tn->bits - 1) {
628 if(tkey_extract_bits(node->key, tn->pos + tn->bits - 1,
629 1) == 0)
630 put_child(t, tn, 2*i, node);
631 else
632 put_child(t, tn, 2*i+1, node);
633 continue;
634 }
635
636 /* An internal node with two children */
637 inode = (struct tnode *) node;
638
639 if (inode->bits == 1) {
640 put_child(t, tn, 2*i, inode->child[0]);
641 put_child(t, tn, 2*i+1, inode->child[1]);
642
643 tnode_free(inode);
644 }
645
646 /* An internal node with more than two children */
647 else {
648 struct tnode *left, *right;
649 int size, j;
650
651 /* We will replace this node 'inode' with two new
652 * ones, 'left' and 'right', each with half of the
653 * original children. The two new nodes will have
654 * a position one bit further down the key and this
655 * means that the "significant" part of their keys
656 * (see the discussion near the top of this file)
657 * will differ by one bit, which will be "0" in
658 * left's key and "1" in right's key. Since we are
659 * moving the key position by one step, the bit that
660 * we are moving away from - the bit at position
661 * (inode->pos) - is the one that will differ between
662 * left and right. So... we synthesize that bit in the
663 * two new keys.
664 * The mask 'm' below will be a single "one" bit at
665 * the position (inode->pos)
666 */
667
668 t_key m = TKEY_GET_MASK(inode->pos, 1);
669
670 /* Use the old key, but set the new significant
671 * bit to zero.
672 */
673 left = tnode_new(inode->key&(~m), inode->pos + 1,
674 inode->bits - 1);
675
676 if(!left)
677 trie_bug("tnode_new failed");
678
679
680 /* Use the old key, but set the new significant
681 * bit to one.
682 */
683 right = tnode_new(inode->key|m, inode->pos + 1,
684 inode->bits - 1);
685
686 if(!right)
687 trie_bug("tnode_new failed");
688
689 size = tnode_child_length(left);
690 for(j = 0; j < size; j++) {
691 put_child(t, left, j, inode->child[j]);
692 put_child(t, right, j, inode->child[j + size]);
693 }
694 put_child(t, tn, 2*i, resize(t, left));
695 put_child(t, tn, 2*i+1, resize(t, right));
696
697 tnode_free(inode);
698 }
699 }
700 tnode_free(oldtnode);
701 return tn;
702}
703
704static struct tnode *halve(struct trie *t, struct tnode *tn)
705{
706 struct tnode *oldtnode = tn;
707 struct node *left, *right;
708 int i;
709 int olen = tnode_child_length(tn);
710
711 if(trie_debug) printk("In halve\n");
712
713 tn=tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1);
714
715 if(!tn)
716 trie_bug("tnode_new failed");
717
718 for(i = 0; i < olen; i += 2) {
719 left = tnode_get_child(oldtnode, i);
720 right = tnode_get_child(oldtnode, i+1);
721
722 /* At least one of the children is empty */
723 if (left == NULL) {
724 if (right == NULL) /* Both are empty */
725 continue;
726 put_child(t, tn, i/2, right);
727 } else if (right == NULL)
728 put_child(t, tn, i/2, left);
729
730 /* Two nonempty children */
731 else {
732 struct tnode *newBinNode =
733 tnode_new(left->key, tn->pos + tn->bits, 1);
734
735 if(!newBinNode)
736 trie_bug("tnode_new failed");
737
738 put_child(t, newBinNode, 0, left);
739 put_child(t, newBinNode, 1, right);
740 put_child(t, tn, i/2, resize(t, newBinNode));
741 }
742 }
743 tnode_free(oldtnode);
744 return tn;
745}
746
747static void *trie_init(struct trie *t)
748{
749 if(t) {
750 t->size = 0;
751 t->trie = NULL;
752 t->revision = 0;
753#ifdef CONFIG_IP_FIB_TRIE_STATS
754 memset(&t->stats, 0, sizeof(struct trie_use_stats));
755#endif
756 }
757 return t;
758}
759
760static struct leaf_info *find_leaf_info(struct hlist_head *head, int plen)
761{
762 struct hlist_node *node;
763 struct leaf_info *li;
764
765 hlist_for_each_entry(li, node, head, hlist) {
766
767 if ( li->plen == plen )
768 return li;
769 }
770 return NULL;
771}
772
773static inline struct list_head * get_fa_head(struct leaf *l, int plen)
774{
775 struct list_head *fa_head=NULL;
776 struct leaf_info *li = find_leaf_info(&l->list, plen);
777
778 if(li)
779 fa_head = &li->falh;
780
781 return fa_head;
782}
783
784static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new)
785{
786 struct leaf_info *li=NULL, *last=NULL;
787 struct hlist_node *node, *tmp;
788
789 write_lock_bh(&fib_lock);
790
791 if(hlist_empty(head))
792 hlist_add_head(&new->hlist, head);
793 else {
794 hlist_for_each_entry_safe(li, node, tmp, head, hlist) {
795
796 if (new->plen > li->plen)
797 break;
798
799 last = li;
800 }
801 if(last)
802 hlist_add_after(&last->hlist, &new->hlist);
803 else
804 hlist_add_before(&new->hlist, &li->hlist);
805 }
806 write_unlock_bh(&fib_lock);
807}
808
809static struct leaf *
810fib_find_node(struct trie *t, u32 key)
811{
812 int pos;
813 struct tnode *tn;
814 struct node *n;
815
816 pos = 0;
817 n=t->trie;
818
819 while (n != NULL && NODE_TYPE(n) == T_TNODE) {
820 tn = (struct tnode *) n;
821
822 check_tnode(tn);
823
824 if(tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
825 pos=tn->pos + tn->bits;
826 n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits));
827 }
828 else
829 break;
830 }
831 /* Case we have found a leaf. Compare prefixes */
832
833 if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) {
834 struct leaf *l = (struct leaf *) n;
835 return l;
836 }
837 return NULL;
838}
839
840static struct node *trie_rebalance(struct trie *t, struct tnode *tn)
841{
842 int i = 0;
843 int wasfull;
844 t_key cindex, key;
845 struct tnode *tp = NULL;
846
847 if(!tn)
848 BUG();
849
850 key = tn->key;
851 i = 0;
852
853 while (tn != NULL && NODE_PARENT(tn) != NULL) {
854
855 if( i > 10 ) {
856 printk("Rebalance tn=%p \n", tn);
857 if(tn) printk("tn->parent=%p \n", NODE_PARENT(tn));
858
859 printk("Rebalance tp=%p \n", tp);
860 if(tp) printk("tp->parent=%p \n", NODE_PARENT(tp));
861 }
862
863 if( i > 12 ) BUG();
864 i++;
865
866 tp = NODE_PARENT(tn);
867 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
868 wasfull = tnode_full(tp, tnode_get_child(tp, cindex));
869 tn = (struct tnode *) resize (t, (struct tnode *)tn);
870 tnode_put_child_reorg((struct tnode *)tp, cindex,(struct node*)tn, wasfull);
871
872 if(!NODE_PARENT(tn))
873 break;
874
875 tn = NODE_PARENT(tn);
876 }
877 /* Handle last (top) tnode */
878 if (IS_TNODE(tn))
879 tn = (struct tnode*) resize(t, (struct tnode *)tn);
880
881 return (struct node*) tn;
882}
883
Robert Olssonf835e472005-06-28 15:00:39 -0700884static struct list_head *
885fib_insert_node(struct trie *t, int *err, u32 key, int plen)
Robert Olsson19baf832005-06-21 12:43:18 -0700886{
887 int pos, newpos;
888 struct tnode *tp = NULL, *tn = NULL;
889 struct node *n;
890 struct leaf *l;
891 int missbit;
892 struct list_head *fa_head=NULL;
893 struct leaf_info *li;
894 t_key cindex;
895
896 pos = 0;
897 n=t->trie;
898
899 /* If we point to NULL, stop. Either the tree is empty and we should
900 * just put a new leaf in if, or we have reached an empty child slot,
901 * and we should just put our new leaf in that.
902 * If we point to a T_TNODE, check if it matches our key. Note that
903 * a T_TNODE might be skipping any number of bits - its 'pos' need
904 * not be the parent's 'pos'+'bits'!
905 *
906 * If it does match the current key, get pos/bits from it, extract
907 * the index from our key, push the T_TNODE and walk the tree.
908 *
909 * If it doesn't, we have to replace it with a new T_TNODE.
910 *
911 * If we point to a T_LEAF, it might or might not have the same key
912 * as we do. If it does, just change the value, update the T_LEAF's
913 * value, and return it.
914 * If it doesn't, we need to replace it with a T_TNODE.
915 */
916
917 while (n != NULL && NODE_TYPE(n) == T_TNODE) {
918 tn = (struct tnode *) n;
919
920 check_tnode(tn);
921
922 if(tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
923 tp = tn;
924 pos=tn->pos + tn->bits;
925 n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits));
926
927 if(n && NODE_PARENT(n) != tn) {
928 printk("BUG tn=%p, n->parent=%p\n", tn, NODE_PARENT(n));
929 BUG();
930 }
931 }
932 else
933 break;
934 }
935
936 /*
937 * n ----> NULL, LEAF or TNODE
938 *
939 * tp is n's (parent) ----> NULL or TNODE
940 */
941
942 if(tp && IS_LEAF(tp))
943 BUG();
944
Robert Olsson19baf832005-06-21 12:43:18 -0700945
946 /* Case 1: n is a leaf. Compare prefixes */
947
948 if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) {
949 struct leaf *l = ( struct leaf *) n;
950
951 li = leaf_info_new(plen);
952
Robert Olssonf835e472005-06-28 15:00:39 -0700953 if(! li) {
954 *err = -ENOMEM;
955 goto err;
956 }
Robert Olsson19baf832005-06-21 12:43:18 -0700957
958 fa_head = &li->falh;
959 insert_leaf_info(&l->list, li);
960 goto done;
961 }
962 t->size++;
963 l = leaf_new();
964
Robert Olssonf835e472005-06-28 15:00:39 -0700965 if(! l) {
966 *err = -ENOMEM;
967 goto err;
968 }
Robert Olsson19baf832005-06-21 12:43:18 -0700969
970 l->key = key;
971 li = leaf_info_new(plen);
972
Robert Olssonf835e472005-06-28 15:00:39 -0700973 if(! li) {
974 tnode_free((struct tnode *) l);
975 *err = -ENOMEM;
976 goto err;
977 }
Robert Olsson19baf832005-06-21 12:43:18 -0700978
979 fa_head = &li->falh;
980 insert_leaf_info(&l->list, li);
981
982 /* Case 2: n is NULL, and will just insert a new leaf */
983 if (t->trie && n == NULL) {
984
985 NODE_SET_PARENT(l, tp);
986
987 if (!tp)
988 BUG();
989
990 else {
991 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
992 put_child(t, (struct tnode *)tp, cindex, (struct node *)l);
993 }
994 }
995 /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */
996 else {
997 /*
998 * Add a new tnode here
999 * first tnode need some special handling
1000 */
1001
1002 if (tp)
1003 pos=tp->pos+tp->bits;
1004 else
1005 pos=0;
1006 if(n) {
1007 newpos = tkey_mismatch(key, pos, n->key);
1008 tn = tnode_new(n->key, newpos, 1);
1009 }
1010 else {
1011 newpos = 0;
1012 tn = tnode_new(key, newpos, 1); /* First tnode */
1013 }
Robert Olsson19baf832005-06-21 12:43:18 -07001014
Robert Olssonf835e472005-06-28 15:00:39 -07001015 if(!tn) {
1016 free_leaf_info(li);
1017 tnode_free((struct tnode *) l);
1018 *err = -ENOMEM;
1019 goto err;
1020 }
1021
Robert Olsson19baf832005-06-21 12:43:18 -07001022 NODE_SET_PARENT(tn, tp);
1023
1024 missbit=tkey_extract_bits(key, newpos, 1);
1025 put_child(t, tn, missbit, (struct node *)l);
1026 put_child(t, tn, 1-missbit, n);
1027
1028 if(tp) {
1029 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1030 put_child(t, (struct tnode *)tp, cindex, (struct node *)tn);
1031 }
1032 else {
1033 t->trie = (struct node*) tn; /* First tnode */
1034 tp = tn;
1035 }
1036 }
1037 if(tp && tp->pos+tp->bits > 32) {
1038 printk("ERROR tp=%p pos=%d, bits=%d, key=%0x plen=%d\n",
1039 tp, tp->pos, tp->bits, key, plen);
1040 }
1041 /* Rebalance the trie */
1042 t->trie = trie_rebalance(t, tp);
Robert Olssonf835e472005-06-28 15:00:39 -07001043done:
1044 t->revision++;
1045err:;
Robert Olsson19baf832005-06-21 12:43:18 -07001046 return fa_head;
1047}
1048
1049static int
1050fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
1051 struct nlmsghdr *nlhdr, struct netlink_skb_parms *req)
1052{
1053 struct trie *t = (struct trie *) tb->tb_data;
1054 struct fib_alias *fa, *new_fa;
1055 struct list_head *fa_head=NULL;
1056 struct fib_info *fi;
1057 int plen = r->rtm_dst_len;
1058 int type = r->rtm_type;
1059 u8 tos = r->rtm_tos;
1060 u32 key, mask;
1061 int err;
1062 struct leaf *l;
1063
1064 if (plen > 32)
1065 return -EINVAL;
1066
1067 key = 0;
1068 if (rta->rta_dst)
1069 memcpy(&key, rta->rta_dst, 4);
1070
1071 key = ntohl(key);
1072
1073 if(trie_debug)
1074 printk("Insert table=%d %08x/%d\n", tb->tb_id, key, plen);
1075
1076 mask = ntohl( inet_make_mask(plen) );
1077
1078 if(key & ~mask)
1079 return -EINVAL;
1080
1081 key = key & mask;
1082
1083 if ((fi = fib_create_info(r, rta, nlhdr, &err)) == NULL)
1084 goto err;
1085
1086 l = fib_find_node(t, key);
1087 fa = NULL;
1088
1089 if(l) {
1090 fa_head = get_fa_head(l, plen);
1091 fa = fib_find_alias(fa_head, tos, fi->fib_priority);
1092 }
1093
1094 /* Now fa, if non-NULL, points to the first fib alias
1095 * with the same keys [prefix,tos,priority], if such key already
1096 * exists or to the node before which we will insert new one.
1097 *
1098 * If fa is NULL, we will need to allocate a new one and
1099 * insert to the head of f.
1100 *
1101 * If f is NULL, no fib node matched the destination key
1102 * and we need to allocate a new one of those as well.
1103 */
1104
1105 if (fa &&
1106 fa->fa_info->fib_priority == fi->fib_priority) {
1107 struct fib_alias *fa_orig;
1108
1109 err = -EEXIST;
1110 if (nlhdr->nlmsg_flags & NLM_F_EXCL)
1111 goto out;
1112
1113 if (nlhdr->nlmsg_flags & NLM_F_REPLACE) {
1114 struct fib_info *fi_drop;
1115 u8 state;
1116
1117 write_lock_bh(&fib_lock);
1118
1119 fi_drop = fa->fa_info;
1120 fa->fa_info = fi;
1121 fa->fa_type = type;
1122 fa->fa_scope = r->rtm_scope;
1123 state = fa->fa_state;
1124 fa->fa_state &= ~FA_S_ACCESSED;
1125
1126 write_unlock_bh(&fib_lock);
1127
1128 fib_release_info(fi_drop);
1129 if (state & FA_S_ACCESSED)
1130 rt_cache_flush(-1);
1131
1132 goto succeeded;
1133 }
1134 /* Error if we find a perfect match which
1135 * uses the same scope, type, and nexthop
1136 * information.
1137 */
1138 fa_orig = fa;
1139 list_for_each_entry(fa, fa_orig->fa_list.prev, fa_list) {
1140 if (fa->fa_tos != tos)
1141 break;
1142 if (fa->fa_info->fib_priority != fi->fib_priority)
1143 break;
1144 if (fa->fa_type == type &&
1145 fa->fa_scope == r->rtm_scope &&
1146 fa->fa_info == fi) {
1147 goto out;
1148 }
1149 }
1150 if (!(nlhdr->nlmsg_flags & NLM_F_APPEND))
1151 fa = fa_orig;
1152 }
1153 err = -ENOENT;
1154 if (!(nlhdr->nlmsg_flags&NLM_F_CREATE))
1155 goto out;
1156
1157 err = -ENOBUFS;
1158 new_fa = kmem_cache_alloc(fn_alias_kmem, SLAB_KERNEL);
1159 if (new_fa == NULL)
1160 goto out;
1161
1162 new_fa->fa_info = fi;
1163 new_fa->fa_tos = tos;
1164 new_fa->fa_type = type;
1165 new_fa->fa_scope = r->rtm_scope;
1166 new_fa->fa_state = 0;
1167#if 0
1168 new_fa->dst = NULL;
1169#endif
1170 /*
1171 * Insert new entry to the list.
1172 */
1173
Robert Olssonf835e472005-06-28 15:00:39 -07001174 if(!fa_head) {
1175 fa_head = fib_insert_node(t, &err, key, plen);
1176 err = 0;
1177 if(err)
1178 goto out_free_new_fa;
1179 }
Robert Olsson19baf832005-06-21 12:43:18 -07001180
1181 write_lock_bh(&fib_lock);
1182
1183 list_add_tail(&new_fa->fa_list,
1184 (fa ? &fa->fa_list : fa_head));
1185
1186 write_unlock_bh(&fib_lock);
1187
1188 rt_cache_flush(-1);
1189 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, nlhdr, req);
1190succeeded:
1191 return 0;
Robert Olssonf835e472005-06-28 15:00:39 -07001192
1193out_free_new_fa:
1194 kmem_cache_free(fn_alias_kmem, new_fa);
Robert Olsson19baf832005-06-21 12:43:18 -07001195out:
1196 fib_release_info(fi);
1197err:;
1198 return err;
1199}
1200
1201static inline int check_leaf(struct trie *t, struct leaf *l, t_key key, int *plen, const struct flowi *flp,
1202 struct fib_result *res, int *err)
1203{
1204 int i;
1205 t_key mask;
1206 struct leaf_info *li;
1207 struct hlist_head *hhead = &l->list;
1208 struct hlist_node *node;
1209
1210 hlist_for_each_entry(li, node, hhead, hlist) {
1211
1212 i = li->plen;
1213 mask = ntohl(inet_make_mask(i));
1214 if (l->key != (key & mask))
1215 continue;
1216
1217 if (((*err) = fib_semantic_match(&li->falh, flp, res, l->key, mask, i)) == 0) {
1218 *plen = i;
1219#ifdef CONFIG_IP_FIB_TRIE_STATS
1220 t->stats.semantic_match_passed++;
1221#endif
1222 return 1;
1223 }
1224#ifdef CONFIG_IP_FIB_TRIE_STATS
1225 t->stats.semantic_match_miss++;
1226#endif
1227 }
1228 return 0;
1229}
1230
1231static int
1232fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
1233{
1234 struct trie *t = (struct trie *) tb->tb_data;
1235 int plen, ret = 0;
1236 struct node *n;
1237 struct tnode *pn;
1238 int pos, bits;
1239 t_key key=ntohl(flp->fl4_dst);
1240 int chopped_off;
1241 t_key cindex = 0;
1242 int current_prefix_length = KEYLENGTH;
1243 n = t->trie;
1244
1245 read_lock(&fib_lock);
1246 if(!n)
1247 goto failed;
1248
1249#ifdef CONFIG_IP_FIB_TRIE_STATS
1250 t->stats.gets++;
1251#endif
1252
1253 /* Just a leaf? */
1254 if (IS_LEAF(n)) {
1255 if( check_leaf(t, (struct leaf *)n, key, &plen, flp, res, &ret) )
1256 goto found;
1257 goto failed;
1258 }
1259 pn = (struct tnode *) n;
1260 chopped_off = 0;
1261
1262 while (pn) {
1263
1264 pos = pn->pos;
1265 bits = pn->bits;
1266
1267 if(!chopped_off)
1268 cindex = tkey_extract_bits(MASK_PFX(key, current_prefix_length), pos, bits);
1269
1270 n = tnode_get_child(pn, cindex);
1271
1272 if (n == NULL) {
1273#ifdef CONFIG_IP_FIB_TRIE_STATS
1274 t->stats.null_node_hit++;
1275#endif
1276 goto backtrace;
1277 }
1278
1279 if (IS_TNODE(n)) {
1280#define HL_OPTIMIZE
1281#ifdef HL_OPTIMIZE
1282 struct tnode *cn = (struct tnode *)n;
1283 t_key node_prefix, key_prefix, pref_mismatch;
1284 int mp;
1285
1286 /*
1287 * It's a tnode, and we can do some extra checks here if we
1288 * like, to avoid descending into a dead-end branch.
1289 * This tnode is in the parent's child array at index
1290 * key[p_pos..p_pos+p_bits] but potentially with some bits
1291 * chopped off, so in reality the index may be just a
1292 * subprefix, padded with zero at the end.
1293 * We can also take a look at any skipped bits in this
1294 * tnode - everything up to p_pos is supposed to be ok,
1295 * and the non-chopped bits of the index (se previous
1296 * paragraph) are also guaranteed ok, but the rest is
1297 * considered unknown.
1298 *
1299 * The skipped bits are key[pos+bits..cn->pos].
1300 */
1301
1302 /* If current_prefix_length < pos+bits, we are already doing
1303 * actual prefix matching, which means everything from
1304 * pos+(bits-chopped_off) onward must be zero along some
1305 * branch of this subtree - otherwise there is *no* valid
1306 * prefix present. Here we can only check the skipped
1307 * bits. Remember, since we have already indexed into the
1308 * parent's child array, we know that the bits we chopped of
1309 * *are* zero.
1310 */
1311
1312 /* NOTA BENE: CHECKING ONLY SKIPPED BITS FOR THE NEW NODE HERE */
1313
1314 if (current_prefix_length < pos+bits) {
1315 if (tkey_extract_bits(cn->key, current_prefix_length,
1316 cn->pos - current_prefix_length) != 0 ||
1317 !(cn->child[0]))
1318 goto backtrace;
1319 }
1320
1321 /*
1322 * If chopped_off=0, the index is fully validated and we
1323 * only need to look at the skipped bits for this, the new,
1324 * tnode. What we actually want to do is to find out if
1325 * these skipped bits match our key perfectly, or if we will
1326 * have to count on finding a matching prefix further down,
1327 * because if we do, we would like to have some way of
1328 * verifying the existence of such a prefix at this point.
1329 */
1330
1331 /* The only thing we can do at this point is to verify that
1332 * any such matching prefix can indeed be a prefix to our
1333 * key, and if the bits in the node we are inspecting that
1334 * do not match our key are not ZERO, this cannot be true.
1335 * Thus, find out where there is a mismatch (before cn->pos)
1336 * and verify that all the mismatching bits are zero in the
1337 * new tnode's key.
1338 */
1339
1340 /* Note: We aren't very concerned about the piece of the key
1341 * that precede pn->pos+pn->bits, since these have already been
1342 * checked. The bits after cn->pos aren't checked since these are
1343 * by definition "unknown" at this point. Thus, what we want to
1344 * see is if we are about to enter the "prefix matching" state,
1345 * and in that case verify that the skipped bits that will prevail
1346 * throughout this subtree are zero, as they have to be if we are
1347 * to find a matching prefix.
1348 */
1349
1350 node_prefix = MASK_PFX(cn->key, cn->pos);
1351 key_prefix = MASK_PFX(key, cn->pos);
1352 pref_mismatch = key_prefix^node_prefix;
1353 mp = 0;
1354
1355 /* In short: If skipped bits in this node do not match the search
1356 * key, enter the "prefix matching" state.directly.
1357 */
1358 if (pref_mismatch) {
1359 while (!(pref_mismatch & (1<<(KEYLENGTH-1)))) {
1360 mp++;
1361 pref_mismatch = pref_mismatch <<1;
1362 }
1363 key_prefix = tkey_extract_bits(cn->key, mp, cn->pos-mp);
1364
1365 if (key_prefix != 0)
1366 goto backtrace;
1367
1368 if (current_prefix_length >= cn->pos)
1369 current_prefix_length=mp;
1370 }
1371#endif
1372 pn = (struct tnode *)n; /* Descend */
1373 chopped_off = 0;
1374 continue;
1375 }
1376 if (IS_LEAF(n)) {
1377 if( check_leaf(t, (struct leaf *)n, key, &plen, flp, res, &ret))
1378 goto found;
1379 }
1380backtrace:
1381 chopped_off++;
1382
1383 /* As zero don't change the child key (cindex) */
1384 while ((chopped_off <= pn->bits) && !(cindex & (1<<(chopped_off-1)))) {
1385 chopped_off++;
1386 }
1387
1388 /* Decrease current_... with bits chopped off */
1389 if (current_prefix_length > pn->pos + pn->bits - chopped_off)
1390 current_prefix_length = pn->pos + pn->bits - chopped_off;
1391
1392 /*
1393 * Either we do the actual chop off according or if we have
1394 * chopped off all bits in this tnode walk up to our parent.
1395 */
1396
1397 if(chopped_off <= pn->bits)
1398 cindex &= ~(1 << (chopped_off-1));
1399 else {
1400 if( NODE_PARENT(pn) == NULL)
1401 goto failed;
1402
1403 /* Get Child's index */
1404 cindex = tkey_extract_bits(pn->key, NODE_PARENT(pn)->pos, NODE_PARENT(pn)->bits);
1405 pn = NODE_PARENT(pn);
1406 chopped_off = 0;
1407
1408#ifdef CONFIG_IP_FIB_TRIE_STATS
1409 t->stats.backtrack++;
1410#endif
1411 goto backtrace;
1412 }
1413 }
1414failed:
1415 ret = 1;
1416found:
1417 read_unlock(&fib_lock);
1418 return ret;
1419}
1420
1421static int trie_leaf_remove(struct trie *t, t_key key)
1422{
1423 t_key cindex;
1424 struct tnode *tp = NULL;
1425 struct node *n = t->trie;
1426 struct leaf *l;
1427
1428 if(trie_debug)
1429 printk("entering trie_leaf_remove(%p)\n", n);
1430
1431 /* Note that in the case skipped bits, those bits are *not* checked!
1432 * When we finish this, we will have NULL or a T_LEAF, and the
1433 * T_LEAF may or may not match our key.
1434 */
1435
1436 while (n != NULL && IS_TNODE(n)) {
1437 struct tnode *tn = (struct tnode *) n;
1438 check_tnode(tn);
1439 n = tnode_get_child(tn ,tkey_extract_bits(key, tn->pos, tn->bits));
1440
1441 if(n && NODE_PARENT(n) != tn) {
1442 printk("BUG tn=%p, n->parent=%p\n", tn, NODE_PARENT(n));
1443 BUG();
1444 }
1445 }
1446 l = (struct leaf *) n;
1447
1448 if(!n || !tkey_equals(l->key, key))
1449 return 0;
1450
1451 /*
1452 * Key found.
1453 * Remove the leaf and rebalance the tree
1454 */
1455
1456 t->revision++;
1457 t->size--;
1458
1459 tp = NODE_PARENT(n);
1460 tnode_free((struct tnode *) n);
1461
1462 if(tp) {
1463 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1464 put_child(t, (struct tnode *)tp, cindex, NULL);
1465 t->trie = trie_rebalance(t, tp);
1466 }
1467 else
1468 t->trie = NULL;
1469
1470 return 1;
1471}
1472
1473static int
1474fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
1475 struct nlmsghdr *nlhdr, struct netlink_skb_parms *req)
1476{
1477 struct trie *t = (struct trie *) tb->tb_data;
1478 u32 key, mask;
1479 int plen = r->rtm_dst_len;
1480 u8 tos = r->rtm_tos;
1481 struct fib_alias *fa, *fa_to_delete;
1482 struct list_head *fa_head;
1483 struct leaf *l;
1484
1485 if (plen > 32)
1486 return -EINVAL;
1487
1488 key = 0;
1489 if (rta->rta_dst)
1490 memcpy(&key, rta->rta_dst, 4);
1491
1492 key = ntohl(key);
1493 mask = ntohl( inet_make_mask(plen) );
1494
1495 if(key & ~mask)
1496 return -EINVAL;
1497
1498 key = key & mask;
1499 l = fib_find_node(t, key);
1500
1501 if(!l)
1502 return -ESRCH;
1503
1504 fa_head = get_fa_head(l, plen);
1505 fa = fib_find_alias(fa_head, tos, 0);
1506
1507 if (!fa)
1508 return -ESRCH;
1509
1510 if (trie_debug)
1511 printk("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t);
1512
1513 fa_to_delete = NULL;
1514 fa_head = fa->fa_list.prev;
1515 list_for_each_entry(fa, fa_head, fa_list) {
1516 struct fib_info *fi = fa->fa_info;
1517
1518 if (fa->fa_tos != tos)
1519 break;
1520
1521 if ((!r->rtm_type ||
1522 fa->fa_type == r->rtm_type) &&
1523 (r->rtm_scope == RT_SCOPE_NOWHERE ||
1524 fa->fa_scope == r->rtm_scope) &&
1525 (!r->rtm_protocol ||
1526 fi->fib_protocol == r->rtm_protocol) &&
1527 fib_nh_match(r, nlhdr, rta, fi) == 0) {
1528 fa_to_delete = fa;
1529 break;
1530 }
1531 }
1532
1533 if (fa_to_delete) {
1534 int kill_li = 0;
1535 struct leaf_info *li;
1536
1537 fa = fa_to_delete;
1538 rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, nlhdr, req);
1539
1540 l = fib_find_node(t, key);
1541 li = find_leaf_info(&l->list, plen);
1542
1543 write_lock_bh(&fib_lock);
1544
1545 list_del(&fa->fa_list);
1546
1547 if(list_empty(fa_head)) {
1548 hlist_del(&li->hlist);
1549 kill_li = 1;
1550 }
1551 write_unlock_bh(&fib_lock);
1552
1553 if(kill_li)
1554 free_leaf_info(li);
1555
1556 if(hlist_empty(&l->list))
1557 trie_leaf_remove(t, key);
1558
1559 if (fa->fa_state & FA_S_ACCESSED)
1560 rt_cache_flush(-1);
1561
1562 fn_free_alias(fa);
1563 return 0;
1564 }
1565 return -ESRCH;
1566}
1567
1568static int trie_flush_list(struct trie *t, struct list_head *head)
1569{
1570 struct fib_alias *fa, *fa_node;
1571 int found = 0;
1572
1573 list_for_each_entry_safe(fa, fa_node, head, fa_list) {
1574 struct fib_info *fi = fa->fa_info;
1575
1576 if (fi && (fi->fib_flags&RTNH_F_DEAD)) {
1577
1578 write_lock_bh(&fib_lock);
1579 list_del(&fa->fa_list);
1580 write_unlock_bh(&fib_lock);
1581
1582 fn_free_alias(fa);
1583 found++;
1584 }
1585 }
1586 return found;
1587}
1588
1589static int trie_flush_leaf(struct trie *t, struct leaf *l)
1590{
1591 int found = 0;
1592 struct hlist_head *lih = &l->list;
1593 struct hlist_node *node, *tmp;
1594 struct leaf_info *li = NULL;
1595
1596 hlist_for_each_entry_safe(li, node, tmp, lih, hlist) {
1597
1598 found += trie_flush_list(t, &li->falh);
1599
1600 if (list_empty(&li->falh)) {
1601
1602 write_lock_bh(&fib_lock);
1603 hlist_del(&li->hlist);
1604 write_unlock_bh(&fib_lock);
1605
1606 free_leaf_info(li);
1607 }
1608 }
1609 return found;
1610}
1611
1612static struct leaf *nextleaf(struct trie *t, struct leaf *thisleaf)
1613{
1614 struct node *c = (struct node *) thisleaf;
1615 struct tnode *p;
1616 int idx;
1617
1618 if(c == NULL) {
1619 if(t->trie == NULL)
1620 return NULL;
1621
1622 if (IS_LEAF(t->trie)) /* trie w. just a leaf */
1623 return (struct leaf *) t->trie;
1624
1625 p = (struct tnode*) t->trie; /* Start */
1626 }
1627 else
1628 p = (struct tnode *) NODE_PARENT(c);
1629 while (p) {
1630 int pos, last;
1631
1632 /* Find the next child of the parent */
1633 if(c)
1634 pos = 1 + tkey_extract_bits(c->key, p->pos, p->bits);
1635 else
1636 pos = 0;
1637
1638 last = 1 << p->bits;
1639 for(idx = pos; idx < last ; idx++) {
1640 if( p->child[idx]) {
1641
1642 /* Decend if tnode */
1643
1644 while (IS_TNODE(p->child[idx])) {
1645 p = (struct tnode*) p->child[idx];
1646 idx = 0;
1647
1648 /* Rightmost non-NULL branch */
1649 if( p && IS_TNODE(p) )
1650 while ( p->child[idx] == NULL && idx < (1 << p->bits) ) idx++;
1651
1652 /* Done with this tnode? */
1653 if( idx >= (1 << p->bits) || p->child[idx] == NULL )
1654 goto up;
1655 }
1656 return (struct leaf*) p->child[idx];
1657 }
1658 }
1659up:
1660 /* No more children go up one step */
1661 c = (struct node*) p;
1662 p = (struct tnode *) NODE_PARENT(p);
1663 }
1664 return NULL; /* Ready. Root of trie */
1665}
1666
1667static int fn_trie_flush(struct fib_table *tb)
1668{
1669 struct trie *t = (struct trie *) tb->tb_data;
1670 struct leaf *ll = NULL, *l = NULL;
1671 int found = 0, h;
1672
1673 t->revision++;
1674
1675 for (h=0; (l = nextleaf(t, l)) != NULL; h++) {
1676 found += trie_flush_leaf(t, l);
1677
1678 if (ll && hlist_empty(&ll->list))
1679 trie_leaf_remove(t, ll->key);
1680 ll = l;
1681 }
1682
1683 if (ll && hlist_empty(&ll->list))
1684 trie_leaf_remove(t, ll->key);
1685
1686 if(trie_debug)
1687 printk("trie_flush found=%d\n", found);
1688 return found;
1689}
1690
1691static int trie_last_dflt=-1;
1692
1693static void
1694fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
1695{
1696 struct trie *t = (struct trie *) tb->tb_data;
1697 int order, last_idx;
1698 struct fib_info *fi = NULL;
1699 struct fib_info *last_resort;
1700 struct fib_alias *fa = NULL;
1701 struct list_head *fa_head;
1702 struct leaf *l;
1703
1704 last_idx = -1;
1705 last_resort = NULL;
1706 order = -1;
1707
1708 read_lock(&fib_lock);
1709
1710 l = fib_find_node(t, 0);
1711 if(!l)
1712 goto out;
1713
1714 fa_head = get_fa_head(l, 0);
1715 if(!fa_head)
1716 goto out;
1717
1718 if (list_empty(fa_head))
1719 goto out;
1720
1721 list_for_each_entry(fa, fa_head, fa_list) {
1722 struct fib_info *next_fi = fa->fa_info;
1723
1724 if (fa->fa_scope != res->scope ||
1725 fa->fa_type != RTN_UNICAST)
1726 continue;
1727
1728 if (next_fi->fib_priority > res->fi->fib_priority)
1729 break;
1730 if (!next_fi->fib_nh[0].nh_gw ||
1731 next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
1732 continue;
1733 fa->fa_state |= FA_S_ACCESSED;
1734
1735 if (fi == NULL) {
1736 if (next_fi != res->fi)
1737 break;
1738 } else if (!fib_detect_death(fi, order, &last_resort,
1739 &last_idx, &trie_last_dflt)) {
1740 if (res->fi)
1741 fib_info_put(res->fi);
1742 res->fi = fi;
1743 atomic_inc(&fi->fib_clntref);
1744 trie_last_dflt = order;
1745 goto out;
1746 }
1747 fi = next_fi;
1748 order++;
1749 }
1750 if (order <= 0 || fi == NULL) {
1751 trie_last_dflt = -1;
1752 goto out;
1753 }
1754
1755 if (!fib_detect_death(fi, order, &last_resort, &last_idx, &trie_last_dflt)) {
1756 if (res->fi)
1757 fib_info_put(res->fi);
1758 res->fi = fi;
1759 atomic_inc(&fi->fib_clntref);
1760 trie_last_dflt = order;
1761 goto out;
1762 }
1763 if (last_idx >= 0) {
1764 if (res->fi)
1765 fib_info_put(res->fi);
1766 res->fi = last_resort;
1767 if (last_resort)
1768 atomic_inc(&last_resort->fib_clntref);
1769 }
1770 trie_last_dflt = last_idx;
1771 out:;
1772 read_unlock(&fib_lock);
1773}
1774
1775static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fib_table *tb,
1776 struct sk_buff *skb, struct netlink_callback *cb)
1777{
1778 int i, s_i;
1779 struct fib_alias *fa;
1780
1781 u32 xkey=htonl(key);
1782
1783 s_i=cb->args[3];
1784 i = 0;
1785
1786 list_for_each_entry(fa, fah, fa_list) {
1787 if (i < s_i) {
1788 i++;
1789 continue;
1790 }
1791 if (fa->fa_info->fib_nh == NULL) {
1792 printk("Trie error _fib_nh=NULL in fa[%d] k=%08x plen=%d\n", i, key, plen);
1793 i++;
1794 continue;
1795 }
1796 if (fa->fa_info == NULL) {
1797 printk("Trie error fa_info=NULL in fa[%d] k=%08x plen=%d\n", i, key, plen);
1798 i++;
1799 continue;
1800 }
1801
1802 if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid,
1803 cb->nlh->nlmsg_seq,
1804 RTM_NEWROUTE,
1805 tb->tb_id,
1806 fa->fa_type,
1807 fa->fa_scope,
1808 &xkey,
1809 plen,
1810 fa->fa_tos,
David S. Miller90f66912005-06-21 14:43:28 -07001811 fa->fa_info, 0) < 0) {
Robert Olsson19baf832005-06-21 12:43:18 -07001812 cb->args[3] = i;
1813 return -1;
1814 }
1815 i++;
1816 }
1817 cb->args[3]=i;
1818 return skb->len;
1819}
1820
1821static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, struct sk_buff *skb,
1822 struct netlink_callback *cb)
1823{
1824 int h, s_h;
1825 struct list_head *fa_head;
1826 struct leaf *l = NULL;
1827 s_h=cb->args[2];
1828
1829 for (h=0; (l = nextleaf(t, l)) != NULL; h++) {
1830
1831 if (h < s_h)
1832 continue;
1833 if (h > s_h)
1834 memset(&cb->args[3], 0,
1835 sizeof(cb->args) - 3*sizeof(cb->args[0]));
1836
1837 fa_head = get_fa_head(l, plen);
1838
1839 if(!fa_head)
1840 continue;
1841
1842 if(list_empty(fa_head))
1843 continue;
1844
1845 if (fn_trie_dump_fa(l->key, plen, fa_head, tb, skb, cb)<0) {
1846 cb->args[2]=h;
1847 return -1;
1848 }
1849 }
1850 cb->args[2]=h;
1851 return skb->len;
1852}
1853
1854static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb)
1855{
1856 int m, s_m;
1857 struct trie *t = (struct trie *) tb->tb_data;
1858
1859 s_m = cb->args[1];
1860
1861 read_lock(&fib_lock);
1862 for (m=0; m<=32; m++) {
1863
1864 if (m < s_m)
1865 continue;
1866 if (m > s_m)
1867 memset(&cb->args[2], 0,
1868 sizeof(cb->args) - 2*sizeof(cb->args[0]));
1869
1870 if (fn_trie_dump_plen(t, 32-m, tb, skb, cb)<0) {
1871 cb->args[1] = m;
1872 goto out;
1873 }
1874 }
1875 read_unlock(&fib_lock);
1876 cb->args[1] = m;
1877 return skb->len;
1878 out:
1879 read_unlock(&fib_lock);
1880 return -1;
1881}
1882
1883/* Fix more generic FIB names for init later */
1884
1885#ifdef CONFIG_IP_MULTIPLE_TABLES
1886struct fib_table * fib_hash_init(int id)
1887#else
1888struct fib_table * __init fib_hash_init(int id)
1889#endif
1890{
1891 struct fib_table *tb;
1892 struct trie *t;
1893
1894 if (fn_alias_kmem == NULL)
1895 fn_alias_kmem = kmem_cache_create("ip_fib_alias",
1896 sizeof(struct fib_alias),
1897 0, SLAB_HWCACHE_ALIGN,
1898 NULL, NULL);
1899
1900 tb = kmalloc(sizeof(struct fib_table) + sizeof(struct trie),
1901 GFP_KERNEL);
1902 if (tb == NULL)
1903 return NULL;
1904
1905 tb->tb_id = id;
1906 tb->tb_lookup = fn_trie_lookup;
1907 tb->tb_insert = fn_trie_insert;
1908 tb->tb_delete = fn_trie_delete;
1909 tb->tb_flush = fn_trie_flush;
1910 tb->tb_select_default = fn_trie_select_default;
1911 tb->tb_dump = fn_trie_dump;
1912 memset(tb->tb_data, 0, sizeof(struct trie));
1913
1914 t = (struct trie *) tb->tb_data;
1915
1916 trie_init(t);
1917
1918 if (id == RT_TABLE_LOCAL)
1919 trie_local=t;
1920 else if (id == RT_TABLE_MAIN)
1921 trie_main=t;
1922
1923 if (id == RT_TABLE_LOCAL)
1924 printk("IPv4 FIB: Using LC-trie version %s\n", VERSION);
1925
1926 return tb;
1927}
1928
1929/* Trie dump functions */
1930
1931static void putspace_seq(struct seq_file *seq, int n)
1932{
1933 while (n--) seq_printf(seq, " ");
1934}
1935
1936static void printbin_seq(struct seq_file *seq, unsigned int v, int bits)
1937{
1938 while (bits--)
1939 seq_printf(seq, "%s", (v & (1<<bits))?"1":"0");
1940}
1941
1942static void printnode_seq(struct seq_file *seq, int indent, struct node *n,
1943 int pend, int cindex, int bits)
1944{
1945 putspace_seq(seq, indent);
1946 if (IS_LEAF(n))
1947 seq_printf(seq, "|");
1948 else
1949 seq_printf(seq, "+");
1950 if (bits) {
1951 seq_printf(seq, "%d/", cindex);
1952 printbin_seq(seq, cindex, bits);
1953 seq_printf(seq, ": ");
1954 }
1955 else
1956 seq_printf(seq, "<root>: ");
1957 seq_printf(seq, "%s:%p ", IS_LEAF(n)?"Leaf":"Internal node", n);
1958
1959 if (IS_LEAF(n))
1960 seq_printf(seq, "key=%d.%d.%d.%d\n",
1961 n->key >> 24, (n->key >> 16) % 256, (n->key >> 8) % 256, n->key % 256);
1962 else {
1963 int plen=((struct tnode *)n)->pos;
1964 t_key prf=MASK_PFX(n->key, plen);
1965 seq_printf(seq, "key=%d.%d.%d.%d/%d\n",
1966 prf >> 24, (prf >> 16) % 256, (prf >> 8) % 256, prf % 256, plen);
1967 }
1968 if (IS_LEAF(n)) {
1969 struct leaf *l=(struct leaf *)n;
1970 struct fib_alias *fa;
1971 int i;
1972 for (i=32; i>=0; i--)
1973 if(find_leaf_info(&l->list, i)) {
1974
1975 struct list_head *fa_head = get_fa_head(l, i);
1976
1977 if(!fa_head)
1978 continue;
1979
1980 if(list_empty(fa_head))
1981 continue;
1982
1983 putspace_seq(seq, indent+2);
1984 seq_printf(seq, "{/%d...dumping}\n", i);
1985
1986
1987 list_for_each_entry(fa, fa_head, fa_list) {
1988 putspace_seq(seq, indent+2);
1989 if (fa->fa_info->fib_nh == NULL) {
1990 seq_printf(seq, "Error _fib_nh=NULL\n");
1991 continue;
1992 }
1993 if (fa->fa_info == NULL) {
1994 seq_printf(seq, "Error fa_info=NULL\n");
1995 continue;
1996 }
1997
1998 seq_printf(seq, "{type=%d scope=%d TOS=%d}\n",
1999 fa->fa_type,
2000 fa->fa_scope,
2001 fa->fa_tos);
2002 }
2003 }
2004 }
2005 else if (IS_TNODE(n)) {
2006 struct tnode *tn=(struct tnode *)n;
2007 putspace_seq(seq, indent); seq_printf(seq, "| ");
2008 seq_printf(seq, "{key prefix=%08x/", tn->key&TKEY_GET_MASK(0, tn->pos));
2009 printbin_seq(seq, tkey_extract_bits(tn->key, 0, tn->pos), tn->pos);
2010 seq_printf(seq, "}\n");
2011 putspace_seq(seq, indent); seq_printf(seq, "| ");
2012 seq_printf(seq, "{pos=%d", tn->pos);
2013 seq_printf(seq, " (skip=%d bits)", tn->pos - pend);
2014 seq_printf(seq, " bits=%d (%u children)}\n", tn->bits, (1 << tn->bits));
2015 putspace_seq(seq, indent); seq_printf(seq, "| ");
2016 seq_printf(seq, "{empty=%d full=%d}\n", tn->empty_children, tn->full_children);
2017 }
2018}
2019
2020static void trie_dump_seq(struct seq_file *seq, struct trie *t)
2021{
2022 struct node *n=t->trie;
2023 int cindex=0;
2024 int indent=1;
2025 int pend=0;
2026 int depth = 0;
2027
2028 read_lock(&fib_lock);
2029
2030 seq_printf(seq, "------ trie_dump of t=%p ------\n", t);
2031 if (n) {
2032 printnode_seq(seq, indent, n, pend, cindex, 0);
2033 if (IS_TNODE(n)) {
2034 struct tnode *tn=(struct tnode *)n;
2035 pend = tn->pos+tn->bits;
2036 putspace_seq(seq, indent); seq_printf(seq, "\\--\n");
2037 indent += 3;
2038 depth++;
2039
2040 while (tn && cindex < (1 << tn->bits)) {
2041 if (tn->child[cindex]) {
2042
2043 /* Got a child */
2044
2045 printnode_seq(seq, indent, tn->child[cindex], pend, cindex, tn->bits);
2046 if (IS_LEAF(tn->child[cindex])) {
2047 cindex++;
2048
2049 }
2050 else {
2051 /*
2052 * New tnode. Decend one level
2053 */
2054
2055 depth++;
2056 n=tn->child[cindex];
2057 tn=(struct tnode *)n;
2058 pend=tn->pos+tn->bits;
2059 putspace_seq(seq, indent); seq_printf(seq, "\\--\n");
2060 indent+=3;
2061 cindex=0;
2062 }
2063 }
2064 else
2065 cindex++;
2066
2067 /*
2068 * Test if we are done
2069 */
2070
2071 while (cindex >= (1 << tn->bits)) {
2072
2073 /*
2074 * Move upwards and test for root
2075 * pop off all traversed nodes
2076 */
2077
2078 if (NODE_PARENT(tn) == NULL) {
2079 tn = NULL;
2080 n = NULL;
2081 break;
2082 }
2083 else {
2084 cindex = tkey_extract_bits(tn->key, NODE_PARENT(tn)->pos, NODE_PARENT(tn)->bits);
2085 tn = NODE_PARENT(tn);
2086 cindex++;
2087 n=(struct node *)tn;
2088 pend=tn->pos+tn->bits;
2089 indent-=3;
2090 depth--;
2091 }
2092 }
2093 }
2094 }
2095 else n = NULL;
2096 }
2097 else seq_printf(seq, "------ trie is empty\n");
2098
2099 read_unlock(&fib_lock);
2100}
2101
2102static struct trie_stat *trie_stat_new(void)
2103{
2104 struct trie_stat *s = kmalloc(sizeof(struct trie_stat), GFP_KERNEL);
2105 int i;
2106
2107 if(s) {
2108 s->totdepth = 0;
2109 s->maxdepth = 0;
2110 s->tnodes = 0;
2111 s->leaves = 0;
2112 s->nullpointers = 0;
2113
2114 for(i=0; i< MAX_CHILDS; i++)
2115 s->nodesizes[i] = 0;
2116 }
2117 return s;
2118}
2119
2120static struct trie_stat *trie_collect_stats(struct trie *t)
2121{
2122 struct node *n=t->trie;
2123 struct trie_stat *s = trie_stat_new();
2124 int cindex = 0;
2125 int indent = 1;
2126 int pend = 0;
2127 int depth = 0;
2128
2129 read_lock(&fib_lock);
2130
2131 if (s) {
2132 if (n) {
2133 if (IS_TNODE(n)) {
2134 struct tnode *tn = (struct tnode *)n;
2135 pend=tn->pos+tn->bits;
2136 indent += 3;
2137 s->nodesizes[tn->bits]++;
2138 depth++;
2139
2140 while (tn && cindex < (1 << tn->bits)) {
2141 if (tn->child[cindex]) {
2142 /* Got a child */
2143
2144 if (IS_LEAF(tn->child[cindex])) {
2145 cindex++;
2146
2147 /* stats */
2148 if (depth > s->maxdepth)
2149 s->maxdepth = depth;
2150 s->totdepth += depth;
2151 s->leaves++;
2152 }
2153
2154 else {
2155 /*
2156 * New tnode. Decend one level
2157 */
2158
2159 s->tnodes++;
2160 s->nodesizes[tn->bits]++;
2161 depth++;
2162
2163 n = tn->child[cindex];
2164 tn = (struct tnode *)n;
2165 pend = tn->pos+tn->bits;
2166
2167 indent += 3;
2168 cindex = 0;
2169 }
2170 }
2171 else {
2172 cindex++;
2173 s->nullpointers++;
2174 }
2175
2176 /*
2177 * Test if we are done
2178 */
2179
2180 while (cindex >= (1 << tn->bits)) {
2181
2182 /*
2183 * Move upwards and test for root
2184 * pop off all traversed nodes
2185 */
2186
2187
2188 if (NODE_PARENT(tn) == NULL) {
2189 tn = NULL;
2190 n = NULL;
2191 break;
2192 }
2193 else {
2194 cindex = tkey_extract_bits(tn->key, NODE_PARENT(tn)->pos, NODE_PARENT(tn)->bits);
2195 tn = NODE_PARENT(tn);
2196 cindex++;
2197 n = (struct node *)tn;
2198 pend=tn->pos+tn->bits;
2199 indent -= 3;
2200 depth--;
2201 }
2202 }
2203 }
2204 }
2205 else n = NULL;
2206 }
2207 }
2208
2209 read_unlock(&fib_lock);
2210 return s;
2211}
2212
2213#ifdef CONFIG_PROC_FS
2214
2215static struct fib_alias *fib_triestat_get_first(struct seq_file *seq)
2216{
2217 return NULL;
2218}
2219
2220static struct fib_alias *fib_triestat_get_next(struct seq_file *seq)
2221{
2222 return NULL;
2223}
2224
2225static void *fib_triestat_seq_start(struct seq_file *seq, loff_t *pos)
2226{
2227 void *v = NULL;
2228
2229 if (ip_fib_main_table)
2230 v = *pos ? fib_triestat_get_next(seq) : SEQ_START_TOKEN;
2231 return v;
2232}
2233
2234static void *fib_triestat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2235{
2236 ++*pos;
2237 return v == SEQ_START_TOKEN ? fib_triestat_get_first(seq) : fib_triestat_get_next(seq);
2238}
2239
2240static void fib_triestat_seq_stop(struct seq_file *seq, void *v)
2241{
2242
2243}
2244
2245/*
2246 * This outputs /proc/net/fib_triestats
2247 *
2248 * It always works in backward compatibility mode.
2249 * The format of the file is not supposed to be changed.
2250 */
2251
2252static void collect_and_show(struct trie *t, struct seq_file *seq)
2253{
2254 int bytes = 0; /* How many bytes are used, a ref is 4 bytes */
2255 int i, max, pointers;
2256 struct trie_stat *stat;
2257 int avdepth;
2258
2259 stat = trie_collect_stats(t);
2260
2261 bytes=0;
2262 seq_printf(seq, "trie=%p\n", t);
2263
2264 if (stat) {
2265 if (stat->leaves)
2266 avdepth=stat->totdepth*100 / stat->leaves;
2267 else
2268 avdepth=0;
2269 seq_printf(seq, "Aver depth: %d.%02d\n", avdepth / 100, avdepth % 100 );
2270 seq_printf(seq, "Max depth: %4d\n", stat->maxdepth);
2271
2272 seq_printf(seq, "Leaves: %d\n", stat->leaves);
2273 bytes += sizeof(struct leaf) * stat->leaves;
2274 seq_printf(seq, "Internal nodes: %d\n", stat->tnodes);
2275 bytes += sizeof(struct tnode) * stat->tnodes;
2276
2277 max = MAX_CHILDS-1;
2278
2279 while (max >= 0 && stat->nodesizes[max] == 0)
2280 max--;
2281 pointers = 0;
2282
2283 for (i = 1; i <= max; i++)
2284 if (stat->nodesizes[i] != 0) {
2285 seq_printf(seq, " %d: %d", i, stat->nodesizes[i]);
2286 pointers += (1<<i) * stat->nodesizes[i];
2287 }
2288 seq_printf(seq, "\n");
2289 seq_printf(seq, "Pointers: %d\n", pointers);
2290 bytes += sizeof(struct node *) * pointers;
2291 seq_printf(seq, "Null ptrs: %d\n", stat->nullpointers);
2292 seq_printf(seq, "Total size: %d kB\n", bytes / 1024);
2293
2294 kfree(stat);
2295 }
2296
2297#ifdef CONFIG_IP_FIB_TRIE_STATS
2298 seq_printf(seq, "Counters:\n---------\n");
2299 seq_printf(seq,"gets = %d\n", t->stats.gets);
2300 seq_printf(seq,"backtracks = %d\n", t->stats.backtrack);
2301 seq_printf(seq,"semantic match passed = %d\n", t->stats.semantic_match_passed);
2302 seq_printf(seq,"semantic match miss = %d\n", t->stats.semantic_match_miss);
2303 seq_printf(seq,"null node hit= %d\n", t->stats.null_node_hit);
2304#ifdef CLEAR_STATS
2305 memset(&(t->stats), 0, sizeof(t->stats));
2306#endif
2307#endif /* CONFIG_IP_FIB_TRIE_STATS */
2308}
2309
2310static int fib_triestat_seq_show(struct seq_file *seq, void *v)
2311{
2312 char bf[128];
2313
2314 if (v == SEQ_START_TOKEN) {
2315 seq_printf(seq, "Basic info: size of leaf: %Zd bytes, size of tnode: %Zd bytes.\n",
2316 sizeof(struct leaf), sizeof(struct tnode));
2317 if (trie_local)
2318 collect_and_show(trie_local, seq);
2319
2320 if (trie_main)
2321 collect_and_show(trie_main, seq);
2322 }
2323 else {
2324 snprintf(bf, sizeof(bf),
2325 "*\t%08X\t%08X", 200, 400);
2326
2327 seq_printf(seq, "%-127s\n", bf);
2328 }
2329 return 0;
2330}
2331
2332static struct seq_operations fib_triestat_seq_ops = {
2333 .start = fib_triestat_seq_start,
2334 .next = fib_triestat_seq_next,
2335 .stop = fib_triestat_seq_stop,
2336 .show = fib_triestat_seq_show,
2337};
2338
2339static int fib_triestat_seq_open(struct inode *inode, struct file *file)
2340{
2341 struct seq_file *seq;
2342 int rc = -ENOMEM;
2343
2344 rc = seq_open(file, &fib_triestat_seq_ops);
2345 if (rc)
2346 goto out_kfree;
2347
2348 seq = file->private_data;
2349out:
2350 return rc;
2351out_kfree:
2352 goto out;
2353}
2354
2355static struct file_operations fib_triestat_seq_fops = {
2356 .owner = THIS_MODULE,
2357 .open = fib_triestat_seq_open,
2358 .read = seq_read,
2359 .llseek = seq_lseek,
2360 .release = seq_release_private,
2361};
2362
2363int __init fib_stat_proc_init(void)
2364{
2365 if (!proc_net_fops_create("fib_triestat", S_IRUGO, &fib_triestat_seq_fops))
2366 return -ENOMEM;
2367 return 0;
2368}
2369
2370void __init fib_stat_proc_exit(void)
2371{
2372 proc_net_remove("fib_triestat");
2373}
2374
2375static struct fib_alias *fib_trie_get_first(struct seq_file *seq)
2376{
2377 return NULL;
2378}
2379
2380static struct fib_alias *fib_trie_get_next(struct seq_file *seq)
2381{
2382 return NULL;
2383}
2384
2385static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos)
2386{
2387 void *v = NULL;
2388
2389 if (ip_fib_main_table)
2390 v = *pos ? fib_trie_get_next(seq) : SEQ_START_TOKEN;
2391 return v;
2392}
2393
2394static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2395{
2396 ++*pos;
2397 return v == SEQ_START_TOKEN ? fib_trie_get_first(seq) : fib_trie_get_next(seq);
2398}
2399
2400static void fib_trie_seq_stop(struct seq_file *seq, void *v)
2401{
2402
2403}
2404
2405/*
2406 * This outputs /proc/net/fib_trie.
2407 *
2408 * It always works in backward compatibility mode.
2409 * The format of the file is not supposed to be changed.
2410 */
2411
2412static int fib_trie_seq_show(struct seq_file *seq, void *v)
2413{
2414 char bf[128];
2415
2416 if (v == SEQ_START_TOKEN) {
2417 if (trie_local)
2418 trie_dump_seq(seq, trie_local);
2419
2420 if (trie_main)
2421 trie_dump_seq(seq, trie_main);
2422 }
2423
2424 else {
2425 snprintf(bf, sizeof(bf),
2426 "*\t%08X\t%08X", 200, 400);
2427 seq_printf(seq, "%-127s\n", bf);
2428 }
2429
2430 return 0;
2431}
2432
2433static struct seq_operations fib_trie_seq_ops = {
2434 .start = fib_trie_seq_start,
2435 .next = fib_trie_seq_next,
2436 .stop = fib_trie_seq_stop,
2437 .show = fib_trie_seq_show,
2438};
2439
2440static int fib_trie_seq_open(struct inode *inode, struct file *file)
2441{
2442 struct seq_file *seq;
2443 int rc = -ENOMEM;
2444
2445 rc = seq_open(file, &fib_trie_seq_ops);
2446 if (rc)
2447 goto out_kfree;
2448
2449 seq = file->private_data;
2450out:
2451 return rc;
2452out_kfree:
2453 goto out;
2454}
2455
2456static struct file_operations fib_trie_seq_fops = {
2457 .owner = THIS_MODULE,
2458 .open = fib_trie_seq_open,
2459 .read = seq_read,
2460 .llseek = seq_lseek,
2461 .release = seq_release_private,
2462};
2463
2464int __init fib_proc_init(void)
2465{
2466 if (!proc_net_fops_create("fib_trie", S_IRUGO, &fib_trie_seq_fops))
2467 return -ENOMEM;
2468 return 0;
2469}
2470
2471void __init fib_proc_exit(void)
2472{
2473 proc_net_remove("fib_trie");
2474}
2475
2476#endif /* CONFIG_PROC_FS */