blob: 395f64df6f9a3d1fea9de6ea9176d6d44b1234d4 [file] [log] [blame]
Robert Olsson19baf832005-06-21 12:43:18 -07001/*
2 * This program is free software; you can redistribute it and/or
3 * modify it under the terms of the GNU General Public License
4 * as published by the Free Software Foundation; either version
5 * 2 of the License, or (at your option) any later version.
6 *
7 * Robert Olsson <robert.olsson@its.uu.se> Uppsala Universitet
8 * & Swedish University of Agricultural Sciences.
9 *
10 * Jens Laas <jens.laas@data.slu.se> Swedish University of
11 * Agricultural Sciences.
12 *
13 * Hans Liss <hans.liss@its.uu.se> Uppsala Universitet
14 *
15 * This work is based on the LPC-trie which is originally descibed in:
16 *
17 * An experimental study of compression methods for dynamic tries
18 * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002.
19 * http://www.nada.kth.se/~snilsson/public/papers/dyntrie2/
20 *
21 *
22 * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson
23 * IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999
24 *
25 * Version: $Id: fib_trie.c,v 1.3 2005/06/08 14:20:01 robert Exp $
26 *
27 *
28 * Code from fib_hash has been reused which includes the following header:
29 *
30 *
31 * INET An implementation of the TCP/IP protocol suite for the LINUX
32 * operating system. INET is implemented using the BSD Socket
33 * interface as the means of communication with the user level.
34 *
35 * IPv4 FIB: lookup engine and maintenance routines.
36 *
37 *
38 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
39 *
40 * This program is free software; you can redistribute it and/or
41 * modify it under the terms of the GNU General Public License
42 * as published by the Free Software Foundation; either version
43 * 2 of the License, or (at your option) any later version.
44 */
45
Robert Olsson2f368952005-07-05 15:02:40 -070046#define VERSION "0.325"
Robert Olsson19baf832005-06-21 12:43:18 -070047
48#include <linux/config.h>
49#include <asm/uaccess.h>
50#include <asm/system.h>
51#include <asm/bitops.h>
52#include <linux/types.h>
53#include <linux/kernel.h>
54#include <linux/sched.h>
55#include <linux/mm.h>
56#include <linux/string.h>
57#include <linux/socket.h>
58#include <linux/sockios.h>
59#include <linux/errno.h>
60#include <linux/in.h>
61#include <linux/inet.h>
62#include <linux/netdevice.h>
63#include <linux/if_arp.h>
64#include <linux/proc_fs.h>
65#include <linux/skbuff.h>
66#include <linux/netlink.h>
67#include <linux/init.h>
68#include <linux/list.h>
69#include <net/ip.h>
70#include <net/protocol.h>
71#include <net/route.h>
72#include <net/tcp.h>
73#include <net/sock.h>
74#include <net/ip_fib.h>
75#include "fib_lookup.h"
76
77#undef CONFIG_IP_FIB_TRIE_STATS
78#define MAX_CHILDS 16384
79
Robert Olsson19baf832005-06-21 12:43:18 -070080#define KEYLENGTH (8*sizeof(t_key))
81#define MASK_PFX(k, l) (((l)==0)?0:(k >> (KEYLENGTH-l)) << (KEYLENGTH-l))
82#define TKEY_GET_MASK(offset, bits) (((bits)==0)?0:((t_key)(-1) << (KEYLENGTH - bits) >> offset))
83
84static DEFINE_RWLOCK(fib_lock);
85
86typedef unsigned int t_key;
87
88#define T_TNODE 0
89#define T_LEAF 1
90#define NODE_TYPE_MASK 0x1UL
Olof Johansson91b9a272005-08-09 20:24:39 -070091#define NODE_PARENT(node) \
92 ((struct tnode *)((node)->parent & ~NODE_TYPE_MASK))
93#define NODE_SET_PARENT(node, ptr) \
94 ((node)->parent = (((unsigned long)(ptr)) | \
95 ((node)->parent & NODE_TYPE_MASK)))
96#define NODE_INIT_PARENT(node, type) \
97 ((node)->parent = (type))
98#define NODE_TYPE(node) \
99 ((node)->parent & NODE_TYPE_MASK)
Robert Olsson19baf832005-06-21 12:43:18 -0700100
Olof Johansson91b9a272005-08-09 20:24:39 -0700101#define IS_TNODE(n) (!(n->parent & T_LEAF))
102#define IS_LEAF(n) (n->parent & T_LEAF)
Robert Olsson19baf832005-06-21 12:43:18 -0700103
104struct node {
Olof Johansson91b9a272005-08-09 20:24:39 -0700105 t_key key;
106 unsigned long parent;
Robert Olsson19baf832005-06-21 12:43:18 -0700107};
108
109struct leaf {
Olof Johansson91b9a272005-08-09 20:24:39 -0700110 t_key key;
111 unsigned long parent;
Robert Olsson19baf832005-06-21 12:43:18 -0700112 struct hlist_head list;
113};
114
115struct leaf_info {
116 struct hlist_node hlist;
117 int plen;
118 struct list_head falh;
119};
120
121struct tnode {
Olof Johansson91b9a272005-08-09 20:24:39 -0700122 t_key key;
123 unsigned long parent;
124 unsigned short pos:5; /* 2log(KEYLENGTH) bits needed */
125 unsigned short bits:5; /* 2log(KEYLENGTH) bits needed */
126 unsigned short full_children; /* KEYLENGTH bits needed */
127 unsigned short empty_children; /* KEYLENGTH bits needed */
128 struct node *child[0];
Robert Olsson19baf832005-06-21 12:43:18 -0700129};
130
131#ifdef CONFIG_IP_FIB_TRIE_STATS
132struct trie_use_stats {
133 unsigned int gets;
134 unsigned int backtrack;
135 unsigned int semantic_match_passed;
136 unsigned int semantic_match_miss;
137 unsigned int null_node_hit;
Robert Olsson2f368952005-07-05 15:02:40 -0700138 unsigned int resize_node_skipped;
Robert Olsson19baf832005-06-21 12:43:18 -0700139};
140#endif
141
142struct trie_stat {
143 unsigned int totdepth;
144 unsigned int maxdepth;
145 unsigned int tnodes;
146 unsigned int leaves;
147 unsigned int nullpointers;
148 unsigned int nodesizes[MAX_CHILDS];
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700149};
Robert Olsson19baf832005-06-21 12:43:18 -0700150
151struct trie {
Olof Johansson91b9a272005-08-09 20:24:39 -0700152 struct node *trie;
Robert Olsson19baf832005-06-21 12:43:18 -0700153#ifdef CONFIG_IP_FIB_TRIE_STATS
154 struct trie_use_stats stats;
155#endif
Olof Johansson91b9a272005-08-09 20:24:39 -0700156 int size;
Robert Olsson19baf832005-06-21 12:43:18 -0700157 unsigned int revision;
158};
159
160static int trie_debug = 0;
161
Olof Johansson91b9a272005-08-09 20:24:39 -0700162#define DBG(x...) do { if (trie_debug) printk(x); } while (0)
163
Robert Olsson19baf832005-06-21 12:43:18 -0700164static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n);
165static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull);
Robert Olsson19baf832005-06-21 12:43:18 -0700166static struct node *resize(struct trie *t, struct tnode *tn);
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700167static struct tnode *inflate(struct trie *t, struct tnode *tn);
168static struct tnode *halve(struct trie *t, struct tnode *tn);
Robert Olsson19baf832005-06-21 12:43:18 -0700169static void tnode_free(struct tnode *tn);
170static void trie_dump_seq(struct seq_file *seq, struct trie *t);
171extern struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio);
172extern int fib_detect_death(struct fib_info *fi, int order,
Olof Johansson91b9a272005-08-09 20:24:39 -0700173 struct fib_info **last_resort, int *last_idx, int *dflt);
Robert Olsson19baf832005-06-21 12:43:18 -0700174
175extern void rtmsg_fib(int event, u32 key, struct fib_alias *fa, int z, int tb_id,
Olof Johansson91b9a272005-08-09 20:24:39 -0700176 struct nlmsghdr *n, struct netlink_skb_parms *req);
Robert Olsson19baf832005-06-21 12:43:18 -0700177
178static kmem_cache_t *fn_alias_kmem;
179static struct trie *trie_local = NULL, *trie_main = NULL;
180
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700181static inline struct node *tnode_get_child(struct tnode *tn, int i)
Robert Olsson19baf832005-06-21 12:43:18 -0700182{
Olof Johansson91b9a272005-08-09 20:24:39 -0700183 BUG_ON(i >= 1 << tn->bits);
Robert Olsson19baf832005-06-21 12:43:18 -0700184
Olof Johansson91b9a272005-08-09 20:24:39 -0700185 return tn->child[i];
Robert Olsson19baf832005-06-21 12:43:18 -0700186}
187
Stephen Hemmignerbb435b82005-08-09 20:25:39 -0700188static inline int tnode_child_length(const struct tnode *tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700189{
Olof Johansson91b9a272005-08-09 20:24:39 -0700190 return 1 << tn->bits;
Robert Olsson19baf832005-06-21 12:43:18 -0700191}
192
Robert Olsson19baf832005-06-21 12:43:18 -0700193static inline t_key tkey_extract_bits(t_key a, int offset, int bits)
194{
Olof Johansson91b9a272005-08-09 20:24:39 -0700195 if (offset < KEYLENGTH)
Robert Olsson19baf832005-06-21 12:43:18 -0700196 return ((t_key)(a << offset)) >> (KEYLENGTH - bits);
Olof Johansson91b9a272005-08-09 20:24:39 -0700197 else
Robert Olsson19baf832005-06-21 12:43:18 -0700198 return 0;
199}
200
201static inline int tkey_equals(t_key a, t_key b)
202{
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700203 return a == b;
Robert Olsson19baf832005-06-21 12:43:18 -0700204}
205
206static inline int tkey_sub_equals(t_key a, int offset, int bits, t_key b)
207{
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700208 if (bits == 0 || offset >= KEYLENGTH)
209 return 1;
Olof Johansson91b9a272005-08-09 20:24:39 -0700210 bits = bits > KEYLENGTH ? KEYLENGTH : bits;
211 return ((a ^ b) << offset) >> (KEYLENGTH - bits) == 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700212}
Robert Olsson19baf832005-06-21 12:43:18 -0700213
214static inline int tkey_mismatch(t_key a, int offset, t_key b)
215{
216 t_key diff = a ^ b;
217 int i = offset;
218
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700219 if (!diff)
220 return 0;
221 while ((diff << i) >> (KEYLENGTH-1) == 0)
Robert Olsson19baf832005-06-21 12:43:18 -0700222 i++;
223 return i;
224}
225
Olof Johansson91b9a272005-08-09 20:24:39 -0700226/* Candidate for fib_semantics */
Robert Olsson19baf832005-06-21 12:43:18 -0700227
228static void fn_free_alias(struct fib_alias *fa)
229{
230 fib_release_info(fa->fa_info);
231 kmem_cache_free(fn_alias_kmem, fa);
232}
233
234/*
235 To understand this stuff, an understanding of keys and all their bits is
236 necessary. Every node in the trie has a key associated with it, but not
237 all of the bits in that key are significant.
238
239 Consider a node 'n' and its parent 'tp'.
240
241 If n is a leaf, every bit in its key is significant. Its presence is
242 necessitaded by path compression, since during a tree traversal (when
243 searching for a leaf - unless we are doing an insertion) we will completely
244 ignore all skipped bits we encounter. Thus we need to verify, at the end of
245 a potentially successful search, that we have indeed been walking the
246 correct key path.
247
248 Note that we can never "miss" the correct key in the tree if present by
249 following the wrong path. Path compression ensures that segments of the key
250 that are the same for all keys with a given prefix are skipped, but the
251 skipped part *is* identical for each node in the subtrie below the skipped
252 bit! trie_insert() in this implementation takes care of that - note the
253 call to tkey_sub_equals() in trie_insert().
254
255 if n is an internal node - a 'tnode' here, the various parts of its key
256 have many different meanings.
257
258 Example:
259 _________________________________________________________________
260 | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C |
261 -----------------------------------------------------------------
262 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
263
264 _________________________________________________________________
265 | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u |
266 -----------------------------------------------------------------
267 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
268
269 tp->pos = 7
270 tp->bits = 3
271 n->pos = 15
Olof Johansson91b9a272005-08-09 20:24:39 -0700272 n->bits = 4
Robert Olsson19baf832005-06-21 12:43:18 -0700273
274 First, let's just ignore the bits that come before the parent tp, that is
275 the bits from 0 to (tp->pos-1). They are *known* but at this point we do
276 not use them for anything.
277
278 The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the
279 index into the parent's child array. That is, they will be used to find
280 'n' among tp's children.
281
282 The bits from (tp->pos + tp->bits) to (n->pos - 1) - "S" - are skipped bits
283 for the node n.
284
285 All the bits we have seen so far are significant to the node n. The rest
286 of the bits are really not needed or indeed known in n->key.
287
288 The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into
289 n's child array, and will of course be different for each child.
290
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700291
Robert Olsson19baf832005-06-21 12:43:18 -0700292 The rest of the bits, from (n->pos + n->bits) onward, are completely unknown
293 at this point.
294
295*/
296
297static void check_tnode(struct tnode *tn)
298{
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700299 if (tn && tn->pos+tn->bits > 32) {
Robert Olsson19baf832005-06-21 12:43:18 -0700300 printk("TNODE ERROR tn=%p, pos=%d, bits=%d\n", tn, tn->pos, tn->bits);
301 }
302}
303
304static int halve_threshold = 25;
305static int inflate_threshold = 50;
306
307static struct leaf *leaf_new(void)
308{
309 struct leaf *l = kmalloc(sizeof(struct leaf), GFP_KERNEL);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700310 if (l) {
Robert Olsson19baf832005-06-21 12:43:18 -0700311 NODE_INIT_PARENT(l, T_LEAF);
312 INIT_HLIST_HEAD(&l->list);
313 }
314 return l;
315}
316
317static struct leaf_info *leaf_info_new(int plen)
318{
319 struct leaf_info *li = kmalloc(sizeof(struct leaf_info), GFP_KERNEL);
Olof Johansson91b9a272005-08-09 20:24:39 -0700320
321 if (!li)
322 return NULL;
323
324 li->plen = plen;
325 INIT_LIST_HEAD(&li->falh);
326
Robert Olsson19baf832005-06-21 12:43:18 -0700327 return li;
328}
329
330static inline void free_leaf(struct leaf *l)
331{
332 kfree(l);
333}
334
335static inline void free_leaf_info(struct leaf_info *li)
336{
337 kfree(li);
338}
339
Patrick McHardyf0e36f82005-07-05 14:44:55 -0700340static struct tnode *tnode_alloc(unsigned int size)
341{
342 if (size <= PAGE_SIZE) {
343 return kmalloc(size, GFP_KERNEL);
344 } else {
345 return (struct tnode *)
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700346 __get_free_pages(GFP_KERNEL, get_order(size));
Patrick McHardyf0e36f82005-07-05 14:44:55 -0700347 }
348}
349
350static void __tnode_free(struct tnode *tn)
351{
352 unsigned int size = sizeof(struct tnode) +
Olof Johansson91b9a272005-08-09 20:24:39 -0700353 (1 << tn->bits) * sizeof(struct node *);
Patrick McHardyf0e36f82005-07-05 14:44:55 -0700354
355 if (size <= PAGE_SIZE)
356 kfree(tn);
357 else
358 free_pages((unsigned long)tn, get_order(size));
359}
360
Robert Olsson19baf832005-06-21 12:43:18 -0700361static struct tnode* tnode_new(t_key key, int pos, int bits)
362{
363 int nchildren = 1<<bits;
364 int sz = sizeof(struct tnode) + nchildren * sizeof(struct node *);
Patrick McHardyf0e36f82005-07-05 14:44:55 -0700365 struct tnode *tn = tnode_alloc(sz);
Robert Olsson19baf832005-06-21 12:43:18 -0700366
Olof Johansson91b9a272005-08-09 20:24:39 -0700367 if (tn) {
Robert Olsson19baf832005-06-21 12:43:18 -0700368 memset(tn, 0, sz);
369 NODE_INIT_PARENT(tn, T_TNODE);
370 tn->pos = pos;
371 tn->bits = bits;
372 tn->key = key;
373 tn->full_children = 0;
374 tn->empty_children = 1<<bits;
375 }
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700376
Olof Johansson91b9a272005-08-09 20:24:39 -0700377 DBG("AT %p s=%u %u\n", tn, (unsigned int) sizeof(struct tnode),
378 (unsigned int) (sizeof(struct node) * 1<<bits));
Robert Olsson19baf832005-06-21 12:43:18 -0700379 return tn;
380}
381
382static void tnode_free(struct tnode *tn)
383{
Olof Johansson91b9a272005-08-09 20:24:39 -0700384 BUG_ON(!tn);
385
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700386 if (IS_LEAF(tn)) {
Robert Olsson19baf832005-06-21 12:43:18 -0700387 free_leaf((struct leaf *)tn);
Olof Johansson91b9a272005-08-09 20:24:39 -0700388 DBG("FL %p \n", tn);
389 } else {
Patrick McHardyf0e36f82005-07-05 14:44:55 -0700390 __tnode_free(tn);
Olof Johansson91b9a272005-08-09 20:24:39 -0700391 DBG("FT %p \n", tn);
Robert Olsson19baf832005-06-21 12:43:18 -0700392 }
393}
394
395/*
396 * Check whether a tnode 'n' is "full", i.e. it is an internal node
397 * and no bits are skipped. See discussion in dyntree paper p. 6
398 */
399
Stephen Hemmignerbb435b82005-08-09 20:25:39 -0700400static inline int tnode_full(const struct tnode *tn, const struct node *n)
Robert Olsson19baf832005-06-21 12:43:18 -0700401{
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700402 if (n == NULL || IS_LEAF(n))
Robert Olsson19baf832005-06-21 12:43:18 -0700403 return 0;
404
405 return ((struct tnode *) n)->pos == tn->pos + tn->bits;
406}
407
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700408static inline void put_child(struct trie *t, struct tnode *tn, int i, struct node *n)
Robert Olsson19baf832005-06-21 12:43:18 -0700409{
410 tnode_put_child_reorg(tn, i, n, -1);
411}
412
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700413 /*
Robert Olsson19baf832005-06-21 12:43:18 -0700414 * Add a child at position i overwriting the old value.
415 * Update the value of full_children and empty_children.
416 */
417
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700418static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull)
Robert Olsson19baf832005-06-21 12:43:18 -0700419{
420 struct node *chi;
421 int isfull;
422
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700423 if (i >= 1<<tn->bits) {
Robert Olsson19baf832005-06-21 12:43:18 -0700424 printk("bits=%d, i=%d\n", tn->bits, i);
Olof Johansson91b9a272005-08-09 20:24:39 -0700425 BUG();
Robert Olsson19baf832005-06-21 12:43:18 -0700426 }
427 write_lock_bh(&fib_lock);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700428 chi = tn->child[i];
Robert Olsson19baf832005-06-21 12:43:18 -0700429
430 /* update emptyChildren */
431 if (n == NULL && chi != NULL)
432 tn->empty_children++;
433 else if (n != NULL && chi == NULL)
434 tn->empty_children--;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700435
Robert Olsson19baf832005-06-21 12:43:18 -0700436 /* update fullChildren */
Olof Johansson91b9a272005-08-09 20:24:39 -0700437 if (wasfull == -1)
Robert Olsson19baf832005-06-21 12:43:18 -0700438 wasfull = tnode_full(tn, chi);
439
440 isfull = tnode_full(tn, n);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700441 if (wasfull && !isfull)
Robert Olsson19baf832005-06-21 12:43:18 -0700442 tn->full_children--;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700443 else if (!wasfull && isfull)
Robert Olsson19baf832005-06-21 12:43:18 -0700444 tn->full_children++;
Olof Johansson91b9a272005-08-09 20:24:39 -0700445
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700446 if (n)
447 NODE_SET_PARENT(n, tn);
Robert Olsson19baf832005-06-21 12:43:18 -0700448
449 tn->child[i] = n;
450 write_unlock_bh(&fib_lock);
451}
452
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700453static struct node *resize(struct trie *t, struct tnode *tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700454{
455 int i;
Robert Olsson2f368952005-07-05 15:02:40 -0700456 int err = 0;
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700457 struct tnode *old_tn;
Robert Olsson19baf832005-06-21 12:43:18 -0700458
459 if (!tn)
460 return NULL;
461
Olof Johansson91b9a272005-08-09 20:24:39 -0700462 DBG("In tnode_resize %p inflate_threshold=%d threshold=%d\n",
463 tn, inflate_threshold, halve_threshold);
Robert Olsson19baf832005-06-21 12:43:18 -0700464
465 /* No children */
466 if (tn->empty_children == tnode_child_length(tn)) {
467 tnode_free(tn);
468 return NULL;
469 }
470 /* One child */
471 if (tn->empty_children == tnode_child_length(tn) - 1)
472 for (i = 0; i < tnode_child_length(tn); i++) {
Olof Johansson91b9a272005-08-09 20:24:39 -0700473 struct node *n;
Robert Olsson19baf832005-06-21 12:43:18 -0700474
475 write_lock_bh(&fib_lock);
Olof Johansson91b9a272005-08-09 20:24:39 -0700476 n = tn->child[i];
477 if (!n) {
Robert Olsson19baf832005-06-21 12:43:18 -0700478 write_unlock_bh(&fib_lock);
Olof Johansson91b9a272005-08-09 20:24:39 -0700479 continue;
Robert Olsson19baf832005-06-21 12:43:18 -0700480 }
Olof Johansson91b9a272005-08-09 20:24:39 -0700481
482 /* compress one level */
483 NODE_INIT_PARENT(n, NODE_TYPE(n));
484
Robert Olsson19baf832005-06-21 12:43:18 -0700485 write_unlock_bh(&fib_lock);
Olof Johansson91b9a272005-08-09 20:24:39 -0700486 tnode_free(tn);
487 return n;
Robert Olsson19baf832005-06-21 12:43:18 -0700488 }
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700489 /*
Robert Olsson19baf832005-06-21 12:43:18 -0700490 * Double as long as the resulting node has a number of
491 * nonempty nodes that are above the threshold.
492 */
493
494 /*
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700495 * From "Implementing a dynamic compressed trie" by Stefan Nilsson of
496 * the Helsinki University of Technology and Matti Tikkanen of Nokia
Robert Olsson19baf832005-06-21 12:43:18 -0700497 * Telecommunications, page 6:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700498 * "A node is doubled if the ratio of non-empty children to all
Robert Olsson19baf832005-06-21 12:43:18 -0700499 * children in the *doubled* node is at least 'high'."
500 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700501 * 'high' in this instance is the variable 'inflate_threshold'. It
502 * is expressed as a percentage, so we multiply it with
503 * tnode_child_length() and instead of multiplying by 2 (since the
504 * child array will be doubled by inflate()) and multiplying
505 * the left-hand side by 100 (to handle the percentage thing) we
Robert Olsson19baf832005-06-21 12:43:18 -0700506 * multiply the left-hand side by 50.
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700507 *
508 * The left-hand side may look a bit weird: tnode_child_length(tn)
509 * - tn->empty_children is of course the number of non-null children
510 * in the current node. tn->full_children is the number of "full"
Robert Olsson19baf832005-06-21 12:43:18 -0700511 * children, that is non-null tnodes with a skip value of 0.
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700512 * All of those will be doubled in the resulting inflated tnode, so
Robert Olsson19baf832005-06-21 12:43:18 -0700513 * we just count them one extra time here.
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700514 *
Robert Olsson19baf832005-06-21 12:43:18 -0700515 * A clearer way to write this would be:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700516 *
Robert Olsson19baf832005-06-21 12:43:18 -0700517 * to_be_doubled = tn->full_children;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700518 * not_to_be_doubled = tnode_child_length(tn) - tn->empty_children -
Robert Olsson19baf832005-06-21 12:43:18 -0700519 * tn->full_children;
520 *
521 * new_child_length = tnode_child_length(tn) * 2;
522 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700523 * new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) /
Robert Olsson19baf832005-06-21 12:43:18 -0700524 * new_child_length;
525 * if (new_fill_factor >= inflate_threshold)
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700526 *
527 * ...and so on, tho it would mess up the while () loop.
528 *
Robert Olsson19baf832005-06-21 12:43:18 -0700529 * anyway,
530 * 100 * (not_to_be_doubled + 2*to_be_doubled) / new_child_length >=
531 * inflate_threshold
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700532 *
Robert Olsson19baf832005-06-21 12:43:18 -0700533 * avoid a division:
534 * 100 * (not_to_be_doubled + 2*to_be_doubled) >=
535 * inflate_threshold * new_child_length
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700536 *
Robert Olsson19baf832005-06-21 12:43:18 -0700537 * expand not_to_be_doubled and to_be_doubled, and shorten:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700538 * 100 * (tnode_child_length(tn) - tn->empty_children +
Olof Johansson91b9a272005-08-09 20:24:39 -0700539 * tn->full_children) >= inflate_threshold * new_child_length
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700540 *
Robert Olsson19baf832005-06-21 12:43:18 -0700541 * expand new_child_length:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700542 * 100 * (tnode_child_length(tn) - tn->empty_children +
Olof Johansson91b9a272005-08-09 20:24:39 -0700543 * tn->full_children) >=
Robert Olsson19baf832005-06-21 12:43:18 -0700544 * inflate_threshold * tnode_child_length(tn) * 2
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700545 *
Robert Olsson19baf832005-06-21 12:43:18 -0700546 * shorten again:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700547 * 50 * (tn->full_children + tnode_child_length(tn) -
Olof Johansson91b9a272005-08-09 20:24:39 -0700548 * tn->empty_children) >= inflate_threshold *
Robert Olsson19baf832005-06-21 12:43:18 -0700549 * tnode_child_length(tn)
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700550 *
Robert Olsson19baf832005-06-21 12:43:18 -0700551 */
552
553 check_tnode(tn);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700554
Robert Olsson2f368952005-07-05 15:02:40 -0700555 err = 0;
Robert Olsson19baf832005-06-21 12:43:18 -0700556 while ((tn->full_children > 0 &&
557 50 * (tn->full_children + tnode_child_length(tn) - tn->empty_children) >=
558 inflate_threshold * tnode_child_length(tn))) {
559
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700560 old_tn = tn;
561 tn = inflate(t, tn);
562 if (IS_ERR(tn)) {
563 tn = old_tn;
Robert Olsson2f368952005-07-05 15:02:40 -0700564#ifdef CONFIG_IP_FIB_TRIE_STATS
565 t->stats.resize_node_skipped++;
566#endif
567 break;
568 }
Robert Olsson19baf832005-06-21 12:43:18 -0700569 }
570
571 check_tnode(tn);
572
573 /*
574 * Halve as long as the number of empty children in this
575 * node is above threshold.
576 */
Robert Olsson2f368952005-07-05 15:02:40 -0700577
578 err = 0;
Robert Olsson19baf832005-06-21 12:43:18 -0700579 while (tn->bits > 1 &&
580 100 * (tnode_child_length(tn) - tn->empty_children) <
Robert Olsson2f368952005-07-05 15:02:40 -0700581 halve_threshold * tnode_child_length(tn)) {
Robert Olsson19baf832005-06-21 12:43:18 -0700582
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700583 old_tn = tn;
584 tn = halve(t, tn);
585 if (IS_ERR(tn)) {
586 tn = old_tn;
Robert Olsson2f368952005-07-05 15:02:40 -0700587#ifdef CONFIG_IP_FIB_TRIE_STATS
588 t->stats.resize_node_skipped++;
589#endif
590 break;
591 }
592 }
593
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700594
Robert Olsson19baf832005-06-21 12:43:18 -0700595 /* Only one child remains */
596
597 if (tn->empty_children == tnode_child_length(tn) - 1)
598 for (i = 0; i < tnode_child_length(tn); i++) {
Olof Johansson91b9a272005-08-09 20:24:39 -0700599 struct node *n;
600
Robert Olsson19baf832005-06-21 12:43:18 -0700601 write_lock_bh(&fib_lock);
Robert Olsson19baf832005-06-21 12:43:18 -0700602
Olof Johansson91b9a272005-08-09 20:24:39 -0700603 n = tn->child[i];
604 if (!n) {
Robert Olsson19baf832005-06-21 12:43:18 -0700605 write_unlock_bh(&fib_lock);
Olof Johansson91b9a272005-08-09 20:24:39 -0700606 continue;
Robert Olsson19baf832005-06-21 12:43:18 -0700607 }
Olof Johansson91b9a272005-08-09 20:24:39 -0700608
609 /* compress one level */
610
611 NODE_INIT_PARENT(n, NODE_TYPE(n));
612
Robert Olsson19baf832005-06-21 12:43:18 -0700613 write_unlock_bh(&fib_lock);
Olof Johansson91b9a272005-08-09 20:24:39 -0700614 tnode_free(tn);
615 return n;
Robert Olsson19baf832005-06-21 12:43:18 -0700616 }
617
618 return (struct node *) tn;
619}
620
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700621static struct tnode *inflate(struct trie *t, struct tnode *tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700622{
623 struct tnode *inode;
624 struct tnode *oldtnode = tn;
625 int olen = tnode_child_length(tn);
626 int i;
627
Olof Johansson91b9a272005-08-09 20:24:39 -0700628 DBG("In inflate\n");
Robert Olsson19baf832005-06-21 12:43:18 -0700629
630 tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1);
631
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700632 if (!tn)
633 return ERR_PTR(-ENOMEM);
Robert Olsson2f368952005-07-05 15:02:40 -0700634
635 /*
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700636 * Preallocate and store tnodes before the actual work so we
637 * don't get into an inconsistent state if memory allocation
638 * fails. In case of failure we return the oldnode and inflate
Robert Olsson2f368952005-07-05 15:02:40 -0700639 * of tnode is ignored.
640 */
Olof Johansson91b9a272005-08-09 20:24:39 -0700641
642 for (i = 0; i < olen; i++) {
Robert Olsson2f368952005-07-05 15:02:40 -0700643 struct tnode *inode = (struct tnode *) tnode_get_child(oldtnode, i);
644
645 if (inode &&
646 IS_TNODE(inode) &&
647 inode->pos == oldtnode->pos + oldtnode->bits &&
648 inode->bits > 1) {
649 struct tnode *left, *right;
Robert Olsson2f368952005-07-05 15:02:40 -0700650 t_key m = TKEY_GET_MASK(inode->pos, 1);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700651
Robert Olsson2f368952005-07-05 15:02:40 -0700652 left = tnode_new(inode->key&(~m), inode->pos + 1,
653 inode->bits - 1);
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700654 if (!left)
655 goto nomem;
Olof Johansson91b9a272005-08-09 20:24:39 -0700656
Robert Olsson2f368952005-07-05 15:02:40 -0700657 right = tnode_new(inode->key|m, inode->pos + 1,
658 inode->bits - 1);
659
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700660 if (!right) {
661 tnode_free(left);
662 goto nomem;
663 }
Robert Olsson2f368952005-07-05 15:02:40 -0700664
665 put_child(t, tn, 2*i, (struct node *) left);
666 put_child(t, tn, 2*i+1, (struct node *) right);
667 }
668 }
669
Olof Johansson91b9a272005-08-09 20:24:39 -0700670 for (i = 0; i < olen; i++) {
Robert Olsson19baf832005-06-21 12:43:18 -0700671 struct node *node = tnode_get_child(oldtnode, i);
Olof Johansson91b9a272005-08-09 20:24:39 -0700672 struct tnode *left, *right;
673 int size, j;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700674
Robert Olsson19baf832005-06-21 12:43:18 -0700675 /* An empty child */
676 if (node == NULL)
677 continue;
678
679 /* A leaf or an internal node with skipped bits */
680
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700681 if (IS_LEAF(node) || ((struct tnode *) node)->pos >
Robert Olsson19baf832005-06-21 12:43:18 -0700682 tn->pos + tn->bits - 1) {
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700683 if (tkey_extract_bits(node->key, oldtnode->pos + oldtnode->bits,
Robert Olsson19baf832005-06-21 12:43:18 -0700684 1) == 0)
685 put_child(t, tn, 2*i, node);
686 else
687 put_child(t, tn, 2*i+1, node);
688 continue;
689 }
690
691 /* An internal node with two children */
692 inode = (struct tnode *) node;
693
694 if (inode->bits == 1) {
695 put_child(t, tn, 2*i, inode->child[0]);
696 put_child(t, tn, 2*i+1, inode->child[1]);
697
698 tnode_free(inode);
Olof Johansson91b9a272005-08-09 20:24:39 -0700699 continue;
Robert Olsson19baf832005-06-21 12:43:18 -0700700 }
701
Olof Johansson91b9a272005-08-09 20:24:39 -0700702 /* An internal node with more than two children */
Robert Olsson19baf832005-06-21 12:43:18 -0700703
Olof Johansson91b9a272005-08-09 20:24:39 -0700704 /* We will replace this node 'inode' with two new
705 * ones, 'left' and 'right', each with half of the
706 * original children. The two new nodes will have
707 * a position one bit further down the key and this
708 * means that the "significant" part of their keys
709 * (see the discussion near the top of this file)
710 * will differ by one bit, which will be "0" in
711 * left's key and "1" in right's key. Since we are
712 * moving the key position by one step, the bit that
713 * we are moving away from - the bit at position
714 * (inode->pos) - is the one that will differ between
715 * left and right. So... we synthesize that bit in the
716 * two new keys.
717 * The mask 'm' below will be a single "one" bit at
718 * the position (inode->pos)
719 */
Robert Olsson19baf832005-06-21 12:43:18 -0700720
Olof Johansson91b9a272005-08-09 20:24:39 -0700721 /* Use the old key, but set the new significant
722 * bit to zero.
723 */
Robert Olsson19baf832005-06-21 12:43:18 -0700724
Olof Johansson91b9a272005-08-09 20:24:39 -0700725 left = (struct tnode *) tnode_get_child(tn, 2*i);
726 put_child(t, tn, 2*i, NULL);
Robert Olsson19baf832005-06-21 12:43:18 -0700727
Olof Johansson91b9a272005-08-09 20:24:39 -0700728 BUG_ON(!left);
Robert Olsson2f368952005-07-05 15:02:40 -0700729
Olof Johansson91b9a272005-08-09 20:24:39 -0700730 right = (struct tnode *) tnode_get_child(tn, 2*i+1);
731 put_child(t, tn, 2*i+1, NULL);
Robert Olsson2f368952005-07-05 15:02:40 -0700732
Olof Johansson91b9a272005-08-09 20:24:39 -0700733 BUG_ON(!right);
Robert Olsson2f368952005-07-05 15:02:40 -0700734
Olof Johansson91b9a272005-08-09 20:24:39 -0700735 size = tnode_child_length(left);
736 for (j = 0; j < size; j++) {
737 put_child(t, left, j, inode->child[j]);
738 put_child(t, right, j, inode->child[j + size]);
Robert Olsson19baf832005-06-21 12:43:18 -0700739 }
Olof Johansson91b9a272005-08-09 20:24:39 -0700740 put_child(t, tn, 2*i, resize(t, left));
741 put_child(t, tn, 2*i+1, resize(t, right));
742
743 tnode_free(inode);
Robert Olsson19baf832005-06-21 12:43:18 -0700744 }
745 tnode_free(oldtnode);
746 return tn;
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700747nomem:
748 {
749 int size = tnode_child_length(tn);
750 int j;
751
752 for(j = 0; j < size; j++)
753 if (tn->child[j])
754 tnode_free((struct tnode *)tn->child[j]);
755
756 tnode_free(tn);
757
758 return ERR_PTR(-ENOMEM);
759 }
Robert Olsson19baf832005-06-21 12:43:18 -0700760}
761
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700762static struct tnode *halve(struct trie *t, struct tnode *tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700763{
764 struct tnode *oldtnode = tn;
765 struct node *left, *right;
766 int i;
767 int olen = tnode_child_length(tn);
768
Olof Johansson91b9a272005-08-09 20:24:39 -0700769 DBG("In halve\n");
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700770
771 tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1);
Robert Olsson19baf832005-06-21 12:43:18 -0700772
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700773 if (!tn)
774 return ERR_PTR(-ENOMEM);
Robert Olsson2f368952005-07-05 15:02:40 -0700775
776 /*
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700777 * Preallocate and store tnodes before the actual work so we
778 * don't get into an inconsistent state if memory allocation
779 * fails. In case of failure we return the oldnode and halve
Robert Olsson2f368952005-07-05 15:02:40 -0700780 * of tnode is ignored.
781 */
782
Olof Johansson91b9a272005-08-09 20:24:39 -0700783 for (i = 0; i < olen; i += 2) {
Robert Olsson2f368952005-07-05 15:02:40 -0700784 left = tnode_get_child(oldtnode, i);
785 right = tnode_get_child(oldtnode, i+1);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700786
Robert Olsson2f368952005-07-05 15:02:40 -0700787 /* Two nonempty children */
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700788 if (left && right) {
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700789 struct tnode *newn;
790
791 newn = tnode_new(left->key, tn->pos + tn->bits, 1);
792
793 if (!newn)
794 goto nomem;
795
796 put_child(t, tn, i/2, (struct node *)newn);
Robert Olsson2f368952005-07-05 15:02:40 -0700797 }
Robert Olsson2f368952005-07-05 15:02:40 -0700798
Robert Olsson2f368952005-07-05 15:02:40 -0700799 }
Robert Olsson19baf832005-06-21 12:43:18 -0700800
Olof Johansson91b9a272005-08-09 20:24:39 -0700801 for (i = 0; i < olen; i += 2) {
802 struct tnode *newBinNode;
803
Robert Olsson19baf832005-06-21 12:43:18 -0700804 left = tnode_get_child(oldtnode, i);
805 right = tnode_get_child(oldtnode, i+1);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700806
Robert Olsson19baf832005-06-21 12:43:18 -0700807 /* At least one of the children is empty */
808 if (left == NULL) {
809 if (right == NULL) /* Both are empty */
810 continue;
811 put_child(t, tn, i/2, right);
Olof Johansson91b9a272005-08-09 20:24:39 -0700812 continue;
813 }
814
815 if (right == NULL) {
Robert Olsson19baf832005-06-21 12:43:18 -0700816 put_child(t, tn, i/2, left);
Olof Johansson91b9a272005-08-09 20:24:39 -0700817 continue;
818 }
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700819
Robert Olsson19baf832005-06-21 12:43:18 -0700820 /* Two nonempty children */
Olof Johansson91b9a272005-08-09 20:24:39 -0700821 newBinNode = (struct tnode *) tnode_get_child(tn, i/2);
822 put_child(t, tn, i/2, NULL);
Robert Olsson19baf832005-06-21 12:43:18 -0700823
Olof Johansson91b9a272005-08-09 20:24:39 -0700824 BUG_ON(!newBinNode);
Robert Olsson19baf832005-06-21 12:43:18 -0700825
Olof Johansson91b9a272005-08-09 20:24:39 -0700826 put_child(t, newBinNode, 0, left);
827 put_child(t, newBinNode, 1, right);
828 put_child(t, tn, i/2, resize(t, newBinNode));
Robert Olsson19baf832005-06-21 12:43:18 -0700829 }
830 tnode_free(oldtnode);
831 return tn;
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700832nomem:
833 {
834 int size = tnode_child_length(tn);
835 int j;
836
837 for(j = 0; j < size; j++)
838 if (tn->child[j])
839 tnode_free((struct tnode *)tn->child[j]);
840
841 tnode_free(tn);
842
843 return ERR_PTR(-ENOMEM);
844 }
Robert Olsson19baf832005-06-21 12:43:18 -0700845}
846
Olof Johansson91b9a272005-08-09 20:24:39 -0700847static void trie_init(struct trie *t)
Robert Olsson19baf832005-06-21 12:43:18 -0700848{
Olof Johansson91b9a272005-08-09 20:24:39 -0700849 if (!t)
850 return;
851
852 t->size = 0;
853 t->trie = NULL;
854 t->revision = 0;
Robert Olsson19baf832005-06-21 12:43:18 -0700855#ifdef CONFIG_IP_FIB_TRIE_STATS
Olof Johansson91b9a272005-08-09 20:24:39 -0700856 memset(&t->stats, 0, sizeof(struct trie_use_stats));
Robert Olsson19baf832005-06-21 12:43:18 -0700857#endif
Robert Olsson19baf832005-06-21 12:43:18 -0700858}
859
860static struct leaf_info *find_leaf_info(struct hlist_head *head, int plen)
861{
862 struct hlist_node *node;
863 struct leaf_info *li;
864
Olof Johansson91b9a272005-08-09 20:24:39 -0700865 hlist_for_each_entry(li, node, head, hlist)
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700866 if (li->plen == plen)
Robert Olsson19baf832005-06-21 12:43:18 -0700867 return li;
Olof Johansson91b9a272005-08-09 20:24:39 -0700868
Robert Olsson19baf832005-06-21 12:43:18 -0700869 return NULL;
870}
871
872static inline struct list_head * get_fa_head(struct leaf *l, int plen)
873{
Robert Olsson19baf832005-06-21 12:43:18 -0700874 struct leaf_info *li = find_leaf_info(&l->list, plen);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700875
Olof Johansson91b9a272005-08-09 20:24:39 -0700876 if (!li)
877 return NULL;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700878
Olof Johansson91b9a272005-08-09 20:24:39 -0700879 return &li->falh;
Robert Olsson19baf832005-06-21 12:43:18 -0700880}
881
882static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new)
883{
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700884 struct leaf_info *li = NULL, *last = NULL;
Olof Johansson91b9a272005-08-09 20:24:39 -0700885 struct hlist_node *node;
Robert Olsson19baf832005-06-21 12:43:18 -0700886
887 write_lock_bh(&fib_lock);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700888
Olof Johansson91b9a272005-08-09 20:24:39 -0700889 if (hlist_empty(head)) {
Robert Olsson19baf832005-06-21 12:43:18 -0700890 hlist_add_head(&new->hlist, head);
Olof Johansson91b9a272005-08-09 20:24:39 -0700891 } else {
892 hlist_for_each_entry(li, node, head, hlist) {
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700893 if (new->plen > li->plen)
Robert Olsson19baf832005-06-21 12:43:18 -0700894 break;
Olof Johansson91b9a272005-08-09 20:24:39 -0700895
Robert Olsson19baf832005-06-21 12:43:18 -0700896 last = li;
897 }
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700898 if (last)
Robert Olsson19baf832005-06-21 12:43:18 -0700899 hlist_add_after(&last->hlist, &new->hlist);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700900 else
Robert Olsson19baf832005-06-21 12:43:18 -0700901 hlist_add_before(&new->hlist, &li->hlist);
902 }
903 write_unlock_bh(&fib_lock);
904}
905
906static struct leaf *
907fib_find_node(struct trie *t, u32 key)
908{
909 int pos;
910 struct tnode *tn;
911 struct node *n;
912
913 pos = 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700914 n = t->trie;
Robert Olsson19baf832005-06-21 12:43:18 -0700915
916 while (n != NULL && NODE_TYPE(n) == T_TNODE) {
917 tn = (struct tnode *) n;
Olof Johansson91b9a272005-08-09 20:24:39 -0700918
Robert Olsson19baf832005-06-21 12:43:18 -0700919 check_tnode(tn);
Olof Johansson91b9a272005-08-09 20:24:39 -0700920
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700921 if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
Olof Johansson91b9a272005-08-09 20:24:39 -0700922 pos = tn->pos + tn->bits;
Robert Olsson19baf832005-06-21 12:43:18 -0700923 n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits));
Olof Johansson91b9a272005-08-09 20:24:39 -0700924 } else
Robert Olsson19baf832005-06-21 12:43:18 -0700925 break;
926 }
927 /* Case we have found a leaf. Compare prefixes */
928
Olof Johansson91b9a272005-08-09 20:24:39 -0700929 if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key))
930 return (struct leaf *)n;
931
Robert Olsson19baf832005-06-21 12:43:18 -0700932 return NULL;
933}
934
935static struct node *trie_rebalance(struct trie *t, struct tnode *tn)
936{
Olof Johansson91b9a272005-08-09 20:24:39 -0700937 int i;
Robert Olsson19baf832005-06-21 12:43:18 -0700938 int wasfull;
939 t_key cindex, key;
940 struct tnode *tp = NULL;
941
Olof Johansson91b9a272005-08-09 20:24:39 -0700942 BUG_ON(!tn);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700943
Robert Olsson19baf832005-06-21 12:43:18 -0700944 key = tn->key;
945 i = 0;
946
947 while (tn != NULL && NODE_PARENT(tn) != NULL) {
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700948 if (i > 10) {
Robert Olsson19baf832005-06-21 12:43:18 -0700949 printk("Rebalance tn=%p \n", tn);
Olof Johansson91b9a272005-08-09 20:24:39 -0700950 if (tn)
951 printk("tn->parent=%p \n", NODE_PARENT(tn));
952
Robert Olsson19baf832005-06-21 12:43:18 -0700953 printk("Rebalance tp=%p \n", tp);
Olof Johansson91b9a272005-08-09 20:24:39 -0700954 if (tp)
955 printk("tp->parent=%p \n", NODE_PARENT(tp));
Robert Olsson19baf832005-06-21 12:43:18 -0700956 }
957
Olof Johansson91b9a272005-08-09 20:24:39 -0700958 BUG_ON(i > 12); /* Why is this a bug? -ojn */
Robert Olsson19baf832005-06-21 12:43:18 -0700959 i++;
960
961 tp = NODE_PARENT(tn);
962 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
963 wasfull = tnode_full(tp, tnode_get_child(tp, cindex));
964 tn = (struct tnode *) resize (t, (struct tnode *)tn);
965 tnode_put_child_reorg((struct tnode *)tp, cindex,(struct node*)tn, wasfull);
Olof Johansson91b9a272005-08-09 20:24:39 -0700966
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700967 if (!NODE_PARENT(tn))
Robert Olsson19baf832005-06-21 12:43:18 -0700968 break;
969
970 tn = NODE_PARENT(tn);
971 }
972 /* Handle last (top) tnode */
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700973 if (IS_TNODE(tn))
Robert Olsson19baf832005-06-21 12:43:18 -0700974 tn = (struct tnode*) resize(t, (struct tnode *)tn);
975
976 return (struct node*) tn;
977}
978
Robert Olssonf835e472005-06-28 15:00:39 -0700979static struct list_head *
980fib_insert_node(struct trie *t, int *err, u32 key, int plen)
Robert Olsson19baf832005-06-21 12:43:18 -0700981{
982 int pos, newpos;
983 struct tnode *tp = NULL, *tn = NULL;
984 struct node *n;
985 struct leaf *l;
986 int missbit;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700987 struct list_head *fa_head = NULL;
Robert Olsson19baf832005-06-21 12:43:18 -0700988 struct leaf_info *li;
989 t_key cindex;
990
991 pos = 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700992 n = t->trie;
Robert Olsson19baf832005-06-21 12:43:18 -0700993
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700994 /* If we point to NULL, stop. Either the tree is empty and we should
995 * just put a new leaf in if, or we have reached an empty child slot,
Robert Olsson19baf832005-06-21 12:43:18 -0700996 * and we should just put our new leaf in that.
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700997 * If we point to a T_TNODE, check if it matches our key. Note that
998 * a T_TNODE might be skipping any number of bits - its 'pos' need
Robert Olsson19baf832005-06-21 12:43:18 -0700999 * not be the parent's 'pos'+'bits'!
1000 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001001 * If it does match the current key, get pos/bits from it, extract
Robert Olsson19baf832005-06-21 12:43:18 -07001002 * the index from our key, push the T_TNODE and walk the tree.
1003 *
1004 * If it doesn't, we have to replace it with a new T_TNODE.
1005 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001006 * If we point to a T_LEAF, it might or might not have the same key
1007 * as we do. If it does, just change the value, update the T_LEAF's
1008 * value, and return it.
Robert Olsson19baf832005-06-21 12:43:18 -07001009 * If it doesn't, we need to replace it with a T_TNODE.
1010 */
1011
1012 while (n != NULL && NODE_TYPE(n) == T_TNODE) {
1013 tn = (struct tnode *) n;
Olof Johansson91b9a272005-08-09 20:24:39 -07001014
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001015 check_tnode(tn);
Olof Johansson91b9a272005-08-09 20:24:39 -07001016
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001017 if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
Robert Olsson19baf832005-06-21 12:43:18 -07001018 tp = tn;
Olof Johansson91b9a272005-08-09 20:24:39 -07001019 pos = tn->pos + tn->bits;
Robert Olsson19baf832005-06-21 12:43:18 -07001020 n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits));
1021
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001022 if (n && NODE_PARENT(n) != tn) {
Robert Olsson19baf832005-06-21 12:43:18 -07001023 printk("BUG tn=%p, n->parent=%p\n", tn, NODE_PARENT(n));
1024 BUG();
1025 }
Olof Johansson91b9a272005-08-09 20:24:39 -07001026 } else
Robert Olsson19baf832005-06-21 12:43:18 -07001027 break;
1028 }
1029
1030 /*
1031 * n ----> NULL, LEAF or TNODE
1032 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001033 * tp is n's (parent) ----> NULL or TNODE
Robert Olsson19baf832005-06-21 12:43:18 -07001034 */
1035
Olof Johansson91b9a272005-08-09 20:24:39 -07001036 BUG_ON(tp && IS_LEAF(tp));
Robert Olsson19baf832005-06-21 12:43:18 -07001037
1038 /* Case 1: n is a leaf. Compare prefixes */
1039
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001040 if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) {
Olof Johansson91b9a272005-08-09 20:24:39 -07001041 struct leaf *l = (struct leaf *) n;
1042
Robert Olsson19baf832005-06-21 12:43:18 -07001043 li = leaf_info_new(plen);
Olof Johansson91b9a272005-08-09 20:24:39 -07001044
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001045 if (!li) {
Robert Olssonf835e472005-06-28 15:00:39 -07001046 *err = -ENOMEM;
1047 goto err;
1048 }
Robert Olsson19baf832005-06-21 12:43:18 -07001049
1050 fa_head = &li->falh;
1051 insert_leaf_info(&l->list, li);
1052 goto done;
1053 }
1054 t->size++;
1055 l = leaf_new();
1056
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001057 if (!l) {
Robert Olssonf835e472005-06-28 15:00:39 -07001058 *err = -ENOMEM;
1059 goto err;
1060 }
Robert Olsson19baf832005-06-21 12:43:18 -07001061
1062 l->key = key;
1063 li = leaf_info_new(plen);
1064
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001065 if (!li) {
Robert Olssonf835e472005-06-28 15:00:39 -07001066 tnode_free((struct tnode *) l);
1067 *err = -ENOMEM;
1068 goto err;
1069 }
Robert Olsson19baf832005-06-21 12:43:18 -07001070
1071 fa_head = &li->falh;
1072 insert_leaf_info(&l->list, li);
1073
Robert Olsson19baf832005-06-21 12:43:18 -07001074 if (t->trie && n == NULL) {
Olof Johansson91b9a272005-08-09 20:24:39 -07001075 /* Case 2: n is NULL, and will just insert a new leaf */
Robert Olsson19baf832005-06-21 12:43:18 -07001076
1077 NODE_SET_PARENT(l, tp);
Robert Olsson19baf832005-06-21 12:43:18 -07001078
Olof Johansson91b9a272005-08-09 20:24:39 -07001079 BUG_ON(!tp);
1080
1081 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1082 put_child(t, (struct tnode *)tp, cindex, (struct node *)l);
1083 } else {
1084 /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001085 /*
1086 * Add a new tnode here
Robert Olsson19baf832005-06-21 12:43:18 -07001087 * first tnode need some special handling
1088 */
1089
1090 if (tp)
Olof Johansson91b9a272005-08-09 20:24:39 -07001091 pos = tp->pos+tp->bits;
Robert Olsson19baf832005-06-21 12:43:18 -07001092 else
Olof Johansson91b9a272005-08-09 20:24:39 -07001093 pos = 0;
1094
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001095 if (n) {
Robert Olsson19baf832005-06-21 12:43:18 -07001096 newpos = tkey_mismatch(key, pos, n->key);
1097 tn = tnode_new(n->key, newpos, 1);
Olof Johansson91b9a272005-08-09 20:24:39 -07001098 } else {
Robert Olsson19baf832005-06-21 12:43:18 -07001099 newpos = 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001100 tn = tnode_new(key, newpos, 1); /* First tnode */
Robert Olsson19baf832005-06-21 12:43:18 -07001101 }
Robert Olsson19baf832005-06-21 12:43:18 -07001102
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001103 if (!tn) {
Robert Olssonf835e472005-06-28 15:00:39 -07001104 free_leaf_info(li);
1105 tnode_free((struct tnode *) l);
1106 *err = -ENOMEM;
1107 goto err;
Olof Johansson91b9a272005-08-09 20:24:39 -07001108 }
1109
Robert Olsson19baf832005-06-21 12:43:18 -07001110 NODE_SET_PARENT(tn, tp);
1111
Olof Johansson91b9a272005-08-09 20:24:39 -07001112 missbit = tkey_extract_bits(key, newpos, 1);
Robert Olsson19baf832005-06-21 12:43:18 -07001113 put_child(t, tn, missbit, (struct node *)l);
1114 put_child(t, tn, 1-missbit, n);
1115
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001116 if (tp) {
Robert Olsson19baf832005-06-21 12:43:18 -07001117 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1118 put_child(t, (struct tnode *)tp, cindex, (struct node *)tn);
Olof Johansson91b9a272005-08-09 20:24:39 -07001119 } else {
Robert Olsson19baf832005-06-21 12:43:18 -07001120 t->trie = (struct node*) tn; /* First tnode */
1121 tp = tn;
1122 }
1123 }
Olof Johansson91b9a272005-08-09 20:24:39 -07001124
1125 if (tp && tp->pos + tp->bits > 32)
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001126 printk("ERROR tp=%p pos=%d, bits=%d, key=%0x plen=%d\n",
Robert Olsson19baf832005-06-21 12:43:18 -07001127 tp, tp->pos, tp->bits, key, plen);
Olof Johansson91b9a272005-08-09 20:24:39 -07001128
Robert Olsson19baf832005-06-21 12:43:18 -07001129 /* Rebalance the trie */
1130 t->trie = trie_rebalance(t, tp);
Robert Olssonf835e472005-06-28 15:00:39 -07001131done:
1132 t->revision++;
Olof Johansson91b9a272005-08-09 20:24:39 -07001133err:
Robert Olsson19baf832005-06-21 12:43:18 -07001134 return fa_head;
1135}
1136
1137static int
1138fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
1139 struct nlmsghdr *nlhdr, struct netlink_skb_parms *req)
1140{
1141 struct trie *t = (struct trie *) tb->tb_data;
1142 struct fib_alias *fa, *new_fa;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001143 struct list_head *fa_head = NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07001144 struct fib_info *fi;
1145 int plen = r->rtm_dst_len;
1146 int type = r->rtm_type;
1147 u8 tos = r->rtm_tos;
1148 u32 key, mask;
1149 int err;
1150 struct leaf *l;
1151
1152 if (plen > 32)
1153 return -EINVAL;
1154
1155 key = 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001156 if (rta->rta_dst)
Robert Olsson19baf832005-06-21 12:43:18 -07001157 memcpy(&key, rta->rta_dst, 4);
1158
1159 key = ntohl(key);
1160
Olof Johansson91b9a272005-08-09 20:24:39 -07001161 DBG("Insert table=%d %08x/%d\n", tb->tb_id, key, plen);
Robert Olsson19baf832005-06-21 12:43:18 -07001162
Olof Johansson91b9a272005-08-09 20:24:39 -07001163 mask = ntohl(inet_make_mask(plen));
Robert Olsson19baf832005-06-21 12:43:18 -07001164
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001165 if (key & ~mask)
Robert Olsson19baf832005-06-21 12:43:18 -07001166 return -EINVAL;
1167
1168 key = key & mask;
1169
Olof Johansson91b9a272005-08-09 20:24:39 -07001170 fi = fib_create_info(r, rta, nlhdr, &err);
1171
1172 if (!fi)
Robert Olsson19baf832005-06-21 12:43:18 -07001173 goto err;
1174
1175 l = fib_find_node(t, key);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001176 fa = NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07001177
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001178 if (l) {
Robert Olsson19baf832005-06-21 12:43:18 -07001179 fa_head = get_fa_head(l, plen);
1180 fa = fib_find_alias(fa_head, tos, fi->fib_priority);
1181 }
1182
1183 /* Now fa, if non-NULL, points to the first fib alias
1184 * with the same keys [prefix,tos,priority], if such key already
1185 * exists or to the node before which we will insert new one.
1186 *
1187 * If fa is NULL, we will need to allocate a new one and
1188 * insert to the head of f.
1189 *
1190 * If f is NULL, no fib node matched the destination key
1191 * and we need to allocate a new one of those as well.
1192 */
1193
Olof Johansson91b9a272005-08-09 20:24:39 -07001194 if (fa && fa->fa_info->fib_priority == fi->fib_priority) {
Robert Olsson19baf832005-06-21 12:43:18 -07001195 struct fib_alias *fa_orig;
1196
1197 err = -EEXIST;
1198 if (nlhdr->nlmsg_flags & NLM_F_EXCL)
1199 goto out;
1200
1201 if (nlhdr->nlmsg_flags & NLM_F_REPLACE) {
1202 struct fib_info *fi_drop;
1203 u8 state;
1204
1205 write_lock_bh(&fib_lock);
1206
1207 fi_drop = fa->fa_info;
1208 fa->fa_info = fi;
1209 fa->fa_type = type;
1210 fa->fa_scope = r->rtm_scope;
1211 state = fa->fa_state;
1212 fa->fa_state &= ~FA_S_ACCESSED;
1213
1214 write_unlock_bh(&fib_lock);
1215
1216 fib_release_info(fi_drop);
1217 if (state & FA_S_ACCESSED)
Olof Johansson91b9a272005-08-09 20:24:39 -07001218 rt_cache_flush(-1);
Robert Olsson19baf832005-06-21 12:43:18 -07001219
Olof Johansson91b9a272005-08-09 20:24:39 -07001220 goto succeeded;
Robert Olsson19baf832005-06-21 12:43:18 -07001221 }
1222 /* Error if we find a perfect match which
1223 * uses the same scope, type, and nexthop
1224 * information.
1225 */
1226 fa_orig = fa;
1227 list_for_each_entry(fa, fa_orig->fa_list.prev, fa_list) {
1228 if (fa->fa_tos != tos)
1229 break;
1230 if (fa->fa_info->fib_priority != fi->fib_priority)
1231 break;
1232 if (fa->fa_type == type &&
1233 fa->fa_scope == r->rtm_scope &&
1234 fa->fa_info == fi) {
1235 goto out;
1236 }
1237 }
1238 if (!(nlhdr->nlmsg_flags & NLM_F_APPEND))
1239 fa = fa_orig;
1240 }
1241 err = -ENOENT;
Olof Johansson91b9a272005-08-09 20:24:39 -07001242 if (!(nlhdr->nlmsg_flags & NLM_F_CREATE))
Robert Olsson19baf832005-06-21 12:43:18 -07001243 goto out;
1244
1245 err = -ENOBUFS;
1246 new_fa = kmem_cache_alloc(fn_alias_kmem, SLAB_KERNEL);
1247 if (new_fa == NULL)
1248 goto out;
1249
1250 new_fa->fa_info = fi;
1251 new_fa->fa_tos = tos;
1252 new_fa->fa_type = type;
1253 new_fa->fa_scope = r->rtm_scope;
1254 new_fa->fa_state = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001255 /*
1256 * Insert new entry to the list.
1257 */
1258
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001259 if (!fa_head) {
Robert Olssonf835e472005-06-28 15:00:39 -07001260 fa_head = fib_insert_node(t, &err, key, plen);
1261 err = 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001262 if (err)
Robert Olssonf835e472005-06-28 15:00:39 -07001263 goto out_free_new_fa;
1264 }
Robert Olsson19baf832005-06-21 12:43:18 -07001265
1266 write_lock_bh(&fib_lock);
1267
Olof Johansson91b9a272005-08-09 20:24:39 -07001268 list_add_tail(&new_fa->fa_list, (fa ? &fa->fa_list : fa_head));
Robert Olsson19baf832005-06-21 12:43:18 -07001269
1270 write_unlock_bh(&fib_lock);
1271
1272 rt_cache_flush(-1);
1273 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, nlhdr, req);
1274succeeded:
1275 return 0;
Robert Olssonf835e472005-06-28 15:00:39 -07001276
1277out_free_new_fa:
1278 kmem_cache_free(fn_alias_kmem, new_fa);
Robert Olsson19baf832005-06-21 12:43:18 -07001279out:
1280 fib_release_info(fi);
Olof Johansson91b9a272005-08-09 20:24:39 -07001281err:
Robert Olsson19baf832005-06-21 12:43:18 -07001282 return err;
1283}
1284
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001285static inline int check_leaf(struct trie *t, struct leaf *l, t_key key, int *plen, const struct flowi *flp,
Patrick McHardy06c74272005-08-23 22:06:09 -07001286 struct fib_result *res)
Robert Olsson19baf832005-06-21 12:43:18 -07001287{
Patrick McHardy06c74272005-08-23 22:06:09 -07001288 int err, i;
Robert Olsson19baf832005-06-21 12:43:18 -07001289 t_key mask;
1290 struct leaf_info *li;
1291 struct hlist_head *hhead = &l->list;
1292 struct hlist_node *node;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001293
Robert Olsson19baf832005-06-21 12:43:18 -07001294 hlist_for_each_entry(li, node, hhead, hlist) {
Robert Olsson19baf832005-06-21 12:43:18 -07001295 i = li->plen;
1296 mask = ntohl(inet_make_mask(i));
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001297 if (l->key != (key & mask))
Robert Olsson19baf832005-06-21 12:43:18 -07001298 continue;
1299
Patrick McHardy06c74272005-08-23 22:06:09 -07001300 if ((err = fib_semantic_match(&li->falh, flp, res, l->key, mask, i)) <= 0) {
Robert Olsson19baf832005-06-21 12:43:18 -07001301 *plen = i;
1302#ifdef CONFIG_IP_FIB_TRIE_STATS
1303 t->stats.semantic_match_passed++;
1304#endif
Patrick McHardy06c74272005-08-23 22:06:09 -07001305 return err;
Robert Olsson19baf832005-06-21 12:43:18 -07001306 }
1307#ifdef CONFIG_IP_FIB_TRIE_STATS
1308 t->stats.semantic_match_miss++;
1309#endif
1310 }
Patrick McHardy06c74272005-08-23 22:06:09 -07001311 return 1;
Robert Olsson19baf832005-06-21 12:43:18 -07001312}
1313
1314static int
1315fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
1316{
1317 struct trie *t = (struct trie *) tb->tb_data;
1318 int plen, ret = 0;
1319 struct node *n;
1320 struct tnode *pn;
1321 int pos, bits;
Olof Johansson91b9a272005-08-09 20:24:39 -07001322 t_key key = ntohl(flp->fl4_dst);
Robert Olsson19baf832005-06-21 12:43:18 -07001323 int chopped_off;
1324 t_key cindex = 0;
1325 int current_prefix_length = KEYLENGTH;
Olof Johansson91b9a272005-08-09 20:24:39 -07001326 struct tnode *cn;
1327 t_key node_prefix, key_prefix, pref_mismatch;
1328 int mp;
1329
Robert Olsson19baf832005-06-21 12:43:18 -07001330 n = t->trie;
1331
1332 read_lock(&fib_lock);
Olof Johansson91b9a272005-08-09 20:24:39 -07001333
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001334 if (!n)
Robert Olsson19baf832005-06-21 12:43:18 -07001335 goto failed;
1336
1337#ifdef CONFIG_IP_FIB_TRIE_STATS
1338 t->stats.gets++;
1339#endif
1340
1341 /* Just a leaf? */
1342 if (IS_LEAF(n)) {
Patrick McHardy06c74272005-08-23 22:06:09 -07001343 if ((ret = check_leaf(t, (struct leaf *)n, key, &plen, flp, res)) <= 0)
Robert Olsson19baf832005-06-21 12:43:18 -07001344 goto found;
1345 goto failed;
1346 }
1347 pn = (struct tnode *) n;
1348 chopped_off = 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001349
Olof Johansson91b9a272005-08-09 20:24:39 -07001350 while (pn) {
Robert Olsson19baf832005-06-21 12:43:18 -07001351 pos = pn->pos;
1352 bits = pn->bits;
1353
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001354 if (!chopped_off)
Robert Olsson19baf832005-06-21 12:43:18 -07001355 cindex = tkey_extract_bits(MASK_PFX(key, current_prefix_length), pos, bits);
1356
1357 n = tnode_get_child(pn, cindex);
1358
1359 if (n == NULL) {
1360#ifdef CONFIG_IP_FIB_TRIE_STATS
1361 t->stats.null_node_hit++;
1362#endif
1363 goto backtrace;
1364 }
1365
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001366 if (IS_LEAF(n)) {
Patrick McHardy06c74272005-08-23 22:06:09 -07001367 if ((ret = check_leaf(t, (struct leaf *)n, key, &plen, flp, res)) <= 0)
Robert Olsson19baf832005-06-21 12:43:18 -07001368 goto found;
Olof Johansson91b9a272005-08-09 20:24:39 -07001369 else
1370 goto backtrace;
1371 }
1372
1373#define HL_OPTIMIZE
1374#ifdef HL_OPTIMIZE
1375 cn = (struct tnode *)n;
1376
1377 /*
1378 * It's a tnode, and we can do some extra checks here if we
1379 * like, to avoid descending into a dead-end branch.
1380 * This tnode is in the parent's child array at index
1381 * key[p_pos..p_pos+p_bits] but potentially with some bits
1382 * chopped off, so in reality the index may be just a
1383 * subprefix, padded with zero at the end.
1384 * We can also take a look at any skipped bits in this
1385 * tnode - everything up to p_pos is supposed to be ok,
1386 * and the non-chopped bits of the index (se previous
1387 * paragraph) are also guaranteed ok, but the rest is
1388 * considered unknown.
1389 *
1390 * The skipped bits are key[pos+bits..cn->pos].
1391 */
1392
1393 /* If current_prefix_length < pos+bits, we are already doing
1394 * actual prefix matching, which means everything from
1395 * pos+(bits-chopped_off) onward must be zero along some
1396 * branch of this subtree - otherwise there is *no* valid
1397 * prefix present. Here we can only check the skipped
1398 * bits. Remember, since we have already indexed into the
1399 * parent's child array, we know that the bits we chopped of
1400 * *are* zero.
1401 */
1402
1403 /* NOTA BENE: CHECKING ONLY SKIPPED BITS FOR THE NEW NODE HERE */
1404
1405 if (current_prefix_length < pos+bits) {
1406 if (tkey_extract_bits(cn->key, current_prefix_length,
1407 cn->pos - current_prefix_length) != 0 ||
1408 !(cn->child[0]))
1409 goto backtrace;
1410 }
1411
1412 /*
1413 * If chopped_off=0, the index is fully validated and we
1414 * only need to look at the skipped bits for this, the new,
1415 * tnode. What we actually want to do is to find out if
1416 * these skipped bits match our key perfectly, or if we will
1417 * have to count on finding a matching prefix further down,
1418 * because if we do, we would like to have some way of
1419 * verifying the existence of such a prefix at this point.
1420 */
1421
1422 /* The only thing we can do at this point is to verify that
1423 * any such matching prefix can indeed be a prefix to our
1424 * key, and if the bits in the node we are inspecting that
1425 * do not match our key are not ZERO, this cannot be true.
1426 * Thus, find out where there is a mismatch (before cn->pos)
1427 * and verify that all the mismatching bits are zero in the
1428 * new tnode's key.
1429 */
1430
1431 /* Note: We aren't very concerned about the piece of the key
1432 * that precede pn->pos+pn->bits, since these have already been
1433 * checked. The bits after cn->pos aren't checked since these are
1434 * by definition "unknown" at this point. Thus, what we want to
1435 * see is if we are about to enter the "prefix matching" state,
1436 * and in that case verify that the skipped bits that will prevail
1437 * throughout this subtree are zero, as they have to be if we are
1438 * to find a matching prefix.
1439 */
1440
1441 node_prefix = MASK_PFX(cn->key, cn->pos);
1442 key_prefix = MASK_PFX(key, cn->pos);
1443 pref_mismatch = key_prefix^node_prefix;
1444 mp = 0;
1445
1446 /* In short: If skipped bits in this node do not match the search
1447 * key, enter the "prefix matching" state.directly.
1448 */
1449 if (pref_mismatch) {
1450 while (!(pref_mismatch & (1<<(KEYLENGTH-1)))) {
1451 mp++;
1452 pref_mismatch = pref_mismatch <<1;
1453 }
1454 key_prefix = tkey_extract_bits(cn->key, mp, cn->pos-mp);
1455
1456 if (key_prefix != 0)
1457 goto backtrace;
1458
1459 if (current_prefix_length >= cn->pos)
1460 current_prefix_length = mp;
1461 }
1462#endif
1463 pn = (struct tnode *)n; /* Descend */
1464 chopped_off = 0;
1465 continue;
1466
Robert Olsson19baf832005-06-21 12:43:18 -07001467backtrace:
1468 chopped_off++;
1469
1470 /* As zero don't change the child key (cindex) */
Olof Johansson91b9a272005-08-09 20:24:39 -07001471 while ((chopped_off <= pn->bits) && !(cindex & (1<<(chopped_off-1))))
Robert Olsson19baf832005-06-21 12:43:18 -07001472 chopped_off++;
Robert Olsson19baf832005-06-21 12:43:18 -07001473
1474 /* Decrease current_... with bits chopped off */
1475 if (current_prefix_length > pn->pos + pn->bits - chopped_off)
1476 current_prefix_length = pn->pos + pn->bits - chopped_off;
Olof Johansson91b9a272005-08-09 20:24:39 -07001477
Robert Olsson19baf832005-06-21 12:43:18 -07001478 /*
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001479 * Either we do the actual chop off according or if we have
Robert Olsson19baf832005-06-21 12:43:18 -07001480 * chopped off all bits in this tnode walk up to our parent.
1481 */
1482
Olof Johansson91b9a272005-08-09 20:24:39 -07001483 if (chopped_off <= pn->bits) {
Robert Olsson19baf832005-06-21 12:43:18 -07001484 cindex &= ~(1 << (chopped_off-1));
Olof Johansson91b9a272005-08-09 20:24:39 -07001485 } else {
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001486 if (NODE_PARENT(pn) == NULL)
Robert Olsson19baf832005-06-21 12:43:18 -07001487 goto failed;
Olof Johansson91b9a272005-08-09 20:24:39 -07001488
Robert Olsson19baf832005-06-21 12:43:18 -07001489 /* Get Child's index */
1490 cindex = tkey_extract_bits(pn->key, NODE_PARENT(pn)->pos, NODE_PARENT(pn)->bits);
1491 pn = NODE_PARENT(pn);
1492 chopped_off = 0;
1493
1494#ifdef CONFIG_IP_FIB_TRIE_STATS
1495 t->stats.backtrack++;
1496#endif
1497 goto backtrace;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001498 }
Robert Olsson19baf832005-06-21 12:43:18 -07001499 }
1500failed:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001501 ret = 1;
Robert Olsson19baf832005-06-21 12:43:18 -07001502found:
1503 read_unlock(&fib_lock);
1504 return ret;
1505}
1506
1507static int trie_leaf_remove(struct trie *t, t_key key)
1508{
1509 t_key cindex;
1510 struct tnode *tp = NULL;
1511 struct node *n = t->trie;
1512 struct leaf *l;
1513
Olof Johansson91b9a272005-08-09 20:24:39 -07001514 DBG("entering trie_leaf_remove(%p)\n", n);
Robert Olsson19baf832005-06-21 12:43:18 -07001515
1516 /* Note that in the case skipped bits, those bits are *not* checked!
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001517 * When we finish this, we will have NULL or a T_LEAF, and the
Robert Olsson19baf832005-06-21 12:43:18 -07001518 * T_LEAF may or may not match our key.
1519 */
1520
Olof Johansson91b9a272005-08-09 20:24:39 -07001521 while (n != NULL && IS_TNODE(n)) {
Robert Olsson19baf832005-06-21 12:43:18 -07001522 struct tnode *tn = (struct tnode *) n;
1523 check_tnode(tn);
1524 n = tnode_get_child(tn ,tkey_extract_bits(key, tn->pos, tn->bits));
1525
Olof Johansson91b9a272005-08-09 20:24:39 -07001526 if (n && NODE_PARENT(n) != tn) {
1527 printk("BUG tn=%p, n->parent=%p\n", tn, NODE_PARENT(n));
1528 BUG();
1529 }
1530 }
Robert Olsson19baf832005-06-21 12:43:18 -07001531 l = (struct leaf *) n;
1532
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001533 if (!n || !tkey_equals(l->key, key))
Robert Olsson19baf832005-06-21 12:43:18 -07001534 return 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001535
1536 /*
1537 * Key found.
1538 * Remove the leaf and rebalance the tree
Robert Olsson19baf832005-06-21 12:43:18 -07001539 */
1540
1541 t->revision++;
1542 t->size--;
1543
1544 tp = NODE_PARENT(n);
1545 tnode_free((struct tnode *) n);
1546
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001547 if (tp) {
Robert Olsson19baf832005-06-21 12:43:18 -07001548 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1549 put_child(t, (struct tnode *)tp, cindex, NULL);
1550 t->trie = trie_rebalance(t, tp);
Olof Johansson91b9a272005-08-09 20:24:39 -07001551 } else
Robert Olsson19baf832005-06-21 12:43:18 -07001552 t->trie = NULL;
1553
1554 return 1;
1555}
1556
1557static int
1558fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
Olof Johansson91b9a272005-08-09 20:24:39 -07001559 struct nlmsghdr *nlhdr, struct netlink_skb_parms *req)
Robert Olsson19baf832005-06-21 12:43:18 -07001560{
1561 struct trie *t = (struct trie *) tb->tb_data;
1562 u32 key, mask;
1563 int plen = r->rtm_dst_len;
1564 u8 tos = r->rtm_tos;
1565 struct fib_alias *fa, *fa_to_delete;
1566 struct list_head *fa_head;
1567 struct leaf *l;
Olof Johansson91b9a272005-08-09 20:24:39 -07001568 int kill_li = 0;
1569 struct leaf_info *li;
1570
Robert Olsson19baf832005-06-21 12:43:18 -07001571
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001572 if (plen > 32)
Robert Olsson19baf832005-06-21 12:43:18 -07001573 return -EINVAL;
1574
1575 key = 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001576 if (rta->rta_dst)
Robert Olsson19baf832005-06-21 12:43:18 -07001577 memcpy(&key, rta->rta_dst, 4);
1578
1579 key = ntohl(key);
Olof Johansson91b9a272005-08-09 20:24:39 -07001580 mask = ntohl(inet_make_mask(plen));
Robert Olsson19baf832005-06-21 12:43:18 -07001581
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001582 if (key & ~mask)
Robert Olsson19baf832005-06-21 12:43:18 -07001583 return -EINVAL;
1584
1585 key = key & mask;
1586 l = fib_find_node(t, key);
1587
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001588 if (!l)
Robert Olsson19baf832005-06-21 12:43:18 -07001589 return -ESRCH;
1590
1591 fa_head = get_fa_head(l, plen);
1592 fa = fib_find_alias(fa_head, tos, 0);
1593
1594 if (!fa)
1595 return -ESRCH;
1596
Olof Johansson91b9a272005-08-09 20:24:39 -07001597 DBG("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t);
Robert Olsson19baf832005-06-21 12:43:18 -07001598
1599 fa_to_delete = NULL;
1600 fa_head = fa->fa_list.prev;
1601 list_for_each_entry(fa, fa_head, fa_list) {
1602 struct fib_info *fi = fa->fa_info;
1603
1604 if (fa->fa_tos != tos)
1605 break;
1606
1607 if ((!r->rtm_type ||
1608 fa->fa_type == r->rtm_type) &&
1609 (r->rtm_scope == RT_SCOPE_NOWHERE ||
1610 fa->fa_scope == r->rtm_scope) &&
1611 (!r->rtm_protocol ||
1612 fi->fib_protocol == r->rtm_protocol) &&
1613 fib_nh_match(r, nlhdr, rta, fi) == 0) {
1614 fa_to_delete = fa;
1615 break;
1616 }
1617 }
1618
Olof Johansson91b9a272005-08-09 20:24:39 -07001619 if (!fa_to_delete)
1620 return -ESRCH;
Robert Olsson19baf832005-06-21 12:43:18 -07001621
Olof Johansson91b9a272005-08-09 20:24:39 -07001622 fa = fa_to_delete;
1623 rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, nlhdr, req);
Robert Olsson19baf832005-06-21 12:43:18 -07001624
Olof Johansson91b9a272005-08-09 20:24:39 -07001625 l = fib_find_node(t, key);
1626 li = find_leaf_info(&l->list, plen);
Robert Olsson19baf832005-06-21 12:43:18 -07001627
Olof Johansson91b9a272005-08-09 20:24:39 -07001628 write_lock_bh(&fib_lock);
Robert Olsson19baf832005-06-21 12:43:18 -07001629
Olof Johansson91b9a272005-08-09 20:24:39 -07001630 list_del(&fa->fa_list);
Robert Olsson19baf832005-06-21 12:43:18 -07001631
Olof Johansson91b9a272005-08-09 20:24:39 -07001632 if (list_empty(fa_head)) {
1633 hlist_del(&li->hlist);
1634 kill_li = 1;
Robert Olsson19baf832005-06-21 12:43:18 -07001635 }
Olof Johansson91b9a272005-08-09 20:24:39 -07001636 write_unlock_bh(&fib_lock);
1637
1638 if (kill_li)
1639 free_leaf_info(li);
1640
1641 if (hlist_empty(&l->list))
1642 trie_leaf_remove(t, key);
1643
1644 if (fa->fa_state & FA_S_ACCESSED)
1645 rt_cache_flush(-1);
1646
1647 fn_free_alias(fa);
1648 return 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001649}
1650
1651static int trie_flush_list(struct trie *t, struct list_head *head)
1652{
1653 struct fib_alias *fa, *fa_node;
1654 int found = 0;
1655
1656 list_for_each_entry_safe(fa, fa_node, head, fa_list) {
1657 struct fib_info *fi = fa->fa_info;
Robert Olsson19baf832005-06-21 12:43:18 -07001658
Olof Johansson91b9a272005-08-09 20:24:39 -07001659 if (fi && (fi->fib_flags&RTNH_F_DEAD)) {
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001660 write_lock_bh(&fib_lock);
Robert Olsson19baf832005-06-21 12:43:18 -07001661 list_del(&fa->fa_list);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001662 write_unlock_bh(&fib_lock);
Robert Olsson19baf832005-06-21 12:43:18 -07001663
1664 fn_free_alias(fa);
1665 found++;
1666 }
1667 }
1668 return found;
1669}
1670
1671static int trie_flush_leaf(struct trie *t, struct leaf *l)
1672{
1673 int found = 0;
1674 struct hlist_head *lih = &l->list;
1675 struct hlist_node *node, *tmp;
1676 struct leaf_info *li = NULL;
1677
1678 hlist_for_each_entry_safe(li, node, tmp, lih, hlist) {
Robert Olsson19baf832005-06-21 12:43:18 -07001679 found += trie_flush_list(t, &li->falh);
1680
1681 if (list_empty(&li->falh)) {
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001682 write_lock_bh(&fib_lock);
Robert Olsson19baf832005-06-21 12:43:18 -07001683 hlist_del(&li->hlist);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001684 write_unlock_bh(&fib_lock);
Robert Olsson19baf832005-06-21 12:43:18 -07001685
1686 free_leaf_info(li);
1687 }
1688 }
1689 return found;
1690}
1691
1692static struct leaf *nextleaf(struct trie *t, struct leaf *thisleaf)
1693{
1694 struct node *c = (struct node *) thisleaf;
1695 struct tnode *p;
1696 int idx;
1697
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001698 if (c == NULL) {
1699 if (t->trie == NULL)
Robert Olsson19baf832005-06-21 12:43:18 -07001700 return NULL;
1701
1702 if (IS_LEAF(t->trie)) /* trie w. just a leaf */
1703 return (struct leaf *) t->trie;
1704
1705 p = (struct tnode*) t->trie; /* Start */
Olof Johansson91b9a272005-08-09 20:24:39 -07001706 } else
Robert Olsson19baf832005-06-21 12:43:18 -07001707 p = (struct tnode *) NODE_PARENT(c);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001708
Robert Olsson19baf832005-06-21 12:43:18 -07001709 while (p) {
1710 int pos, last;
1711
1712 /* Find the next child of the parent */
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001713 if (c)
1714 pos = 1 + tkey_extract_bits(c->key, p->pos, p->bits);
1715 else
Robert Olsson19baf832005-06-21 12:43:18 -07001716 pos = 0;
1717
1718 last = 1 << p->bits;
Olof Johansson91b9a272005-08-09 20:24:39 -07001719 for (idx = pos; idx < last ; idx++) {
1720 if (!p->child[idx])
1721 continue;
Robert Olsson19baf832005-06-21 12:43:18 -07001722
Olof Johansson91b9a272005-08-09 20:24:39 -07001723 /* Decend if tnode */
1724 while (IS_TNODE(p->child[idx])) {
1725 p = (struct tnode*) p->child[idx];
1726 idx = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001727
Olof Johansson91b9a272005-08-09 20:24:39 -07001728 /* Rightmost non-NULL branch */
1729 if (p && IS_TNODE(p))
1730 while (p->child[idx] == NULL && idx < (1 << p->bits)) idx++;
Robert Olsson19baf832005-06-21 12:43:18 -07001731
Olof Johansson91b9a272005-08-09 20:24:39 -07001732 /* Done with this tnode? */
1733 if (idx >= (1 << p->bits) || p->child[idx] == NULL)
1734 goto up;
Robert Olsson19baf832005-06-21 12:43:18 -07001735 }
Olof Johansson91b9a272005-08-09 20:24:39 -07001736 return (struct leaf*) p->child[idx];
Robert Olsson19baf832005-06-21 12:43:18 -07001737 }
1738up:
1739 /* No more children go up one step */
Olof Johansson91b9a272005-08-09 20:24:39 -07001740 c = (struct node *) p;
Robert Olsson19baf832005-06-21 12:43:18 -07001741 p = (struct tnode *) NODE_PARENT(p);
1742 }
1743 return NULL; /* Ready. Root of trie */
1744}
1745
1746static int fn_trie_flush(struct fib_table *tb)
1747{
1748 struct trie *t = (struct trie *) tb->tb_data;
1749 struct leaf *ll = NULL, *l = NULL;
1750 int found = 0, h;
1751
1752 t->revision++;
1753
Olof Johansson91b9a272005-08-09 20:24:39 -07001754 for (h = 0; (l = nextleaf(t, l)) != NULL; h++) {
Robert Olsson19baf832005-06-21 12:43:18 -07001755 found += trie_flush_leaf(t, l);
1756
1757 if (ll && hlist_empty(&ll->list))
1758 trie_leaf_remove(t, ll->key);
1759 ll = l;
1760 }
1761
1762 if (ll && hlist_empty(&ll->list))
1763 trie_leaf_remove(t, ll->key);
1764
Olof Johansson91b9a272005-08-09 20:24:39 -07001765 DBG("trie_flush found=%d\n", found);
Robert Olsson19baf832005-06-21 12:43:18 -07001766 return found;
1767}
1768
Olof Johansson91b9a272005-08-09 20:24:39 -07001769static int trie_last_dflt = -1;
Robert Olsson19baf832005-06-21 12:43:18 -07001770
1771static void
1772fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
1773{
1774 struct trie *t = (struct trie *) tb->tb_data;
1775 int order, last_idx;
1776 struct fib_info *fi = NULL;
1777 struct fib_info *last_resort;
1778 struct fib_alias *fa = NULL;
1779 struct list_head *fa_head;
1780 struct leaf *l;
1781
1782 last_idx = -1;
1783 last_resort = NULL;
1784 order = -1;
1785
1786 read_lock(&fib_lock);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001787
Robert Olsson19baf832005-06-21 12:43:18 -07001788 l = fib_find_node(t, 0);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001789 if (!l)
Robert Olsson19baf832005-06-21 12:43:18 -07001790 goto out;
1791
1792 fa_head = get_fa_head(l, 0);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001793 if (!fa_head)
Robert Olsson19baf832005-06-21 12:43:18 -07001794 goto out;
1795
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001796 if (list_empty(fa_head))
Robert Olsson19baf832005-06-21 12:43:18 -07001797 goto out;
1798
1799 list_for_each_entry(fa, fa_head, fa_list) {
1800 struct fib_info *next_fi = fa->fa_info;
Olof Johansson91b9a272005-08-09 20:24:39 -07001801
Robert Olsson19baf832005-06-21 12:43:18 -07001802 if (fa->fa_scope != res->scope ||
1803 fa->fa_type != RTN_UNICAST)
1804 continue;
Olof Johansson91b9a272005-08-09 20:24:39 -07001805
Robert Olsson19baf832005-06-21 12:43:18 -07001806 if (next_fi->fib_priority > res->fi->fib_priority)
1807 break;
1808 if (!next_fi->fib_nh[0].nh_gw ||
1809 next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
1810 continue;
1811 fa->fa_state |= FA_S_ACCESSED;
Olof Johansson91b9a272005-08-09 20:24:39 -07001812
Robert Olsson19baf832005-06-21 12:43:18 -07001813 if (fi == NULL) {
1814 if (next_fi != res->fi)
1815 break;
1816 } else if (!fib_detect_death(fi, order, &last_resort,
1817 &last_idx, &trie_last_dflt)) {
1818 if (res->fi)
1819 fib_info_put(res->fi);
1820 res->fi = fi;
1821 atomic_inc(&fi->fib_clntref);
1822 trie_last_dflt = order;
1823 goto out;
1824 }
1825 fi = next_fi;
1826 order++;
1827 }
1828 if (order <= 0 || fi == NULL) {
1829 trie_last_dflt = -1;
1830 goto out;
1831 }
1832
1833 if (!fib_detect_death(fi, order, &last_resort, &last_idx, &trie_last_dflt)) {
1834 if (res->fi)
1835 fib_info_put(res->fi);
1836 res->fi = fi;
1837 atomic_inc(&fi->fib_clntref);
1838 trie_last_dflt = order;
1839 goto out;
1840 }
1841 if (last_idx >= 0) {
1842 if (res->fi)
1843 fib_info_put(res->fi);
1844 res->fi = last_resort;
1845 if (last_resort)
1846 atomic_inc(&last_resort->fib_clntref);
1847 }
1848 trie_last_dflt = last_idx;
1849 out:;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001850 read_unlock(&fib_lock);
Robert Olsson19baf832005-06-21 12:43:18 -07001851}
1852
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001853static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fib_table *tb,
Robert Olsson19baf832005-06-21 12:43:18 -07001854 struct sk_buff *skb, struct netlink_callback *cb)
1855{
1856 int i, s_i;
1857 struct fib_alias *fa;
1858
Olof Johansson91b9a272005-08-09 20:24:39 -07001859 u32 xkey = htonl(key);
Robert Olsson19baf832005-06-21 12:43:18 -07001860
Olof Johansson91b9a272005-08-09 20:24:39 -07001861 s_i = cb->args[3];
Robert Olsson19baf832005-06-21 12:43:18 -07001862 i = 0;
1863
1864 list_for_each_entry(fa, fah, fa_list) {
1865 if (i < s_i) {
1866 i++;
1867 continue;
1868 }
1869 if (fa->fa_info->fib_nh == NULL) {
1870 printk("Trie error _fib_nh=NULL in fa[%d] k=%08x plen=%d\n", i, key, plen);
1871 i++;
1872 continue;
1873 }
1874 if (fa->fa_info == NULL) {
1875 printk("Trie error fa_info=NULL in fa[%d] k=%08x plen=%d\n", i, key, plen);
1876 i++;
1877 continue;
1878 }
1879
1880 if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid,
1881 cb->nlh->nlmsg_seq,
1882 RTM_NEWROUTE,
1883 tb->tb_id,
1884 fa->fa_type,
1885 fa->fa_scope,
1886 &xkey,
1887 plen,
1888 fa->fa_tos,
David S. Miller90f66912005-06-21 14:43:28 -07001889 fa->fa_info, 0) < 0) {
Robert Olsson19baf832005-06-21 12:43:18 -07001890 cb->args[3] = i;
1891 return -1;
Olof Johansson91b9a272005-08-09 20:24:39 -07001892 }
Robert Olsson19baf832005-06-21 12:43:18 -07001893 i++;
1894 }
Olof Johansson91b9a272005-08-09 20:24:39 -07001895 cb->args[3] = i;
Robert Olsson19baf832005-06-21 12:43:18 -07001896 return skb->len;
1897}
1898
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001899static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, struct sk_buff *skb,
Robert Olsson19baf832005-06-21 12:43:18 -07001900 struct netlink_callback *cb)
1901{
1902 int h, s_h;
1903 struct list_head *fa_head;
1904 struct leaf *l = NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07001905
Olof Johansson91b9a272005-08-09 20:24:39 -07001906 s_h = cb->args[2];
Robert Olsson19baf832005-06-21 12:43:18 -07001907
Olof Johansson91b9a272005-08-09 20:24:39 -07001908 for (h = 0; (l = nextleaf(t, l)) != NULL; h++) {
Robert Olsson19baf832005-06-21 12:43:18 -07001909 if (h < s_h)
1910 continue;
1911 if (h > s_h)
1912 memset(&cb->args[3], 0,
1913 sizeof(cb->args) - 3*sizeof(cb->args[0]));
1914
1915 fa_head = get_fa_head(l, plen);
Olof Johansson91b9a272005-08-09 20:24:39 -07001916
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001917 if (!fa_head)
Robert Olsson19baf832005-06-21 12:43:18 -07001918 continue;
1919
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001920 if (list_empty(fa_head))
Robert Olsson19baf832005-06-21 12:43:18 -07001921 continue;
1922
1923 if (fn_trie_dump_fa(l->key, plen, fa_head, tb, skb, cb)<0) {
Olof Johansson91b9a272005-08-09 20:24:39 -07001924 cb->args[2] = h;
Robert Olsson19baf832005-06-21 12:43:18 -07001925 return -1;
1926 }
1927 }
Olof Johansson91b9a272005-08-09 20:24:39 -07001928 cb->args[2] = h;
Robert Olsson19baf832005-06-21 12:43:18 -07001929 return skb->len;
1930}
1931
1932static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb)
1933{
1934 int m, s_m;
1935 struct trie *t = (struct trie *) tb->tb_data;
1936
1937 s_m = cb->args[1];
1938
1939 read_lock(&fib_lock);
Olof Johansson91b9a272005-08-09 20:24:39 -07001940 for (m = 0; m <= 32; m++) {
Robert Olsson19baf832005-06-21 12:43:18 -07001941 if (m < s_m)
1942 continue;
1943 if (m > s_m)
1944 memset(&cb->args[2], 0,
Olof Johansson91b9a272005-08-09 20:24:39 -07001945 sizeof(cb->args) - 2*sizeof(cb->args[0]));
Robert Olsson19baf832005-06-21 12:43:18 -07001946
1947 if (fn_trie_dump_plen(t, 32-m, tb, skb, cb)<0) {
1948 cb->args[1] = m;
1949 goto out;
1950 }
1951 }
1952 read_unlock(&fib_lock);
1953 cb->args[1] = m;
1954 return skb->len;
Olof Johansson91b9a272005-08-09 20:24:39 -07001955out:
Robert Olsson19baf832005-06-21 12:43:18 -07001956 read_unlock(&fib_lock);
1957 return -1;
1958}
1959
1960/* Fix more generic FIB names for init later */
1961
1962#ifdef CONFIG_IP_MULTIPLE_TABLES
1963struct fib_table * fib_hash_init(int id)
1964#else
1965struct fib_table * __init fib_hash_init(int id)
1966#endif
1967{
1968 struct fib_table *tb;
1969 struct trie *t;
1970
1971 if (fn_alias_kmem == NULL)
1972 fn_alias_kmem = kmem_cache_create("ip_fib_alias",
1973 sizeof(struct fib_alias),
1974 0, SLAB_HWCACHE_ALIGN,
1975 NULL, NULL);
1976
1977 tb = kmalloc(sizeof(struct fib_table) + sizeof(struct trie),
1978 GFP_KERNEL);
1979 if (tb == NULL)
1980 return NULL;
1981
1982 tb->tb_id = id;
1983 tb->tb_lookup = fn_trie_lookup;
1984 tb->tb_insert = fn_trie_insert;
1985 tb->tb_delete = fn_trie_delete;
1986 tb->tb_flush = fn_trie_flush;
1987 tb->tb_select_default = fn_trie_select_default;
1988 tb->tb_dump = fn_trie_dump;
1989 memset(tb->tb_data, 0, sizeof(struct trie));
1990
1991 t = (struct trie *) tb->tb_data;
1992
1993 trie_init(t);
1994
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001995 if (id == RT_TABLE_LOCAL)
Olof Johansson91b9a272005-08-09 20:24:39 -07001996 trie_local = t;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001997 else if (id == RT_TABLE_MAIN)
Olof Johansson91b9a272005-08-09 20:24:39 -07001998 trie_main = t;
Robert Olsson19baf832005-06-21 12:43:18 -07001999
2000 if (id == RT_TABLE_LOCAL)
2001 printk("IPv4 FIB: Using LC-trie version %s\n", VERSION);
2002
2003 return tb;
2004}
2005
2006/* Trie dump functions */
2007
2008static void putspace_seq(struct seq_file *seq, int n)
2009{
Olof Johansson91b9a272005-08-09 20:24:39 -07002010 while (n--)
2011 seq_printf(seq, " ");
Robert Olsson19baf832005-06-21 12:43:18 -07002012}
2013
2014static void printbin_seq(struct seq_file *seq, unsigned int v, int bits)
2015{
2016 while (bits--)
2017 seq_printf(seq, "%s", (v & (1<<bits))?"1":"0");
2018}
2019
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002020static void printnode_seq(struct seq_file *seq, int indent, struct node *n,
Robert Olsson19baf832005-06-21 12:43:18 -07002021 int pend, int cindex, int bits)
2022{
2023 putspace_seq(seq, indent);
2024 if (IS_LEAF(n))
2025 seq_printf(seq, "|");
2026 else
2027 seq_printf(seq, "+");
2028 if (bits) {
2029 seq_printf(seq, "%d/", cindex);
2030 printbin_seq(seq, cindex, bits);
2031 seq_printf(seq, ": ");
Olof Johansson91b9a272005-08-09 20:24:39 -07002032 } else
Robert Olsson19baf832005-06-21 12:43:18 -07002033 seq_printf(seq, "<root>: ");
2034 seq_printf(seq, "%s:%p ", IS_LEAF(n)?"Leaf":"Internal node", n);
2035
Robert Olsson19baf832005-06-21 12:43:18 -07002036 if (IS_LEAF(n)) {
Olof Johansson91b9a272005-08-09 20:24:39 -07002037 struct leaf *l = (struct leaf *)n;
Robert Olsson19baf832005-06-21 12:43:18 -07002038 struct fib_alias *fa;
2039 int i;
Olof Johansson91b9a272005-08-09 20:24:39 -07002040
2041 seq_printf(seq, "key=%d.%d.%d.%d\n",
2042 n->key >> 24, (n->key >> 16) % 256, (n->key >> 8) % 256, n->key % 256);
2043
2044 for (i = 32; i >= 0; i--)
2045 if (find_leaf_info(&l->list, i)) {
Robert Olsson19baf832005-06-21 12:43:18 -07002046 struct list_head *fa_head = get_fa_head(l, i);
Olof Johansson91b9a272005-08-09 20:24:39 -07002047
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002048 if (!fa_head)
Robert Olsson19baf832005-06-21 12:43:18 -07002049 continue;
2050
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002051 if (list_empty(fa_head))
Robert Olsson19baf832005-06-21 12:43:18 -07002052 continue;
2053
2054 putspace_seq(seq, indent+2);
2055 seq_printf(seq, "{/%d...dumping}\n", i);
2056
Robert Olsson19baf832005-06-21 12:43:18 -07002057 list_for_each_entry(fa, fa_head, fa_list) {
2058 putspace_seq(seq, indent+2);
Robert Olsson19baf832005-06-21 12:43:18 -07002059 if (fa->fa_info == NULL) {
2060 seq_printf(seq, "Error fa_info=NULL\n");
2061 continue;
2062 }
Olof Johansson91b9a272005-08-09 20:24:39 -07002063 if (fa->fa_info->fib_nh == NULL) {
2064 seq_printf(seq, "Error _fib_nh=NULL\n");
2065 continue;
2066 }
Robert Olsson19baf832005-06-21 12:43:18 -07002067
2068 seq_printf(seq, "{type=%d scope=%d TOS=%d}\n",
2069 fa->fa_type,
2070 fa->fa_scope,
2071 fa->fa_tos);
2072 }
2073 }
Olof Johansson91b9a272005-08-09 20:24:39 -07002074 } else {
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002075 struct tnode *tn = (struct tnode *)n;
Olof Johansson91b9a272005-08-09 20:24:39 -07002076 int plen = ((struct tnode *)n)->pos;
2077 t_key prf = MASK_PFX(n->key, plen);
2078
2079 seq_printf(seq, "key=%d.%d.%d.%d/%d\n",
2080 prf >> 24, (prf >> 16) % 256, (prf >> 8) % 256, prf % 256, plen);
2081
Robert Olsson19baf832005-06-21 12:43:18 -07002082 putspace_seq(seq, indent); seq_printf(seq, "| ");
Olof Johansson91b9a272005-08-09 20:24:39 -07002083 seq_printf(seq, "{key prefix=%08x/", tn->key & TKEY_GET_MASK(0, tn->pos));
Robert Olsson19baf832005-06-21 12:43:18 -07002084 printbin_seq(seq, tkey_extract_bits(tn->key, 0, tn->pos), tn->pos);
2085 seq_printf(seq, "}\n");
2086 putspace_seq(seq, indent); seq_printf(seq, "| ");
2087 seq_printf(seq, "{pos=%d", tn->pos);
2088 seq_printf(seq, " (skip=%d bits)", tn->pos - pend);
2089 seq_printf(seq, " bits=%d (%u children)}\n", tn->bits, (1 << tn->bits));
2090 putspace_seq(seq, indent); seq_printf(seq, "| ");
2091 seq_printf(seq, "{empty=%d full=%d}\n", tn->empty_children, tn->full_children);
2092 }
2093}
2094
2095static void trie_dump_seq(struct seq_file *seq, struct trie *t)
2096{
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002097 struct node *n = t->trie;
Olof Johansson91b9a272005-08-09 20:24:39 -07002098 int cindex = 0;
2099 int indent = 1;
2100 int pend = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07002101 int depth = 0;
Olof Johansson91b9a272005-08-09 20:24:39 -07002102 struct tnode *tn;
Robert Olsson19baf832005-06-21 12:43:18 -07002103
2104 read_lock(&fib_lock);
2105
2106 seq_printf(seq, "------ trie_dump of t=%p ------\n", t);
Robert Olsson19baf832005-06-21 12:43:18 -07002107
Olof Johansson91b9a272005-08-09 20:24:39 -07002108 if (!n) {
2109 seq_printf(seq, "------ trie is empty\n");
Robert Olsson19baf832005-06-21 12:43:18 -07002110
Olof Johansson91b9a272005-08-09 20:24:39 -07002111 read_unlock(&fib_lock);
2112 return;
Robert Olsson19baf832005-06-21 12:43:18 -07002113 }
Olof Johansson91b9a272005-08-09 20:24:39 -07002114
2115 printnode_seq(seq, indent, n, pend, cindex, 0);
2116
2117 if (!IS_TNODE(n)) {
2118 read_unlock(&fib_lock);
2119 return;
2120 }
2121
2122 tn = (struct tnode *)n;
2123 pend = tn->pos+tn->bits;
2124 putspace_seq(seq, indent); seq_printf(seq, "\\--\n");
2125 indent += 3;
2126 depth++;
2127
2128 while (tn && cindex < (1 << tn->bits)) {
2129 if (tn->child[cindex]) {
2130 /* Got a child */
2131
2132 printnode_seq(seq, indent, tn->child[cindex], pend, cindex, tn->bits);
2133 if (IS_LEAF(tn->child[cindex])) {
2134 cindex++;
2135 } else {
2136 /*
2137 * New tnode. Decend one level
2138 */
2139
2140 depth++;
2141 tn = (struct tnode *)tn->child[cindex];
2142 pend = tn->pos + tn->bits;
2143 putspace_seq(seq, indent); seq_printf(seq, "\\--\n");
2144 indent += 3;
2145 cindex = 0;
2146 }
2147 } else
2148 cindex++;
2149
2150 /*
2151 * Test if we are done
2152 */
2153
2154 while (cindex >= (1 << tn->bits)) {
2155 /*
2156 * Move upwards and test for root
2157 * pop off all traversed nodes
2158 */
2159
2160 if (NODE_PARENT(tn) == NULL) {
2161 tn = NULL;
2162 break;
2163 }
2164
2165 cindex = tkey_extract_bits(tn->key, NODE_PARENT(tn)->pos, NODE_PARENT(tn)->bits);
2166 cindex++;
2167 tn = NODE_PARENT(tn);
2168 pend = tn->pos + tn->bits;
2169 indent -= 3;
2170 depth--;
2171 }
2172 }
Robert Olsson19baf832005-06-21 12:43:18 -07002173
2174 read_unlock(&fib_lock);
2175}
2176
2177static struct trie_stat *trie_stat_new(void)
2178{
Olof Johansson91b9a272005-08-09 20:24:39 -07002179 struct trie_stat *s;
Robert Olsson19baf832005-06-21 12:43:18 -07002180 int i;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002181
Olof Johansson91b9a272005-08-09 20:24:39 -07002182 s = kmalloc(sizeof(struct trie_stat), GFP_KERNEL);
2183 if (!s)
2184 return NULL;
2185
2186 s->totdepth = 0;
2187 s->maxdepth = 0;
2188 s->tnodes = 0;
2189 s->leaves = 0;
2190 s->nullpointers = 0;
2191
2192 for (i = 0; i < MAX_CHILDS; i++)
2193 s->nodesizes[i] = 0;
2194
Robert Olsson19baf832005-06-21 12:43:18 -07002195 return s;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002196}
Robert Olsson19baf832005-06-21 12:43:18 -07002197
2198static struct trie_stat *trie_collect_stats(struct trie *t)
2199{
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002200 struct node *n = t->trie;
Robert Olsson19baf832005-06-21 12:43:18 -07002201 struct trie_stat *s = trie_stat_new();
2202 int cindex = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07002203 int pend = 0;
2204 int depth = 0;
2205
Olof Johansson91b9a272005-08-09 20:24:39 -07002206 if (!s)
2207 return NULL;
2208 if (!n)
2209 return s;
Robert Olsson19baf832005-06-21 12:43:18 -07002210
Olof Johansson91b9a272005-08-09 20:24:39 -07002211 read_lock(&fib_lock);
Robert Olsson19baf832005-06-21 12:43:18 -07002212
Olof Johansson91b9a272005-08-09 20:24:39 -07002213 if (IS_TNODE(n)) {
2214 struct tnode *tn = (struct tnode *)n;
2215 pend = tn->pos+tn->bits;
2216 s->nodesizes[tn->bits]++;
2217 depth++;
Robert Olsson19baf832005-06-21 12:43:18 -07002218
Olof Johansson91b9a272005-08-09 20:24:39 -07002219 while (tn && cindex < (1 << tn->bits)) {
2220 if (tn->child[cindex]) {
2221 /* Got a child */
Robert Olsson19baf832005-06-21 12:43:18 -07002222
Olof Johansson91b9a272005-08-09 20:24:39 -07002223 if (IS_LEAF(tn->child[cindex])) {
2224 cindex++;
2225
2226 /* stats */
2227 if (depth > s->maxdepth)
2228 s->maxdepth = depth;
2229 s->totdepth += depth;
2230 s->leaves++;
2231 } else {
Robert Olsson19baf832005-06-21 12:43:18 -07002232 /*
Olof Johansson91b9a272005-08-09 20:24:39 -07002233 * New tnode. Decend one level
Robert Olsson19baf832005-06-21 12:43:18 -07002234 */
Robert Olsson19baf832005-06-21 12:43:18 -07002235
Olof Johansson91b9a272005-08-09 20:24:39 -07002236 s->tnodes++;
2237 s->nodesizes[tn->bits]++;
2238 depth++;
Robert Olsson19baf832005-06-21 12:43:18 -07002239
Olof Johansson91b9a272005-08-09 20:24:39 -07002240 n = tn->child[cindex];
2241 tn = (struct tnode *)n;
2242 pend = tn->pos+tn->bits;
2243
2244 cindex = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07002245 }
Olof Johansson91b9a272005-08-09 20:24:39 -07002246 } else {
2247 cindex++;
2248 s->nullpointers++;
Robert Olsson19baf832005-06-21 12:43:18 -07002249 }
Olof Johansson91b9a272005-08-09 20:24:39 -07002250
2251 /*
2252 * Test if we are done
2253 */
2254
2255 while (cindex >= (1 << tn->bits)) {
2256 /*
2257 * Move upwards and test for root
2258 * pop off all traversed nodes
2259 */
2260
2261 if (NODE_PARENT(tn) == NULL) {
2262 tn = NULL;
2263 n = NULL;
2264 break;
2265 }
2266
2267 cindex = tkey_extract_bits(tn->key, NODE_PARENT(tn)->pos, NODE_PARENT(tn)->bits);
2268 tn = NODE_PARENT(tn);
2269 cindex++;
2270 n = (struct node *)tn;
2271 pend = tn->pos+tn->bits;
2272 depth--;
2273 }
Robert Olsson19baf832005-06-21 12:43:18 -07002274 }
2275 }
2276
Olof Johansson91b9a272005-08-09 20:24:39 -07002277 read_unlock(&fib_lock);
Robert Olsson19baf832005-06-21 12:43:18 -07002278 return s;
2279}
2280
2281#ifdef CONFIG_PROC_FS
2282
2283static struct fib_alias *fib_triestat_get_first(struct seq_file *seq)
2284{
2285 return NULL;
2286}
2287
2288static struct fib_alias *fib_triestat_get_next(struct seq_file *seq)
2289{
2290 return NULL;
2291}
2292
2293static void *fib_triestat_seq_start(struct seq_file *seq, loff_t *pos)
2294{
Olof Johansson91b9a272005-08-09 20:24:39 -07002295 if (!ip_fib_main_table)
2296 return NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07002297
Olof Johansson91b9a272005-08-09 20:24:39 -07002298 if (*pos)
2299 return fib_triestat_get_next(seq);
2300 else
2301 return SEQ_START_TOKEN;
Robert Olsson19baf832005-06-21 12:43:18 -07002302}
2303
2304static void *fib_triestat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2305{
2306 ++*pos;
Olof Johansson91b9a272005-08-09 20:24:39 -07002307 if (v == SEQ_START_TOKEN)
2308 return fib_triestat_get_first(seq);
2309 else
2310 return fib_triestat_get_next(seq);
Robert Olsson19baf832005-06-21 12:43:18 -07002311}
2312
2313static void fib_triestat_seq_stop(struct seq_file *seq, void *v)
2314{
2315
2316}
2317
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002318/*
Robert Olsson19baf832005-06-21 12:43:18 -07002319 * This outputs /proc/net/fib_triestats
2320 *
2321 * It always works in backward compatibility mode.
2322 * The format of the file is not supposed to be changed.
2323 */
2324
2325static void collect_and_show(struct trie *t, struct seq_file *seq)
2326{
2327 int bytes = 0; /* How many bytes are used, a ref is 4 bytes */
2328 int i, max, pointers;
Olof Johansson91b9a272005-08-09 20:24:39 -07002329 struct trie_stat *stat;
Robert Olsson19baf832005-06-21 12:43:18 -07002330 int avdepth;
2331
2332 stat = trie_collect_stats(t);
2333
Olof Johansson91b9a272005-08-09 20:24:39 -07002334 bytes = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07002335 seq_printf(seq, "trie=%p\n", t);
2336
2337 if (stat) {
2338 if (stat->leaves)
Olof Johansson91b9a272005-08-09 20:24:39 -07002339 avdepth = stat->totdepth*100 / stat->leaves;
Robert Olsson19baf832005-06-21 12:43:18 -07002340 else
Olof Johansson91b9a272005-08-09 20:24:39 -07002341 avdepth = 0;
2342 seq_printf(seq, "Aver depth: %d.%02d\n", avdepth / 100, avdepth % 100);
Robert Olsson19baf832005-06-21 12:43:18 -07002343 seq_printf(seq, "Max depth: %4d\n", stat->maxdepth);
Olof Johansson91b9a272005-08-09 20:24:39 -07002344
Robert Olsson19baf832005-06-21 12:43:18 -07002345 seq_printf(seq, "Leaves: %d\n", stat->leaves);
2346 bytes += sizeof(struct leaf) * stat->leaves;
2347 seq_printf(seq, "Internal nodes: %d\n", stat->tnodes);
2348 bytes += sizeof(struct tnode) * stat->tnodes;
2349
2350 max = MAX_CHILDS-1;
2351
2352 while (max >= 0 && stat->nodesizes[max] == 0)
2353 max--;
2354 pointers = 0;
2355
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002356 for (i = 1; i <= max; i++)
Robert Olsson19baf832005-06-21 12:43:18 -07002357 if (stat->nodesizes[i] != 0) {
2358 seq_printf(seq, " %d: %d", i, stat->nodesizes[i]);
2359 pointers += (1<<i) * stat->nodesizes[i];
2360 }
2361 seq_printf(seq, "\n");
2362 seq_printf(seq, "Pointers: %d\n", pointers);
2363 bytes += sizeof(struct node *) * pointers;
2364 seq_printf(seq, "Null ptrs: %d\n", stat->nullpointers);
2365 seq_printf(seq, "Total size: %d kB\n", bytes / 1024);
2366
2367 kfree(stat);
2368 }
2369
2370#ifdef CONFIG_IP_FIB_TRIE_STATS
2371 seq_printf(seq, "Counters:\n---------\n");
2372 seq_printf(seq,"gets = %d\n", t->stats.gets);
2373 seq_printf(seq,"backtracks = %d\n", t->stats.backtrack);
2374 seq_printf(seq,"semantic match passed = %d\n", t->stats.semantic_match_passed);
2375 seq_printf(seq,"semantic match miss = %d\n", t->stats.semantic_match_miss);
2376 seq_printf(seq,"null node hit= %d\n", t->stats.null_node_hit);
Robert Olsson2f368952005-07-05 15:02:40 -07002377 seq_printf(seq,"skipped node resize = %d\n", t->stats.resize_node_skipped);
Robert Olsson19baf832005-06-21 12:43:18 -07002378#ifdef CLEAR_STATS
2379 memset(&(t->stats), 0, sizeof(t->stats));
2380#endif
2381#endif /* CONFIG_IP_FIB_TRIE_STATS */
2382}
2383
2384static int fib_triestat_seq_show(struct seq_file *seq, void *v)
2385{
2386 char bf[128];
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002387
Robert Olsson19baf832005-06-21 12:43:18 -07002388 if (v == SEQ_START_TOKEN) {
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002389 seq_printf(seq, "Basic info: size of leaf: %Zd bytes, size of tnode: %Zd bytes.\n",
Robert Olsson19baf832005-06-21 12:43:18 -07002390 sizeof(struct leaf), sizeof(struct tnode));
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002391 if (trie_local)
Robert Olsson19baf832005-06-21 12:43:18 -07002392 collect_and_show(trie_local, seq);
2393
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002394 if (trie_main)
Robert Olsson19baf832005-06-21 12:43:18 -07002395 collect_and_show(trie_main, seq);
Olof Johansson91b9a272005-08-09 20:24:39 -07002396 } else {
2397 snprintf(bf, sizeof(bf), "*\t%08X\t%08X", 200, 400);
2398
Robert Olsson19baf832005-06-21 12:43:18 -07002399 seq_printf(seq, "%-127s\n", bf);
2400 }
2401 return 0;
2402}
2403
2404static struct seq_operations fib_triestat_seq_ops = {
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002405 .start = fib_triestat_seq_start,
2406 .next = fib_triestat_seq_next,
2407 .stop = fib_triestat_seq_stop,
2408 .show = fib_triestat_seq_show,
Robert Olsson19baf832005-06-21 12:43:18 -07002409};
2410
2411static int fib_triestat_seq_open(struct inode *inode, struct file *file)
2412{
2413 struct seq_file *seq;
2414 int rc = -ENOMEM;
2415
2416 rc = seq_open(file, &fib_triestat_seq_ops);
2417 if (rc)
2418 goto out_kfree;
2419
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002420 seq = file->private_data;
Robert Olsson19baf832005-06-21 12:43:18 -07002421out:
2422 return rc;
2423out_kfree:
2424 goto out;
2425}
2426
2427static struct file_operations fib_triestat_seq_fops = {
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002428 .owner = THIS_MODULE,
2429 .open = fib_triestat_seq_open,
2430 .read = seq_read,
2431 .llseek = seq_lseek,
2432 .release = seq_release_private,
Robert Olsson19baf832005-06-21 12:43:18 -07002433};
2434
2435int __init fib_stat_proc_init(void)
2436{
2437 if (!proc_net_fops_create("fib_triestat", S_IRUGO, &fib_triestat_seq_fops))
2438 return -ENOMEM;
2439 return 0;
2440}
2441
2442void __init fib_stat_proc_exit(void)
2443{
2444 proc_net_remove("fib_triestat");
2445}
2446
2447static struct fib_alias *fib_trie_get_first(struct seq_file *seq)
2448{
2449 return NULL;
2450}
2451
2452static struct fib_alias *fib_trie_get_next(struct seq_file *seq)
2453{
2454 return NULL;
2455}
2456
2457static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos)
2458{
Olof Johansson91b9a272005-08-09 20:24:39 -07002459 if (!ip_fib_main_table)
2460 return NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07002461
Olof Johansson91b9a272005-08-09 20:24:39 -07002462 if (*pos)
2463 return fib_trie_get_next(seq);
2464 else
2465 return SEQ_START_TOKEN;
Robert Olsson19baf832005-06-21 12:43:18 -07002466}
2467
2468static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2469{
2470 ++*pos;
Olof Johansson91b9a272005-08-09 20:24:39 -07002471 if (v == SEQ_START_TOKEN)
2472 return fib_trie_get_first(seq);
2473 else
2474 return fib_trie_get_next(seq);
2475
Robert Olsson19baf832005-06-21 12:43:18 -07002476}
2477
2478static void fib_trie_seq_stop(struct seq_file *seq, void *v)
2479{
Robert Olsson19baf832005-06-21 12:43:18 -07002480}
2481
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002482/*
Robert Olsson19baf832005-06-21 12:43:18 -07002483 * This outputs /proc/net/fib_trie.
2484 *
2485 * It always works in backward compatibility mode.
2486 * The format of the file is not supposed to be changed.
2487 */
2488
2489static int fib_trie_seq_show(struct seq_file *seq, void *v)
2490{
2491 char bf[128];
2492
2493 if (v == SEQ_START_TOKEN) {
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002494 if (trie_local)
Robert Olsson19baf832005-06-21 12:43:18 -07002495 trie_dump_seq(seq, trie_local);
2496
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002497 if (trie_main)
Robert Olsson19baf832005-06-21 12:43:18 -07002498 trie_dump_seq(seq, trie_main);
Olof Johansson91b9a272005-08-09 20:24:39 -07002499 } else {
Robert Olsson19baf832005-06-21 12:43:18 -07002500 snprintf(bf, sizeof(bf),
2501 "*\t%08X\t%08X", 200, 400);
2502 seq_printf(seq, "%-127s\n", bf);
2503 }
2504
2505 return 0;
2506}
2507
2508static struct seq_operations fib_trie_seq_ops = {
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002509 .start = fib_trie_seq_start,
2510 .next = fib_trie_seq_next,
2511 .stop = fib_trie_seq_stop,
2512 .show = fib_trie_seq_show,
Robert Olsson19baf832005-06-21 12:43:18 -07002513};
2514
2515static int fib_trie_seq_open(struct inode *inode, struct file *file)
2516{
2517 struct seq_file *seq;
2518 int rc = -ENOMEM;
2519
2520 rc = seq_open(file, &fib_trie_seq_ops);
2521 if (rc)
2522 goto out_kfree;
2523
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002524 seq = file->private_data;
Robert Olsson19baf832005-06-21 12:43:18 -07002525out:
2526 return rc;
2527out_kfree:
2528 goto out;
2529}
2530
2531static struct file_operations fib_trie_seq_fops = {
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002532 .owner = THIS_MODULE,
2533 .open = fib_trie_seq_open,
2534 .read = seq_read,
2535 .llseek = seq_lseek,
2536 .release= seq_release_private,
Robert Olsson19baf832005-06-21 12:43:18 -07002537};
2538
2539int __init fib_proc_init(void)
2540{
2541 if (!proc_net_fops_create("fib_trie", S_IRUGO, &fib_trie_seq_fops))
2542 return -ENOMEM;
2543 return 0;
2544}
2545
2546void __init fib_proc_exit(void)
2547{
2548 proc_net_remove("fib_trie");
2549}
2550
2551#endif /* CONFIG_PROC_FS */