blob: d1a39b1277d6beb141d82e19a036a10861990047 [file] [log] [blame]
Robert Olsson19baf832005-06-21 12:43:18 -07001/*
2 * This program is free software; you can redistribute it and/or
3 * modify it under the terms of the GNU General Public License
4 * as published by the Free Software Foundation; either version
5 * 2 of the License, or (at your option) any later version.
6 *
7 * Robert Olsson <robert.olsson@its.uu.se> Uppsala Universitet
8 * & Swedish University of Agricultural Sciences.
9 *
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090010 * Jens Laas <jens.laas@data.slu.se> Swedish University of
Robert Olsson19baf832005-06-21 12:43:18 -070011 * Agricultural Sciences.
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090012 *
Robert Olsson19baf832005-06-21 12:43:18 -070013 * Hans Liss <hans.liss@its.uu.se> Uppsala Universitet
14 *
15 * This work is based on the LPC-trie which is originally descibed in:
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090016 *
Robert Olsson19baf832005-06-21 12:43:18 -070017 * An experimental study of compression methods for dynamic tries
18 * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002.
19 * http://www.nada.kth.se/~snilsson/public/papers/dyntrie2/
20 *
21 *
22 * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson
23 * IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999
24 *
Robert Olsson19baf832005-06-21 12:43:18 -070025 *
26 * Code from fib_hash has been reused which includes the following header:
27 *
28 *
29 * INET An implementation of the TCP/IP protocol suite for the LINUX
30 * operating system. INET is implemented using the BSD Socket
31 * interface as the means of communication with the user level.
32 *
33 * IPv4 FIB: lookup engine and maintenance routines.
34 *
35 *
36 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
37 *
38 * This program is free software; you can redistribute it and/or
39 * modify it under the terms of the GNU General Public License
40 * as published by the Free Software Foundation; either version
41 * 2 of the License, or (at your option) any later version.
Robert Olssonfd966252005-12-22 11:25:10 -080042 *
43 * Substantial contributions to this work comes from:
44 *
45 * David S. Miller, <davem@davemloft.net>
46 * Stephen Hemminger <shemminger@osdl.org>
47 * Paul E. McKenney <paulmck@us.ibm.com>
48 * Patrick McHardy <kaber@trash.net>
Robert Olsson19baf832005-06-21 12:43:18 -070049 */
50
Robert Olsson05eee482007-03-19 16:27:37 -070051#define VERSION "0.408"
Robert Olsson19baf832005-06-21 12:43:18 -070052
Robert Olsson19baf832005-06-21 12:43:18 -070053#include <asm/uaccess.h>
54#include <asm/system.h>
Jiri Slaby1977f032007-10-18 23:40:25 -070055#include <linux/bitops.h>
Robert Olsson19baf832005-06-21 12:43:18 -070056#include <linux/types.h>
57#include <linux/kernel.h>
Robert Olsson19baf832005-06-21 12:43:18 -070058#include <linux/mm.h>
59#include <linux/string.h>
60#include <linux/socket.h>
61#include <linux/sockios.h>
62#include <linux/errno.h>
63#include <linux/in.h>
64#include <linux/inet.h>
Stephen Hemmingercd8787a2006-01-03 14:38:34 -080065#include <linux/inetdevice.h>
Robert Olsson19baf832005-06-21 12:43:18 -070066#include <linux/netdevice.h>
67#include <linux/if_arp.h>
68#include <linux/proc_fs.h>
Robert Olsson2373ce12005-08-25 13:01:29 -070069#include <linux/rcupdate.h>
Robert Olsson19baf832005-06-21 12:43:18 -070070#include <linux/skbuff.h>
71#include <linux/netlink.h>
72#include <linux/init.h>
73#include <linux/list.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020074#include <net/net_namespace.h>
Robert Olsson19baf832005-06-21 12:43:18 -070075#include <net/ip.h>
76#include <net/protocol.h>
77#include <net/route.h>
78#include <net/tcp.h>
79#include <net/sock.h>
80#include <net/ip_fib.h>
81#include "fib_lookup.h"
82
Robert Olsson06ef9212006-03-20 21:35:01 -080083#define MAX_STAT_DEPTH 32
Robert Olsson19baf832005-06-21 12:43:18 -070084
Robert Olsson19baf832005-06-21 12:43:18 -070085#define KEYLENGTH (8*sizeof(t_key))
Robert Olsson19baf832005-06-21 12:43:18 -070086
Robert Olsson19baf832005-06-21 12:43:18 -070087typedef unsigned int t_key;
88
89#define T_TNODE 0
90#define T_LEAF 1
91#define NODE_TYPE_MASK 0x1UL
Robert Olsson2373ce12005-08-25 13:01:29 -070092#define NODE_TYPE(node) ((node)->parent & NODE_TYPE_MASK)
93
Olof Johansson91b9a272005-08-09 20:24:39 -070094#define IS_TNODE(n) (!(n->parent & T_LEAF))
95#define IS_LEAF(n) (n->parent & T_LEAF)
Robert Olsson19baf832005-06-21 12:43:18 -070096
97struct node {
Olof Johansson91b9a272005-08-09 20:24:39 -070098 unsigned long parent;
Eric Dumazet8d9654442008-01-13 22:31:44 -080099 t_key key;
Robert Olsson19baf832005-06-21 12:43:18 -0700100};
101
102struct leaf {
Olof Johansson91b9a272005-08-09 20:24:39 -0700103 unsigned long parent;
Eric Dumazet8d9654442008-01-13 22:31:44 -0800104 t_key key;
Robert Olsson19baf832005-06-21 12:43:18 -0700105 struct hlist_head list;
Robert Olsson2373ce12005-08-25 13:01:29 -0700106 struct rcu_head rcu;
Robert Olsson19baf832005-06-21 12:43:18 -0700107};
108
109struct leaf_info {
110 struct hlist_node hlist;
Robert Olsson2373ce12005-08-25 13:01:29 -0700111 struct rcu_head rcu;
Robert Olsson19baf832005-06-21 12:43:18 -0700112 int plen;
113 struct list_head falh;
114};
115
116struct tnode {
Olof Johansson91b9a272005-08-09 20:24:39 -0700117 unsigned long parent;
Eric Dumazet8d9654442008-01-13 22:31:44 -0800118 t_key key;
Eric Dumazet112d8cf2008-01-12 21:27:41 -0800119 unsigned char pos; /* 2log(KEYLENGTH) bits needed */
120 unsigned char bits; /* 2log(KEYLENGTH) bits needed */
Eric Dumazet8d9654442008-01-13 22:31:44 -0800121 unsigned int full_children; /* KEYLENGTH bits needed */
122 unsigned int empty_children; /* KEYLENGTH bits needed */
Stephen Hemminger15be75c2008-04-10 02:56:38 -0700123 union {
124 struct rcu_head rcu;
125 struct work_struct work;
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700126 struct tnode *tnode_free;
Stephen Hemminger15be75c2008-04-10 02:56:38 -0700127 };
Olof Johansson91b9a272005-08-09 20:24:39 -0700128 struct node *child[0];
Robert Olsson19baf832005-06-21 12:43:18 -0700129};
130
131#ifdef CONFIG_IP_FIB_TRIE_STATS
132struct trie_use_stats {
133 unsigned int gets;
134 unsigned int backtrack;
135 unsigned int semantic_match_passed;
136 unsigned int semantic_match_miss;
137 unsigned int null_node_hit;
Robert Olsson2f368952005-07-05 15:02:40 -0700138 unsigned int resize_node_skipped;
Robert Olsson19baf832005-06-21 12:43:18 -0700139};
140#endif
141
142struct trie_stat {
143 unsigned int totdepth;
144 unsigned int maxdepth;
145 unsigned int tnodes;
146 unsigned int leaves;
147 unsigned int nullpointers;
Stephen Hemminger93672292008-01-22 21:54:05 -0800148 unsigned int prefixes;
Robert Olsson06ef9212006-03-20 21:35:01 -0800149 unsigned int nodesizes[MAX_STAT_DEPTH];
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700150};
Robert Olsson19baf832005-06-21 12:43:18 -0700151
152struct trie {
Olof Johansson91b9a272005-08-09 20:24:39 -0700153 struct node *trie;
Robert Olsson19baf832005-06-21 12:43:18 -0700154#ifdef CONFIG_IP_FIB_TRIE_STATS
155 struct trie_use_stats stats;
156#endif
Robert Olsson19baf832005-06-21 12:43:18 -0700157};
158
Robert Olsson19baf832005-06-21 12:43:18 -0700159static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n);
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800160static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n,
161 int wasfull);
Robert Olsson19baf832005-06-21 12:43:18 -0700162static struct node *resize(struct trie *t, struct tnode *tn);
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700163static struct tnode *inflate(struct trie *t, struct tnode *tn);
164static struct tnode *halve(struct trie *t, struct tnode *tn);
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700165/* tnodes to free after resize(); protected by RTNL */
166static struct tnode *tnode_free_head;
Robert Olsson19baf832005-06-21 12:43:18 -0700167
Christoph Lametere18b8902006-12-06 20:33:20 -0800168static struct kmem_cache *fn_alias_kmem __read_mostly;
Stephen Hemmingerbc3c8c12008-01-22 21:51:50 -0800169static struct kmem_cache *trie_leaf_kmem __read_mostly;
Robert Olsson19baf832005-06-21 12:43:18 -0700170
Stephen Hemminger06801912007-08-10 15:22:13 -0700171static inline struct tnode *node_parent(struct node *node)
172{
Eric Dumazetb59cfbf2008-01-18 03:31:36 -0800173 return (struct tnode *)(node->parent & ~NODE_TYPE_MASK);
174}
Stephen Hemminger06801912007-08-10 15:22:13 -0700175
Eric Dumazetb59cfbf2008-01-18 03:31:36 -0800176static inline struct tnode *node_parent_rcu(struct node *node)
177{
178 struct tnode *ret = node_parent(node);
179
Stephen Hemminger06801912007-08-10 15:22:13 -0700180 return rcu_dereference(ret);
181}
182
Stephen Hemminger6440cc92008-03-22 17:59:58 -0700183/* Same as rcu_assign_pointer
184 * but that macro() assumes that value is a pointer.
185 */
Stephen Hemminger06801912007-08-10 15:22:13 -0700186static inline void node_set_parent(struct node *node, struct tnode *ptr)
187{
Stephen Hemminger6440cc92008-03-22 17:59:58 -0700188 smp_wmb();
189 node->parent = (unsigned long)ptr | NODE_TYPE(node);
Stephen Hemminger06801912007-08-10 15:22:13 -0700190}
Robert Olsson2373ce12005-08-25 13:01:29 -0700191
Eric Dumazetb59cfbf2008-01-18 03:31:36 -0800192static inline struct node *tnode_get_child(struct tnode *tn, unsigned int i)
Robert Olsson19baf832005-06-21 12:43:18 -0700193{
Eric Dumazetb59cfbf2008-01-18 03:31:36 -0800194 BUG_ON(i >= 1U << tn->bits);
Robert Olsson19baf832005-06-21 12:43:18 -0700195
Eric Dumazetb59cfbf2008-01-18 03:31:36 -0800196 return tn->child[i];
197}
198
199static inline struct node *tnode_get_child_rcu(struct tnode *tn, unsigned int i)
200{
201 struct node *ret = tnode_get_child(tn, i);
202
203 return rcu_dereference(ret);
Robert Olsson19baf832005-06-21 12:43:18 -0700204}
205
Stephen Hemmignerbb435b82005-08-09 20:25:39 -0700206static inline int tnode_child_length(const struct tnode *tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700207{
Olof Johansson91b9a272005-08-09 20:24:39 -0700208 return 1 << tn->bits;
Robert Olsson19baf832005-06-21 12:43:18 -0700209}
210
Stephen Hemmingerab66b4a2007-08-10 15:22:58 -0700211static inline t_key mask_pfx(t_key k, unsigned short l)
212{
213 return (l == 0) ? 0 : k >> (KEYLENGTH-l) << (KEYLENGTH-l);
214}
215
Robert Olsson19baf832005-06-21 12:43:18 -0700216static inline t_key tkey_extract_bits(t_key a, int offset, int bits)
217{
Olof Johansson91b9a272005-08-09 20:24:39 -0700218 if (offset < KEYLENGTH)
Robert Olsson19baf832005-06-21 12:43:18 -0700219 return ((t_key)(a << offset)) >> (KEYLENGTH - bits);
Olof Johansson91b9a272005-08-09 20:24:39 -0700220 else
Robert Olsson19baf832005-06-21 12:43:18 -0700221 return 0;
222}
223
224static inline int tkey_equals(t_key a, t_key b)
225{
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700226 return a == b;
Robert Olsson19baf832005-06-21 12:43:18 -0700227}
228
229static inline int tkey_sub_equals(t_key a, int offset, int bits, t_key b)
230{
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700231 if (bits == 0 || offset >= KEYLENGTH)
232 return 1;
Olof Johansson91b9a272005-08-09 20:24:39 -0700233 bits = bits > KEYLENGTH ? KEYLENGTH : bits;
234 return ((a ^ b) << offset) >> (KEYLENGTH - bits) == 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700235}
Robert Olsson19baf832005-06-21 12:43:18 -0700236
237static inline int tkey_mismatch(t_key a, int offset, t_key b)
238{
239 t_key diff = a ^ b;
240 int i = offset;
241
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700242 if (!diff)
243 return 0;
244 while ((diff << i) >> (KEYLENGTH-1) == 0)
Robert Olsson19baf832005-06-21 12:43:18 -0700245 i++;
246 return i;
247}
248
Robert Olsson19baf832005-06-21 12:43:18 -0700249/*
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900250 To understand this stuff, an understanding of keys and all their bits is
251 necessary. Every node in the trie has a key associated with it, but not
Robert Olsson19baf832005-06-21 12:43:18 -0700252 all of the bits in that key are significant.
253
254 Consider a node 'n' and its parent 'tp'.
255
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900256 If n is a leaf, every bit in its key is significant. Its presence is
257 necessitated by path compression, since during a tree traversal (when
258 searching for a leaf - unless we are doing an insertion) we will completely
259 ignore all skipped bits we encounter. Thus we need to verify, at the end of
260 a potentially successful search, that we have indeed been walking the
Robert Olsson19baf832005-06-21 12:43:18 -0700261 correct key path.
262
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900263 Note that we can never "miss" the correct key in the tree if present by
264 following the wrong path. Path compression ensures that segments of the key
265 that are the same for all keys with a given prefix are skipped, but the
266 skipped part *is* identical for each node in the subtrie below the skipped
267 bit! trie_insert() in this implementation takes care of that - note the
Robert Olsson19baf832005-06-21 12:43:18 -0700268 call to tkey_sub_equals() in trie_insert().
269
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900270 if n is an internal node - a 'tnode' here, the various parts of its key
Robert Olsson19baf832005-06-21 12:43:18 -0700271 have many different meanings.
272
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900273 Example:
Robert Olsson19baf832005-06-21 12:43:18 -0700274 _________________________________________________________________
275 | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C |
276 -----------------------------------------------------------------
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900277 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
Robert Olsson19baf832005-06-21 12:43:18 -0700278
279 _________________________________________________________________
280 | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u |
281 -----------------------------------------------------------------
282 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
283
284 tp->pos = 7
285 tp->bits = 3
286 n->pos = 15
Olof Johansson91b9a272005-08-09 20:24:39 -0700287 n->bits = 4
Robert Olsson19baf832005-06-21 12:43:18 -0700288
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900289 First, let's just ignore the bits that come before the parent tp, that is
290 the bits from 0 to (tp->pos-1). They are *known* but at this point we do
Robert Olsson19baf832005-06-21 12:43:18 -0700291 not use them for anything.
292
293 The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900294 index into the parent's child array. That is, they will be used to find
Robert Olsson19baf832005-06-21 12:43:18 -0700295 'n' among tp's children.
296
297 The bits from (tp->pos + tp->bits) to (n->pos - 1) - "S" - are skipped bits
298 for the node n.
299
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900300 All the bits we have seen so far are significant to the node n. The rest
Robert Olsson19baf832005-06-21 12:43:18 -0700301 of the bits are really not needed or indeed known in n->key.
302
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900303 The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into
Robert Olsson19baf832005-06-21 12:43:18 -0700304 n's child array, and will of course be different for each child.
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900305
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700306
Robert Olsson19baf832005-06-21 12:43:18 -0700307 The rest of the bits, from (n->pos + n->bits) onward, are completely unknown
308 at this point.
309
310*/
311
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700312static inline void check_tnode(const struct tnode *tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700313{
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700314 WARN_ON(tn && tn->pos+tn->bits > 32);
Robert Olsson19baf832005-06-21 12:43:18 -0700315}
316
Denis V. Lunevf5026fa2007-12-13 09:47:57 -0800317static const int halve_threshold = 25;
318static const int inflate_threshold = 50;
319static const int halve_threshold_root = 8;
320static const int inflate_threshold_root = 15;
Robert Olsson19baf832005-06-21 12:43:18 -0700321
Robert Olsson2373ce12005-08-25 13:01:29 -0700322
323static void __alias_free_mem(struct rcu_head *head)
324{
325 struct fib_alias *fa = container_of(head, struct fib_alias, rcu);
326 kmem_cache_free(fn_alias_kmem, fa);
327}
328
329static inline void alias_free_mem_rcu(struct fib_alias *fa)
330{
331 call_rcu(&fa->rcu, __alias_free_mem);
332}
333
334static void __leaf_free_rcu(struct rcu_head *head)
335{
Stephen Hemmingerbc3c8c12008-01-22 21:51:50 -0800336 struct leaf *l = container_of(head, struct leaf, rcu);
337 kmem_cache_free(trie_leaf_kmem, l);
Robert Olsson2373ce12005-08-25 13:01:29 -0700338}
339
Stephen Hemminger387a5482008-04-10 03:47:34 -0700340static inline void free_leaf(struct leaf *l)
341{
342 call_rcu_bh(&l->rcu, __leaf_free_rcu);
343}
344
Robert Olsson2373ce12005-08-25 13:01:29 -0700345static void __leaf_info_free_rcu(struct rcu_head *head)
346{
347 kfree(container_of(head, struct leaf_info, rcu));
348}
349
350static inline void free_leaf_info(struct leaf_info *leaf)
351{
352 call_rcu(&leaf->rcu, __leaf_info_free_rcu);
353}
354
Eric Dumazet8d9654442008-01-13 22:31:44 -0800355static struct tnode *tnode_alloc(size_t size)
Robert Olsson2373ce12005-08-25 13:01:29 -0700356{
Robert Olsson2373ce12005-08-25 13:01:29 -0700357 if (size <= PAGE_SIZE)
Eric Dumazet8d9654442008-01-13 22:31:44 -0800358 return kzalloc(size, GFP_KERNEL);
Stephen Hemminger15be75c2008-04-10 02:56:38 -0700359 else
360 return __vmalloc(size, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL);
361}
Robert Olsson2373ce12005-08-25 13:01:29 -0700362
Stephen Hemminger15be75c2008-04-10 02:56:38 -0700363static void __tnode_vfree(struct work_struct *arg)
364{
365 struct tnode *tn = container_of(arg, struct tnode, work);
366 vfree(tn);
Robert Olsson2373ce12005-08-25 13:01:29 -0700367}
368
369static void __tnode_free_rcu(struct rcu_head *head)
370{
371 struct tnode *tn = container_of(head, struct tnode, rcu);
Eric Dumazet8d9654442008-01-13 22:31:44 -0800372 size_t size = sizeof(struct tnode) +
373 (sizeof(struct node *) << tn->bits);
Robert Olsson2373ce12005-08-25 13:01:29 -0700374
375 if (size <= PAGE_SIZE)
376 kfree(tn);
Stephen Hemminger15be75c2008-04-10 02:56:38 -0700377 else {
378 INIT_WORK(&tn->work, __tnode_vfree);
379 schedule_work(&tn->work);
380 }
Robert Olsson2373ce12005-08-25 13:01:29 -0700381}
382
383static inline void tnode_free(struct tnode *tn)
384{
Stephen Hemminger387a5482008-04-10 03:47:34 -0700385 if (IS_LEAF(tn))
386 free_leaf((struct leaf *) tn);
387 else
Robert Olsson550e29b2006-04-04 12:53:35 -0700388 call_rcu(&tn->rcu, __tnode_free_rcu);
Robert Olsson2373ce12005-08-25 13:01:29 -0700389}
390
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700391static void tnode_free_safe(struct tnode *tn)
392{
393 BUG_ON(IS_LEAF(tn));
394
395 if (node_parent((struct node *) tn)) {
396 tn->tnode_free = tnode_free_head;
397 tnode_free_head = tn;
398 } else {
399 tnode_free(tn);
400 }
401}
402
403static void tnode_free_flush(void)
404{
405 struct tnode *tn;
406
407 while ((tn = tnode_free_head)) {
408 tnode_free_head = tn->tnode_free;
409 tn->tnode_free = NULL;
410 tnode_free(tn);
411 }
412}
413
Robert Olsson19baf832005-06-21 12:43:18 -0700414static struct leaf *leaf_new(void)
415{
Stephen Hemmingerbc3c8c12008-01-22 21:51:50 -0800416 struct leaf *l = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700417 if (l) {
Robert Olsson2373ce12005-08-25 13:01:29 -0700418 l->parent = T_LEAF;
Robert Olsson19baf832005-06-21 12:43:18 -0700419 INIT_HLIST_HEAD(&l->list);
420 }
421 return l;
422}
423
424static struct leaf_info *leaf_info_new(int plen)
425{
426 struct leaf_info *li = kmalloc(sizeof(struct leaf_info), GFP_KERNEL);
Robert Olsson2373ce12005-08-25 13:01:29 -0700427 if (li) {
428 li->plen = plen;
429 INIT_LIST_HEAD(&li->falh);
Patrick McHardyf0e36f82005-07-05 14:44:55 -0700430 }
Robert Olsson2373ce12005-08-25 13:01:29 -0700431 return li;
Patrick McHardyf0e36f82005-07-05 14:44:55 -0700432}
433
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800434static struct tnode *tnode_new(t_key key, int pos, int bits)
Robert Olsson19baf832005-06-21 12:43:18 -0700435{
Eric Dumazet8d9654442008-01-13 22:31:44 -0800436 size_t sz = sizeof(struct tnode) + (sizeof(struct node *) << bits);
Patrick McHardyf0e36f82005-07-05 14:44:55 -0700437 struct tnode *tn = tnode_alloc(sz);
Robert Olsson19baf832005-06-21 12:43:18 -0700438
Olof Johansson91b9a272005-08-09 20:24:39 -0700439 if (tn) {
Robert Olsson2373ce12005-08-25 13:01:29 -0700440 tn->parent = T_TNODE;
Robert Olsson19baf832005-06-21 12:43:18 -0700441 tn->pos = pos;
442 tn->bits = bits;
443 tn->key = key;
444 tn->full_children = 0;
445 tn->empty_children = 1<<bits;
446 }
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700447
Eric Dumazet8d9654442008-01-13 22:31:44 -0800448 pr_debug("AT %p s=%u %lu\n", tn, (unsigned int) sizeof(struct tnode),
449 (unsigned long) (sizeof(struct node) << bits));
Robert Olsson19baf832005-06-21 12:43:18 -0700450 return tn;
451}
452
Robert Olsson19baf832005-06-21 12:43:18 -0700453/*
454 * Check whether a tnode 'n' is "full", i.e. it is an internal node
455 * and no bits are skipped. See discussion in dyntree paper p. 6
456 */
457
Stephen Hemmignerbb435b82005-08-09 20:25:39 -0700458static inline int tnode_full(const struct tnode *tn, const struct node *n)
Robert Olsson19baf832005-06-21 12:43:18 -0700459{
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700460 if (n == NULL || IS_LEAF(n))
Robert Olsson19baf832005-06-21 12:43:18 -0700461 return 0;
462
463 return ((struct tnode *) n)->pos == tn->pos + tn->bits;
464}
465
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800466static inline void put_child(struct trie *t, struct tnode *tn, int i,
467 struct node *n)
Robert Olsson19baf832005-06-21 12:43:18 -0700468{
469 tnode_put_child_reorg(tn, i, n, -1);
470}
471
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700472 /*
Robert Olsson19baf832005-06-21 12:43:18 -0700473 * Add a child at position i overwriting the old value.
474 * Update the value of full_children and empty_children.
475 */
476
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800477static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n,
478 int wasfull)
Robert Olsson19baf832005-06-21 12:43:18 -0700479{
Robert Olsson2373ce12005-08-25 13:01:29 -0700480 struct node *chi = tn->child[i];
Robert Olsson19baf832005-06-21 12:43:18 -0700481 int isfull;
482
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700483 BUG_ON(i >= 1<<tn->bits);
484
Robert Olsson19baf832005-06-21 12:43:18 -0700485 /* update emptyChildren */
486 if (n == NULL && chi != NULL)
487 tn->empty_children++;
488 else if (n != NULL && chi == NULL)
489 tn->empty_children--;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700490
Robert Olsson19baf832005-06-21 12:43:18 -0700491 /* update fullChildren */
Olof Johansson91b9a272005-08-09 20:24:39 -0700492 if (wasfull == -1)
Robert Olsson19baf832005-06-21 12:43:18 -0700493 wasfull = tnode_full(tn, chi);
494
495 isfull = tnode_full(tn, n);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700496 if (wasfull && !isfull)
Robert Olsson19baf832005-06-21 12:43:18 -0700497 tn->full_children--;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700498 else if (!wasfull && isfull)
Robert Olsson19baf832005-06-21 12:43:18 -0700499 tn->full_children++;
Olof Johansson91b9a272005-08-09 20:24:39 -0700500
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700501 if (n)
Stephen Hemminger06801912007-08-10 15:22:13 -0700502 node_set_parent(n, tn);
Robert Olsson19baf832005-06-21 12:43:18 -0700503
Robert Olsson2373ce12005-08-25 13:01:29 -0700504 rcu_assign_pointer(tn->child[i], n);
Robert Olsson19baf832005-06-21 12:43:18 -0700505}
506
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700507static struct node *resize(struct trie *t, struct tnode *tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700508{
509 int i;
Robert Olsson2f368952005-07-05 15:02:40 -0700510 int err = 0;
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700511 struct tnode *old_tn;
Robert Olssone6308be2005-10-04 13:01:58 -0700512 int inflate_threshold_use;
513 int halve_threshold_use;
Robert Olsson05eee482007-03-19 16:27:37 -0700514 int max_resize;
Robert Olsson19baf832005-06-21 12:43:18 -0700515
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900516 if (!tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700517 return NULL;
518
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700519 pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n",
520 tn, inflate_threshold, halve_threshold);
Robert Olsson19baf832005-06-21 12:43:18 -0700521
522 /* No children */
523 if (tn->empty_children == tnode_child_length(tn)) {
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700524 tnode_free_safe(tn);
Robert Olsson19baf832005-06-21 12:43:18 -0700525 return NULL;
526 }
527 /* One child */
528 if (tn->empty_children == tnode_child_length(tn) - 1)
529 for (i = 0; i < tnode_child_length(tn); i++) {
Olof Johansson91b9a272005-08-09 20:24:39 -0700530 struct node *n;
Robert Olsson19baf832005-06-21 12:43:18 -0700531
Olof Johansson91b9a272005-08-09 20:24:39 -0700532 n = tn->child[i];
Robert Olsson2373ce12005-08-25 13:01:29 -0700533 if (!n)
Olof Johansson91b9a272005-08-09 20:24:39 -0700534 continue;
Olof Johansson91b9a272005-08-09 20:24:39 -0700535
536 /* compress one level */
Stephen Hemminger06801912007-08-10 15:22:13 -0700537 node_set_parent(n, NULL);
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700538 tnode_free_safe(tn);
Olof Johansson91b9a272005-08-09 20:24:39 -0700539 return n;
Robert Olsson19baf832005-06-21 12:43:18 -0700540 }
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700541 /*
Robert Olsson19baf832005-06-21 12:43:18 -0700542 * Double as long as the resulting node has a number of
543 * nonempty nodes that are above the threshold.
544 */
545
546 /*
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700547 * From "Implementing a dynamic compressed trie" by Stefan Nilsson of
548 * the Helsinki University of Technology and Matti Tikkanen of Nokia
Robert Olsson19baf832005-06-21 12:43:18 -0700549 * Telecommunications, page 6:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700550 * "A node is doubled if the ratio of non-empty children to all
Robert Olsson19baf832005-06-21 12:43:18 -0700551 * children in the *doubled* node is at least 'high'."
552 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700553 * 'high' in this instance is the variable 'inflate_threshold'. It
554 * is expressed as a percentage, so we multiply it with
555 * tnode_child_length() and instead of multiplying by 2 (since the
556 * child array will be doubled by inflate()) and multiplying
557 * the left-hand side by 100 (to handle the percentage thing) we
Robert Olsson19baf832005-06-21 12:43:18 -0700558 * multiply the left-hand side by 50.
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700559 *
560 * The left-hand side may look a bit weird: tnode_child_length(tn)
561 * - tn->empty_children is of course the number of non-null children
562 * in the current node. tn->full_children is the number of "full"
Robert Olsson19baf832005-06-21 12:43:18 -0700563 * children, that is non-null tnodes with a skip value of 0.
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700564 * All of those will be doubled in the resulting inflated tnode, so
Robert Olsson19baf832005-06-21 12:43:18 -0700565 * we just count them one extra time here.
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700566 *
Robert Olsson19baf832005-06-21 12:43:18 -0700567 * A clearer way to write this would be:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700568 *
Robert Olsson19baf832005-06-21 12:43:18 -0700569 * to_be_doubled = tn->full_children;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700570 * not_to_be_doubled = tnode_child_length(tn) - tn->empty_children -
Robert Olsson19baf832005-06-21 12:43:18 -0700571 * tn->full_children;
572 *
573 * new_child_length = tnode_child_length(tn) * 2;
574 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700575 * new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) /
Robert Olsson19baf832005-06-21 12:43:18 -0700576 * new_child_length;
577 * if (new_fill_factor >= inflate_threshold)
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700578 *
579 * ...and so on, tho it would mess up the while () loop.
580 *
Robert Olsson19baf832005-06-21 12:43:18 -0700581 * anyway,
582 * 100 * (not_to_be_doubled + 2*to_be_doubled) / new_child_length >=
583 * inflate_threshold
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700584 *
Robert Olsson19baf832005-06-21 12:43:18 -0700585 * avoid a division:
586 * 100 * (not_to_be_doubled + 2*to_be_doubled) >=
587 * inflate_threshold * new_child_length
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700588 *
Robert Olsson19baf832005-06-21 12:43:18 -0700589 * expand not_to_be_doubled and to_be_doubled, and shorten:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700590 * 100 * (tnode_child_length(tn) - tn->empty_children +
Olof Johansson91b9a272005-08-09 20:24:39 -0700591 * tn->full_children) >= inflate_threshold * new_child_length
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700592 *
Robert Olsson19baf832005-06-21 12:43:18 -0700593 * expand new_child_length:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700594 * 100 * (tnode_child_length(tn) - tn->empty_children +
Olof Johansson91b9a272005-08-09 20:24:39 -0700595 * tn->full_children) >=
Robert Olsson19baf832005-06-21 12:43:18 -0700596 * inflate_threshold * tnode_child_length(tn) * 2
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700597 *
Robert Olsson19baf832005-06-21 12:43:18 -0700598 * shorten again:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700599 * 50 * (tn->full_children + tnode_child_length(tn) -
Olof Johansson91b9a272005-08-09 20:24:39 -0700600 * tn->empty_children) >= inflate_threshold *
Robert Olsson19baf832005-06-21 12:43:18 -0700601 * tnode_child_length(tn)
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700602 *
Robert Olsson19baf832005-06-21 12:43:18 -0700603 */
604
605 check_tnode(tn);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700606
Robert Olssone6308be2005-10-04 13:01:58 -0700607 /* Keep root node larger */
608
Stephen Hemminger132adf52007-03-08 20:44:43 -0800609 if (!tn->parent)
Robert Olssone6308be2005-10-04 13:01:58 -0700610 inflate_threshold_use = inflate_threshold_root;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900611 else
Robert Olssone6308be2005-10-04 13:01:58 -0700612 inflate_threshold_use = inflate_threshold;
613
Robert Olsson2f368952005-07-05 15:02:40 -0700614 err = 0;
Robert Olsson05eee482007-03-19 16:27:37 -0700615 max_resize = 10;
616 while ((tn->full_children > 0 && max_resize-- &&
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800617 50 * (tn->full_children + tnode_child_length(tn)
618 - tn->empty_children)
619 >= inflate_threshold_use * tnode_child_length(tn))) {
Robert Olsson19baf832005-06-21 12:43:18 -0700620
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700621 old_tn = tn;
622 tn = inflate(t, tn);
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800623
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700624 if (IS_ERR(tn)) {
625 tn = old_tn;
Robert Olsson2f368952005-07-05 15:02:40 -0700626#ifdef CONFIG_IP_FIB_TRIE_STATS
627 t->stats.resize_node_skipped++;
628#endif
629 break;
630 }
Robert Olsson19baf832005-06-21 12:43:18 -0700631 }
632
Robert Olsson05eee482007-03-19 16:27:37 -0700633 if (max_resize < 0) {
634 if (!tn->parent)
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800635 pr_warning("Fix inflate_threshold_root."
636 " Now=%d size=%d bits\n",
637 inflate_threshold_root, tn->bits);
Robert Olsson05eee482007-03-19 16:27:37 -0700638 else
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800639 pr_warning("Fix inflate_threshold."
640 " Now=%d size=%d bits\n",
641 inflate_threshold, tn->bits);
Robert Olsson05eee482007-03-19 16:27:37 -0700642 }
643
Robert Olsson19baf832005-06-21 12:43:18 -0700644 check_tnode(tn);
645
646 /*
647 * Halve as long as the number of empty children in this
648 * node is above threshold.
649 */
Robert Olsson2f368952005-07-05 15:02:40 -0700650
Robert Olssone6308be2005-10-04 13:01:58 -0700651
652 /* Keep root node larger */
653
Stephen Hemminger132adf52007-03-08 20:44:43 -0800654 if (!tn->parent)
Robert Olssone6308be2005-10-04 13:01:58 -0700655 halve_threshold_use = halve_threshold_root;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900656 else
Robert Olssone6308be2005-10-04 13:01:58 -0700657 halve_threshold_use = halve_threshold;
658
Robert Olsson2f368952005-07-05 15:02:40 -0700659 err = 0;
Robert Olsson05eee482007-03-19 16:27:37 -0700660 max_resize = 10;
661 while (tn->bits > 1 && max_resize-- &&
Robert Olsson19baf832005-06-21 12:43:18 -0700662 100 * (tnode_child_length(tn) - tn->empty_children) <
Robert Olssone6308be2005-10-04 13:01:58 -0700663 halve_threshold_use * tnode_child_length(tn)) {
Robert Olsson19baf832005-06-21 12:43:18 -0700664
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700665 old_tn = tn;
666 tn = halve(t, tn);
667 if (IS_ERR(tn)) {
668 tn = old_tn;
Robert Olsson2f368952005-07-05 15:02:40 -0700669#ifdef CONFIG_IP_FIB_TRIE_STATS
670 t->stats.resize_node_skipped++;
671#endif
672 break;
673 }
674 }
675
Robert Olsson05eee482007-03-19 16:27:37 -0700676 if (max_resize < 0) {
677 if (!tn->parent)
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800678 pr_warning("Fix halve_threshold_root."
679 " Now=%d size=%d bits\n",
680 halve_threshold_root, tn->bits);
Robert Olsson05eee482007-03-19 16:27:37 -0700681 else
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800682 pr_warning("Fix halve_threshold."
683 " Now=%d size=%d bits\n",
684 halve_threshold, tn->bits);
Robert Olsson05eee482007-03-19 16:27:37 -0700685 }
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700686
Robert Olsson19baf832005-06-21 12:43:18 -0700687 /* Only one child remains */
Robert Olsson19baf832005-06-21 12:43:18 -0700688 if (tn->empty_children == tnode_child_length(tn) - 1)
689 for (i = 0; i < tnode_child_length(tn); i++) {
Olof Johansson91b9a272005-08-09 20:24:39 -0700690 struct node *n;
691
Olof Johansson91b9a272005-08-09 20:24:39 -0700692 n = tn->child[i];
Robert Olsson2373ce12005-08-25 13:01:29 -0700693 if (!n)
Olof Johansson91b9a272005-08-09 20:24:39 -0700694 continue;
Olof Johansson91b9a272005-08-09 20:24:39 -0700695
696 /* compress one level */
697
Stephen Hemminger06801912007-08-10 15:22:13 -0700698 node_set_parent(n, NULL);
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700699 tnode_free_safe(tn);
Olof Johansson91b9a272005-08-09 20:24:39 -0700700 return n;
Robert Olsson19baf832005-06-21 12:43:18 -0700701 }
702
703 return (struct node *) tn;
704}
705
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700706static struct tnode *inflate(struct trie *t, struct tnode *tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700707{
Robert Olsson19baf832005-06-21 12:43:18 -0700708 struct tnode *oldtnode = tn;
709 int olen = tnode_child_length(tn);
710 int i;
711
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700712 pr_debug("In inflate\n");
Robert Olsson19baf832005-06-21 12:43:18 -0700713
714 tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1);
715
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700716 if (!tn)
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700717 return ERR_PTR(-ENOMEM);
Robert Olsson2f368952005-07-05 15:02:40 -0700718
719 /*
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700720 * Preallocate and store tnodes before the actual work so we
721 * don't get into an inconsistent state if memory allocation
722 * fails. In case of failure we return the oldnode and inflate
Robert Olsson2f368952005-07-05 15:02:40 -0700723 * of tnode is ignored.
724 */
Olof Johansson91b9a272005-08-09 20:24:39 -0700725
726 for (i = 0; i < olen; i++) {
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800727 struct tnode *inode;
Robert Olsson2f368952005-07-05 15:02:40 -0700728
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800729 inode = (struct tnode *) tnode_get_child(oldtnode, i);
Robert Olsson2f368952005-07-05 15:02:40 -0700730 if (inode &&
731 IS_TNODE(inode) &&
732 inode->pos == oldtnode->pos + oldtnode->bits &&
733 inode->bits > 1) {
734 struct tnode *left, *right;
Stephen Hemmingerab66b4a2007-08-10 15:22:58 -0700735 t_key m = ~0U << (KEYLENGTH - 1) >> inode->pos;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700736
Robert Olsson2f368952005-07-05 15:02:40 -0700737 left = tnode_new(inode->key&(~m), inode->pos + 1,
738 inode->bits - 1);
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700739 if (!left)
740 goto nomem;
Olof Johansson91b9a272005-08-09 20:24:39 -0700741
Robert Olsson2f368952005-07-05 15:02:40 -0700742 right = tnode_new(inode->key|m, inode->pos + 1,
743 inode->bits - 1);
744
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900745 if (!right) {
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700746 tnode_free(left);
747 goto nomem;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900748 }
Robert Olsson2f368952005-07-05 15:02:40 -0700749
750 put_child(t, tn, 2*i, (struct node *) left);
751 put_child(t, tn, 2*i+1, (struct node *) right);
752 }
753 }
754
Olof Johansson91b9a272005-08-09 20:24:39 -0700755 for (i = 0; i < olen; i++) {
Stephen Hemmingerc95aaf92008-01-12 21:25:02 -0800756 struct tnode *inode;
Robert Olsson19baf832005-06-21 12:43:18 -0700757 struct node *node = tnode_get_child(oldtnode, i);
Olof Johansson91b9a272005-08-09 20:24:39 -0700758 struct tnode *left, *right;
759 int size, j;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700760
Robert Olsson19baf832005-06-21 12:43:18 -0700761 /* An empty child */
762 if (node == NULL)
763 continue;
764
765 /* A leaf or an internal node with skipped bits */
766
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700767 if (IS_LEAF(node) || ((struct tnode *) node)->pos >
Robert Olsson19baf832005-06-21 12:43:18 -0700768 tn->pos + tn->bits - 1) {
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800769 if (tkey_extract_bits(node->key,
770 oldtnode->pos + oldtnode->bits,
771 1) == 0)
Robert Olsson19baf832005-06-21 12:43:18 -0700772 put_child(t, tn, 2*i, node);
773 else
774 put_child(t, tn, 2*i+1, node);
775 continue;
776 }
777
778 /* An internal node with two children */
779 inode = (struct tnode *) node;
780
781 if (inode->bits == 1) {
782 put_child(t, tn, 2*i, inode->child[0]);
783 put_child(t, tn, 2*i+1, inode->child[1]);
784
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700785 tnode_free_safe(inode);
Olof Johansson91b9a272005-08-09 20:24:39 -0700786 continue;
Robert Olsson19baf832005-06-21 12:43:18 -0700787 }
788
Olof Johansson91b9a272005-08-09 20:24:39 -0700789 /* An internal node with more than two children */
Robert Olsson19baf832005-06-21 12:43:18 -0700790
Olof Johansson91b9a272005-08-09 20:24:39 -0700791 /* We will replace this node 'inode' with two new
792 * ones, 'left' and 'right', each with half of the
793 * original children. The two new nodes will have
794 * a position one bit further down the key and this
795 * means that the "significant" part of their keys
796 * (see the discussion near the top of this file)
797 * will differ by one bit, which will be "0" in
798 * left's key and "1" in right's key. Since we are
799 * moving the key position by one step, the bit that
800 * we are moving away from - the bit at position
801 * (inode->pos) - is the one that will differ between
802 * left and right. So... we synthesize that bit in the
803 * two new keys.
804 * The mask 'm' below will be a single "one" bit at
805 * the position (inode->pos)
806 */
Robert Olsson19baf832005-06-21 12:43:18 -0700807
Olof Johansson91b9a272005-08-09 20:24:39 -0700808 /* Use the old key, but set the new significant
809 * bit to zero.
810 */
Robert Olsson19baf832005-06-21 12:43:18 -0700811
Olof Johansson91b9a272005-08-09 20:24:39 -0700812 left = (struct tnode *) tnode_get_child(tn, 2*i);
813 put_child(t, tn, 2*i, NULL);
Robert Olsson19baf832005-06-21 12:43:18 -0700814
Olof Johansson91b9a272005-08-09 20:24:39 -0700815 BUG_ON(!left);
Robert Olsson2f368952005-07-05 15:02:40 -0700816
Olof Johansson91b9a272005-08-09 20:24:39 -0700817 right = (struct tnode *) tnode_get_child(tn, 2*i+1);
818 put_child(t, tn, 2*i+1, NULL);
Robert Olsson2f368952005-07-05 15:02:40 -0700819
Olof Johansson91b9a272005-08-09 20:24:39 -0700820 BUG_ON(!right);
Robert Olsson2f368952005-07-05 15:02:40 -0700821
Olof Johansson91b9a272005-08-09 20:24:39 -0700822 size = tnode_child_length(left);
823 for (j = 0; j < size; j++) {
824 put_child(t, left, j, inode->child[j]);
825 put_child(t, right, j, inode->child[j + size]);
Robert Olsson19baf832005-06-21 12:43:18 -0700826 }
Olof Johansson91b9a272005-08-09 20:24:39 -0700827 put_child(t, tn, 2*i, resize(t, left));
828 put_child(t, tn, 2*i+1, resize(t, right));
829
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700830 tnode_free_safe(inode);
Robert Olsson19baf832005-06-21 12:43:18 -0700831 }
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700832 tnode_free_safe(oldtnode);
Robert Olsson19baf832005-06-21 12:43:18 -0700833 return tn;
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700834nomem:
835 {
836 int size = tnode_child_length(tn);
837 int j;
838
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700839 for (j = 0; j < size; j++)
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700840 if (tn->child[j])
841 tnode_free((struct tnode *)tn->child[j]);
842
843 tnode_free(tn);
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700844
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700845 return ERR_PTR(-ENOMEM);
846 }
Robert Olsson19baf832005-06-21 12:43:18 -0700847}
848
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700849static struct tnode *halve(struct trie *t, struct tnode *tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700850{
851 struct tnode *oldtnode = tn;
852 struct node *left, *right;
853 int i;
854 int olen = tnode_child_length(tn);
855
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700856 pr_debug("In halve\n");
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700857
858 tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1);
Robert Olsson19baf832005-06-21 12:43:18 -0700859
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700860 if (!tn)
861 return ERR_PTR(-ENOMEM);
Robert Olsson2f368952005-07-05 15:02:40 -0700862
863 /*
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700864 * Preallocate and store tnodes before the actual work so we
865 * don't get into an inconsistent state if memory allocation
866 * fails. In case of failure we return the oldnode and halve
Robert Olsson2f368952005-07-05 15:02:40 -0700867 * of tnode is ignored.
868 */
869
Olof Johansson91b9a272005-08-09 20:24:39 -0700870 for (i = 0; i < olen; i += 2) {
Robert Olsson2f368952005-07-05 15:02:40 -0700871 left = tnode_get_child(oldtnode, i);
872 right = tnode_get_child(oldtnode, i+1);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700873
Robert Olsson2f368952005-07-05 15:02:40 -0700874 /* Two nonempty children */
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700875 if (left && right) {
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700876 struct tnode *newn;
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700877
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700878 newn = tnode_new(left->key, tn->pos + tn->bits, 1);
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700879
880 if (!newn)
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700881 goto nomem;
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700882
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700883 put_child(t, tn, i/2, (struct node *)newn);
Robert Olsson2f368952005-07-05 15:02:40 -0700884 }
Robert Olsson2f368952005-07-05 15:02:40 -0700885
Robert Olsson2f368952005-07-05 15:02:40 -0700886 }
Robert Olsson19baf832005-06-21 12:43:18 -0700887
Olof Johansson91b9a272005-08-09 20:24:39 -0700888 for (i = 0; i < olen; i += 2) {
889 struct tnode *newBinNode;
890
Robert Olsson19baf832005-06-21 12:43:18 -0700891 left = tnode_get_child(oldtnode, i);
892 right = tnode_get_child(oldtnode, i+1);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700893
Robert Olsson19baf832005-06-21 12:43:18 -0700894 /* At least one of the children is empty */
895 if (left == NULL) {
896 if (right == NULL) /* Both are empty */
897 continue;
898 put_child(t, tn, i/2, right);
Olof Johansson91b9a272005-08-09 20:24:39 -0700899 continue;
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700900 }
Olof Johansson91b9a272005-08-09 20:24:39 -0700901
902 if (right == NULL) {
Robert Olsson19baf832005-06-21 12:43:18 -0700903 put_child(t, tn, i/2, left);
Olof Johansson91b9a272005-08-09 20:24:39 -0700904 continue;
905 }
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700906
Robert Olsson19baf832005-06-21 12:43:18 -0700907 /* Two nonempty children */
Olof Johansson91b9a272005-08-09 20:24:39 -0700908 newBinNode = (struct tnode *) tnode_get_child(tn, i/2);
909 put_child(t, tn, i/2, NULL);
Olof Johansson91b9a272005-08-09 20:24:39 -0700910 put_child(t, newBinNode, 0, left);
911 put_child(t, newBinNode, 1, right);
912 put_child(t, tn, i/2, resize(t, newBinNode));
Robert Olsson19baf832005-06-21 12:43:18 -0700913 }
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700914 tnode_free_safe(oldtnode);
Robert Olsson19baf832005-06-21 12:43:18 -0700915 return tn;
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700916nomem:
917 {
918 int size = tnode_child_length(tn);
919 int j;
920
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700921 for (j = 0; j < size; j++)
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700922 if (tn->child[j])
923 tnode_free((struct tnode *)tn->child[j]);
924
925 tnode_free(tn);
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700926
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700927 return ERR_PTR(-ENOMEM);
928 }
Robert Olsson19baf832005-06-21 12:43:18 -0700929}
930
Robert Olsson772cb712005-09-19 15:31:18 -0700931/* readside must use rcu_read_lock currently dump routines
Robert Olsson2373ce12005-08-25 13:01:29 -0700932 via get_fa_head and dump */
933
Robert Olsson772cb712005-09-19 15:31:18 -0700934static struct leaf_info *find_leaf_info(struct leaf *l, int plen)
Robert Olsson19baf832005-06-21 12:43:18 -0700935{
Robert Olsson772cb712005-09-19 15:31:18 -0700936 struct hlist_head *head = &l->list;
Robert Olsson19baf832005-06-21 12:43:18 -0700937 struct hlist_node *node;
938 struct leaf_info *li;
939
Robert Olsson2373ce12005-08-25 13:01:29 -0700940 hlist_for_each_entry_rcu(li, node, head, hlist)
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700941 if (li->plen == plen)
Robert Olsson19baf832005-06-21 12:43:18 -0700942 return li;
Olof Johansson91b9a272005-08-09 20:24:39 -0700943
Robert Olsson19baf832005-06-21 12:43:18 -0700944 return NULL;
945}
946
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800947static inline struct list_head *get_fa_head(struct leaf *l, int plen)
Robert Olsson19baf832005-06-21 12:43:18 -0700948{
Robert Olsson772cb712005-09-19 15:31:18 -0700949 struct leaf_info *li = find_leaf_info(l, plen);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700950
Olof Johansson91b9a272005-08-09 20:24:39 -0700951 if (!li)
952 return NULL;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700953
Olof Johansson91b9a272005-08-09 20:24:39 -0700954 return &li->falh;
Robert Olsson19baf832005-06-21 12:43:18 -0700955}
956
957static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new)
958{
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900959 struct leaf_info *li = NULL, *last = NULL;
960 struct hlist_node *node;
Robert Olsson19baf832005-06-21 12:43:18 -0700961
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900962 if (hlist_empty(head)) {
963 hlist_add_head_rcu(&new->hlist, head);
964 } else {
965 hlist_for_each_entry(li, node, head, hlist) {
966 if (new->plen > li->plen)
967 break;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700968
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900969 last = li;
970 }
971 if (last)
972 hlist_add_after_rcu(&last->hlist, &new->hlist);
973 else
974 hlist_add_before_rcu(&new->hlist, &li->hlist);
975 }
Robert Olsson19baf832005-06-21 12:43:18 -0700976}
977
Robert Olsson2373ce12005-08-25 13:01:29 -0700978/* rcu_read_lock needs to be hold by caller from readside */
979
Robert Olsson19baf832005-06-21 12:43:18 -0700980static struct leaf *
981fib_find_node(struct trie *t, u32 key)
982{
983 int pos;
984 struct tnode *tn;
985 struct node *n;
986
987 pos = 0;
Robert Olsson2373ce12005-08-25 13:01:29 -0700988 n = rcu_dereference(t->trie);
Robert Olsson19baf832005-06-21 12:43:18 -0700989
990 while (n != NULL && NODE_TYPE(n) == T_TNODE) {
991 tn = (struct tnode *) n;
Olof Johansson91b9a272005-08-09 20:24:39 -0700992
Robert Olsson19baf832005-06-21 12:43:18 -0700993 check_tnode(tn);
Olof Johansson91b9a272005-08-09 20:24:39 -0700994
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700995 if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
Olof Johansson91b9a272005-08-09 20:24:39 -0700996 pos = tn->pos + tn->bits;
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800997 n = tnode_get_child_rcu(tn,
998 tkey_extract_bits(key,
999 tn->pos,
1000 tn->bits));
Olof Johansson91b9a272005-08-09 20:24:39 -07001001 } else
Robert Olsson19baf832005-06-21 12:43:18 -07001002 break;
1003 }
1004 /* Case we have found a leaf. Compare prefixes */
1005
Olof Johansson91b9a272005-08-09 20:24:39 -07001006 if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key))
1007 return (struct leaf *)n;
1008
Robert Olsson19baf832005-06-21 12:43:18 -07001009 return NULL;
1010}
1011
1012static struct node *trie_rebalance(struct trie *t, struct tnode *tn)
1013{
Robert Olsson19baf832005-06-21 12:43:18 -07001014 int wasfull;
Robert Olsson3ed18d72009-05-21 15:20:59 -07001015 t_key cindex, key;
Stephen Hemminger06801912007-08-10 15:22:13 -07001016 struct tnode *tp;
Robert Olsson19baf832005-06-21 12:43:18 -07001017
Robert Olsson3ed18d72009-05-21 15:20:59 -07001018 key = tn->key;
1019
Stephen Hemminger06801912007-08-10 15:22:13 -07001020 while (tn != NULL && (tp = node_parent((struct node *)tn)) != NULL) {
Robert Olsson19baf832005-06-21 12:43:18 -07001021 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1022 wasfull = tnode_full(tp, tnode_get_child(tp, cindex));
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001023 tn = (struct tnode *) resize(t, (struct tnode *)tn);
1024
1025 tnode_put_child_reorg((struct tnode *)tp, cindex,
1026 (struct node *)tn, wasfull);
Olof Johansson91b9a272005-08-09 20:24:39 -07001027
Stephen Hemminger06801912007-08-10 15:22:13 -07001028 tp = node_parent((struct node *) tn);
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -07001029 tnode_free_flush();
Stephen Hemminger06801912007-08-10 15:22:13 -07001030 if (!tp)
Robert Olsson19baf832005-06-21 12:43:18 -07001031 break;
Stephen Hemminger06801912007-08-10 15:22:13 -07001032 tn = tp;
Robert Olsson19baf832005-06-21 12:43:18 -07001033 }
Stephen Hemminger06801912007-08-10 15:22:13 -07001034
Robert Olsson19baf832005-06-21 12:43:18 -07001035 /* Handle last (top) tnode */
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -07001036 if (IS_TNODE(tn)) {
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001037 tn = (struct tnode *)resize(t, (struct tnode *)tn);
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -07001038 tnode_free_flush();
1039 }
Robert Olsson19baf832005-06-21 12:43:18 -07001040
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001041 return (struct node *)tn;
Robert Olsson19baf832005-06-21 12:43:18 -07001042}
1043
Robert Olsson2373ce12005-08-25 13:01:29 -07001044/* only used from updater-side */
1045
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001046static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
Robert Olsson19baf832005-06-21 12:43:18 -07001047{
1048 int pos, newpos;
1049 struct tnode *tp = NULL, *tn = NULL;
1050 struct node *n;
1051 struct leaf *l;
1052 int missbit;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001053 struct list_head *fa_head = NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07001054 struct leaf_info *li;
1055 t_key cindex;
1056
1057 pos = 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001058 n = t->trie;
Robert Olsson19baf832005-06-21 12:43:18 -07001059
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001060 /* If we point to NULL, stop. Either the tree is empty and we should
1061 * just put a new leaf in if, or we have reached an empty child slot,
Robert Olsson19baf832005-06-21 12:43:18 -07001062 * and we should just put our new leaf in that.
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001063 * If we point to a T_TNODE, check if it matches our key. Note that
1064 * a T_TNODE might be skipping any number of bits - its 'pos' need
Robert Olsson19baf832005-06-21 12:43:18 -07001065 * not be the parent's 'pos'+'bits'!
1066 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001067 * If it does match the current key, get pos/bits from it, extract
Robert Olsson19baf832005-06-21 12:43:18 -07001068 * the index from our key, push the T_TNODE and walk the tree.
1069 *
1070 * If it doesn't, we have to replace it with a new T_TNODE.
1071 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001072 * If we point to a T_LEAF, it might or might not have the same key
1073 * as we do. If it does, just change the value, update the T_LEAF's
1074 * value, and return it.
Robert Olsson19baf832005-06-21 12:43:18 -07001075 * If it doesn't, we need to replace it with a T_TNODE.
1076 */
1077
1078 while (n != NULL && NODE_TYPE(n) == T_TNODE) {
1079 tn = (struct tnode *) n;
Olof Johansson91b9a272005-08-09 20:24:39 -07001080
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001081 check_tnode(tn);
Olof Johansson91b9a272005-08-09 20:24:39 -07001082
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001083 if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
Robert Olsson19baf832005-06-21 12:43:18 -07001084 tp = tn;
Olof Johansson91b9a272005-08-09 20:24:39 -07001085 pos = tn->pos + tn->bits;
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001086 n = tnode_get_child(tn,
1087 tkey_extract_bits(key,
1088 tn->pos,
1089 tn->bits));
Robert Olsson19baf832005-06-21 12:43:18 -07001090
Stephen Hemminger06801912007-08-10 15:22:13 -07001091 BUG_ON(n && node_parent(n) != tn);
Olof Johansson91b9a272005-08-09 20:24:39 -07001092 } else
Robert Olsson19baf832005-06-21 12:43:18 -07001093 break;
1094 }
1095
1096 /*
1097 * n ----> NULL, LEAF or TNODE
1098 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001099 * tp is n's (parent) ----> NULL or TNODE
Robert Olsson19baf832005-06-21 12:43:18 -07001100 */
1101
Olof Johansson91b9a272005-08-09 20:24:39 -07001102 BUG_ON(tp && IS_LEAF(tp));
Robert Olsson19baf832005-06-21 12:43:18 -07001103
1104 /* Case 1: n is a leaf. Compare prefixes */
1105
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001106 if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) {
Stephen Hemmingerc95aaf92008-01-12 21:25:02 -08001107 l = (struct leaf *) n;
Robert Olsson19baf832005-06-21 12:43:18 -07001108 li = leaf_info_new(plen);
Olof Johansson91b9a272005-08-09 20:24:39 -07001109
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001110 if (!li)
1111 return NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07001112
1113 fa_head = &li->falh;
1114 insert_leaf_info(&l->list, li);
1115 goto done;
1116 }
Robert Olsson19baf832005-06-21 12:43:18 -07001117 l = leaf_new();
1118
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001119 if (!l)
1120 return NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07001121
1122 l->key = key;
1123 li = leaf_info_new(plen);
1124
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001125 if (!li) {
Stephen Hemminger387a5482008-04-10 03:47:34 -07001126 free_leaf(l);
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001127 return NULL;
Robert Olssonf835e472005-06-28 15:00:39 -07001128 }
Robert Olsson19baf832005-06-21 12:43:18 -07001129
1130 fa_head = &li->falh;
1131 insert_leaf_info(&l->list, li);
1132
Robert Olsson19baf832005-06-21 12:43:18 -07001133 if (t->trie && n == NULL) {
Olof Johansson91b9a272005-08-09 20:24:39 -07001134 /* Case 2: n is NULL, and will just insert a new leaf */
Robert Olsson19baf832005-06-21 12:43:18 -07001135
Stephen Hemminger06801912007-08-10 15:22:13 -07001136 node_set_parent((struct node *)l, tp);
Robert Olsson19baf832005-06-21 12:43:18 -07001137
Olof Johansson91b9a272005-08-09 20:24:39 -07001138 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1139 put_child(t, (struct tnode *)tp, cindex, (struct node *)l);
1140 } else {
1141 /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001142 /*
1143 * Add a new tnode here
Robert Olsson19baf832005-06-21 12:43:18 -07001144 * first tnode need some special handling
1145 */
1146
1147 if (tp)
Olof Johansson91b9a272005-08-09 20:24:39 -07001148 pos = tp->pos+tp->bits;
Robert Olsson19baf832005-06-21 12:43:18 -07001149 else
Olof Johansson91b9a272005-08-09 20:24:39 -07001150 pos = 0;
1151
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001152 if (n) {
Robert Olsson19baf832005-06-21 12:43:18 -07001153 newpos = tkey_mismatch(key, pos, n->key);
1154 tn = tnode_new(n->key, newpos, 1);
Olof Johansson91b9a272005-08-09 20:24:39 -07001155 } else {
Robert Olsson19baf832005-06-21 12:43:18 -07001156 newpos = 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001157 tn = tnode_new(key, newpos, 1); /* First tnode */
Robert Olsson19baf832005-06-21 12:43:18 -07001158 }
Robert Olsson19baf832005-06-21 12:43:18 -07001159
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001160 if (!tn) {
Robert Olssonf835e472005-06-28 15:00:39 -07001161 free_leaf_info(li);
Stephen Hemminger387a5482008-04-10 03:47:34 -07001162 free_leaf(l);
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001163 return NULL;
Olof Johansson91b9a272005-08-09 20:24:39 -07001164 }
1165
Stephen Hemminger06801912007-08-10 15:22:13 -07001166 node_set_parent((struct node *)tn, tp);
Robert Olsson19baf832005-06-21 12:43:18 -07001167
Olof Johansson91b9a272005-08-09 20:24:39 -07001168 missbit = tkey_extract_bits(key, newpos, 1);
Robert Olsson19baf832005-06-21 12:43:18 -07001169 put_child(t, tn, missbit, (struct node *)l);
1170 put_child(t, tn, 1-missbit, n);
1171
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001172 if (tp) {
Robert Olsson19baf832005-06-21 12:43:18 -07001173 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001174 put_child(t, (struct tnode *)tp, cindex,
1175 (struct node *)tn);
Olof Johansson91b9a272005-08-09 20:24:39 -07001176 } else {
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001177 rcu_assign_pointer(t->trie, (struct node *)tn);
Robert Olsson19baf832005-06-21 12:43:18 -07001178 tp = tn;
1179 }
1180 }
Olof Johansson91b9a272005-08-09 20:24:39 -07001181
1182 if (tp && tp->pos + tp->bits > 32)
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001183 pr_warning("fib_trie"
1184 " tp=%p pos=%d, bits=%d, key=%0x plen=%d\n",
1185 tp, tp->pos, tp->bits, key, plen);
Olof Johansson91b9a272005-08-09 20:24:39 -07001186
Robert Olsson19baf832005-06-21 12:43:18 -07001187 /* Rebalance the trie */
Robert Olsson2373ce12005-08-25 13:01:29 -07001188
1189 rcu_assign_pointer(t->trie, trie_rebalance(t, tp));
Robert Olssonf835e472005-06-28 15:00:39 -07001190done:
Robert Olsson19baf832005-06-21 12:43:18 -07001191 return fa_head;
1192}
1193
Robert Olssond562f1f2007-03-26 14:22:22 -07001194/*
1195 * Caller must hold RTNL.
1196 */
Thomas Graf4e902c52006-08-17 18:14:52 -07001197static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg)
Robert Olsson19baf832005-06-21 12:43:18 -07001198{
1199 struct trie *t = (struct trie *) tb->tb_data;
1200 struct fib_alias *fa, *new_fa;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001201 struct list_head *fa_head = NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07001202 struct fib_info *fi;
Thomas Graf4e902c52006-08-17 18:14:52 -07001203 int plen = cfg->fc_dst_len;
1204 u8 tos = cfg->fc_tos;
Robert Olsson19baf832005-06-21 12:43:18 -07001205 u32 key, mask;
1206 int err;
1207 struct leaf *l;
1208
1209 if (plen > 32)
1210 return -EINVAL;
1211
Thomas Graf4e902c52006-08-17 18:14:52 -07001212 key = ntohl(cfg->fc_dst);
Robert Olsson19baf832005-06-21 12:43:18 -07001213
Patrick McHardy2dfe55b2006-08-10 23:08:33 -07001214 pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen);
Robert Olsson19baf832005-06-21 12:43:18 -07001215
Olof Johansson91b9a272005-08-09 20:24:39 -07001216 mask = ntohl(inet_make_mask(plen));
Robert Olsson19baf832005-06-21 12:43:18 -07001217
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001218 if (key & ~mask)
Robert Olsson19baf832005-06-21 12:43:18 -07001219 return -EINVAL;
1220
1221 key = key & mask;
1222
Thomas Graf4e902c52006-08-17 18:14:52 -07001223 fi = fib_create_info(cfg);
1224 if (IS_ERR(fi)) {
1225 err = PTR_ERR(fi);
Robert Olsson19baf832005-06-21 12:43:18 -07001226 goto err;
Thomas Graf4e902c52006-08-17 18:14:52 -07001227 }
Robert Olsson19baf832005-06-21 12:43:18 -07001228
1229 l = fib_find_node(t, key);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001230 fa = NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07001231
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001232 if (l) {
Robert Olsson19baf832005-06-21 12:43:18 -07001233 fa_head = get_fa_head(l, plen);
1234 fa = fib_find_alias(fa_head, tos, fi->fib_priority);
1235 }
1236
1237 /* Now fa, if non-NULL, points to the first fib alias
1238 * with the same keys [prefix,tos,priority], if such key already
1239 * exists or to the node before which we will insert new one.
1240 *
1241 * If fa is NULL, we will need to allocate a new one and
1242 * insert to the head of f.
1243 *
1244 * If f is NULL, no fib node matched the destination key
1245 * and we need to allocate a new one of those as well.
1246 */
1247
Julian Anastasov936f6f82008-01-28 21:18:06 -08001248 if (fa && fa->fa_tos == tos &&
1249 fa->fa_info->fib_priority == fi->fib_priority) {
1250 struct fib_alias *fa_first, *fa_match;
Robert Olsson19baf832005-06-21 12:43:18 -07001251
1252 err = -EEXIST;
Thomas Graf4e902c52006-08-17 18:14:52 -07001253 if (cfg->fc_nlflags & NLM_F_EXCL)
Robert Olsson19baf832005-06-21 12:43:18 -07001254 goto out;
1255
Julian Anastasov936f6f82008-01-28 21:18:06 -08001256 /* We have 2 goals:
1257 * 1. Find exact match for type, scope, fib_info to avoid
1258 * duplicate routes
1259 * 2. Find next 'fa' (or head), NLM_F_APPEND inserts before it
1260 */
1261 fa_match = NULL;
1262 fa_first = fa;
1263 fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list);
1264 list_for_each_entry_continue(fa, fa_head, fa_list) {
1265 if (fa->fa_tos != tos)
1266 break;
1267 if (fa->fa_info->fib_priority != fi->fib_priority)
1268 break;
1269 if (fa->fa_type == cfg->fc_type &&
1270 fa->fa_scope == cfg->fc_scope &&
1271 fa->fa_info == fi) {
1272 fa_match = fa;
1273 break;
1274 }
1275 }
1276
Thomas Graf4e902c52006-08-17 18:14:52 -07001277 if (cfg->fc_nlflags & NLM_F_REPLACE) {
Robert Olsson19baf832005-06-21 12:43:18 -07001278 struct fib_info *fi_drop;
1279 u8 state;
1280
Julian Anastasov936f6f82008-01-28 21:18:06 -08001281 fa = fa_first;
1282 if (fa_match) {
1283 if (fa == fa_match)
1284 err = 0;
Joonwoo Park67250332008-01-18 03:45:18 -08001285 goto out;
Julian Anastasov936f6f82008-01-28 21:18:06 -08001286 }
Robert Olsson2373ce12005-08-25 13:01:29 -07001287 err = -ENOBUFS;
Christoph Lametere94b1762006-12-06 20:33:17 -08001288 new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
Robert Olsson2373ce12005-08-25 13:01:29 -07001289 if (new_fa == NULL)
1290 goto out;
Robert Olsson19baf832005-06-21 12:43:18 -07001291
1292 fi_drop = fa->fa_info;
Robert Olsson2373ce12005-08-25 13:01:29 -07001293 new_fa->fa_tos = fa->fa_tos;
1294 new_fa->fa_info = fi;
Thomas Graf4e902c52006-08-17 18:14:52 -07001295 new_fa->fa_type = cfg->fc_type;
1296 new_fa->fa_scope = cfg->fc_scope;
Robert Olsson19baf832005-06-21 12:43:18 -07001297 state = fa->fa_state;
Julian Anastasov936f6f82008-01-28 21:18:06 -08001298 new_fa->fa_state = state & ~FA_S_ACCESSED;
Robert Olsson19baf832005-06-21 12:43:18 -07001299
Robert Olsson2373ce12005-08-25 13:01:29 -07001300 list_replace_rcu(&fa->fa_list, &new_fa->fa_list);
1301 alias_free_mem_rcu(fa);
Robert Olsson19baf832005-06-21 12:43:18 -07001302
1303 fib_release_info(fi_drop);
1304 if (state & FA_S_ACCESSED)
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -07001305 rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
Milan Kocianb8f55832007-05-23 14:55:06 -07001306 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen,
1307 tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE);
Robert Olsson19baf832005-06-21 12:43:18 -07001308
Olof Johansson91b9a272005-08-09 20:24:39 -07001309 goto succeeded;
Robert Olsson19baf832005-06-21 12:43:18 -07001310 }
1311 /* Error if we find a perfect match which
1312 * uses the same scope, type, and nexthop
1313 * information.
1314 */
Julian Anastasov936f6f82008-01-28 21:18:06 -08001315 if (fa_match)
1316 goto out;
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001317
Thomas Graf4e902c52006-08-17 18:14:52 -07001318 if (!(cfg->fc_nlflags & NLM_F_APPEND))
Julian Anastasov936f6f82008-01-28 21:18:06 -08001319 fa = fa_first;
Robert Olsson19baf832005-06-21 12:43:18 -07001320 }
1321 err = -ENOENT;
Thomas Graf4e902c52006-08-17 18:14:52 -07001322 if (!(cfg->fc_nlflags & NLM_F_CREATE))
Robert Olsson19baf832005-06-21 12:43:18 -07001323 goto out;
1324
1325 err = -ENOBUFS;
Christoph Lametere94b1762006-12-06 20:33:17 -08001326 new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
Robert Olsson19baf832005-06-21 12:43:18 -07001327 if (new_fa == NULL)
1328 goto out;
1329
1330 new_fa->fa_info = fi;
1331 new_fa->fa_tos = tos;
Thomas Graf4e902c52006-08-17 18:14:52 -07001332 new_fa->fa_type = cfg->fc_type;
1333 new_fa->fa_scope = cfg->fc_scope;
Robert Olsson19baf832005-06-21 12:43:18 -07001334 new_fa->fa_state = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001335 /*
1336 * Insert new entry to the list.
1337 */
1338
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001339 if (!fa_head) {
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001340 fa_head = fib_insert_node(t, key, plen);
1341 if (unlikely(!fa_head)) {
1342 err = -ENOMEM;
Robert Olssonf835e472005-06-28 15:00:39 -07001343 goto out_free_new_fa;
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001344 }
Robert Olssonf835e472005-06-28 15:00:39 -07001345 }
Robert Olsson19baf832005-06-21 12:43:18 -07001346
Robert Olsson2373ce12005-08-25 13:01:29 -07001347 list_add_tail_rcu(&new_fa->fa_list,
1348 (fa ? &fa->fa_list : fa_head));
Robert Olsson19baf832005-06-21 12:43:18 -07001349
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -07001350 rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
Thomas Graf4e902c52006-08-17 18:14:52 -07001351 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id,
Milan Kocianb8f55832007-05-23 14:55:06 -07001352 &cfg->fc_nlinfo, 0);
Robert Olsson19baf832005-06-21 12:43:18 -07001353succeeded:
1354 return 0;
Robert Olssonf835e472005-06-28 15:00:39 -07001355
1356out_free_new_fa:
1357 kmem_cache_free(fn_alias_kmem, new_fa);
Robert Olsson19baf832005-06-21 12:43:18 -07001358out:
1359 fib_release_info(fi);
Olof Johansson91b9a272005-08-09 20:24:39 -07001360err:
Robert Olsson19baf832005-06-21 12:43:18 -07001361 return err;
1362}
1363
Robert Olsson772cb712005-09-19 15:31:18 -07001364/* should be called with rcu_read_lock */
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001365static int check_leaf(struct trie *t, struct leaf *l,
1366 t_key key, const struct flowi *flp,
1367 struct fib_result *res)
Robert Olsson19baf832005-06-21 12:43:18 -07001368{
Robert Olsson19baf832005-06-21 12:43:18 -07001369 struct leaf_info *li;
1370 struct hlist_head *hhead = &l->list;
1371 struct hlist_node *node;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001372
Robert Olsson2373ce12005-08-25 13:01:29 -07001373 hlist_for_each_entry_rcu(li, node, hhead, hlist) {
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001374 int err;
1375 int plen = li->plen;
1376 __be32 mask = inet_make_mask(plen);
1377
Al Viro888454c2006-09-19 13:42:46 -07001378 if (l->key != (key & ntohl(mask)))
Robert Olsson19baf832005-06-21 12:43:18 -07001379 continue;
1380
Rami Rosene204a342009-05-18 01:19:12 +00001381 err = fib_semantic_match(&li->falh, flp, res, plen);
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001382
Robert Olsson19baf832005-06-21 12:43:18 -07001383#ifdef CONFIG_IP_FIB_TRIE_STATS
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001384 if (err <= 0)
Robert Olsson19baf832005-06-21 12:43:18 -07001385 t->stats.semantic_match_passed++;
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001386 else
1387 t->stats.semantic_match_miss++;
Robert Olsson19baf832005-06-21 12:43:18 -07001388#endif
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001389 if (err <= 0)
Ben Hutchings2e655572008-07-10 16:52:52 -07001390 return err;
Robert Olsson19baf832005-06-21 12:43:18 -07001391 }
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001392
Ben Hutchings2e655572008-07-10 16:52:52 -07001393 return 1;
Robert Olsson19baf832005-06-21 12:43:18 -07001394}
1395
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001396static int fn_trie_lookup(struct fib_table *tb, const struct flowi *flp,
1397 struct fib_result *res)
Robert Olsson19baf832005-06-21 12:43:18 -07001398{
1399 struct trie *t = (struct trie *) tb->tb_data;
Ben Hutchings2e655572008-07-10 16:52:52 -07001400 int ret;
Robert Olsson19baf832005-06-21 12:43:18 -07001401 struct node *n;
1402 struct tnode *pn;
1403 int pos, bits;
Olof Johansson91b9a272005-08-09 20:24:39 -07001404 t_key key = ntohl(flp->fl4_dst);
Robert Olsson19baf832005-06-21 12:43:18 -07001405 int chopped_off;
1406 t_key cindex = 0;
1407 int current_prefix_length = KEYLENGTH;
Olof Johansson91b9a272005-08-09 20:24:39 -07001408 struct tnode *cn;
1409 t_key node_prefix, key_prefix, pref_mismatch;
1410 int mp;
1411
Robert Olsson2373ce12005-08-25 13:01:29 -07001412 rcu_read_lock();
Robert Olsson19baf832005-06-21 12:43:18 -07001413
Robert Olsson2373ce12005-08-25 13:01:29 -07001414 n = rcu_dereference(t->trie);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001415 if (!n)
Robert Olsson19baf832005-06-21 12:43:18 -07001416 goto failed;
1417
1418#ifdef CONFIG_IP_FIB_TRIE_STATS
1419 t->stats.gets++;
1420#endif
1421
1422 /* Just a leaf? */
1423 if (IS_LEAF(n)) {
Ben Hutchings2e655572008-07-10 16:52:52 -07001424 ret = check_leaf(t, (struct leaf *)n, key, flp, res);
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001425 goto found;
Robert Olsson19baf832005-06-21 12:43:18 -07001426 }
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001427
Robert Olsson19baf832005-06-21 12:43:18 -07001428 pn = (struct tnode *) n;
1429 chopped_off = 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001430
Olof Johansson91b9a272005-08-09 20:24:39 -07001431 while (pn) {
Robert Olsson19baf832005-06-21 12:43:18 -07001432 pos = pn->pos;
1433 bits = pn->bits;
1434
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001435 if (!chopped_off)
Stephen Hemmingerab66b4a2007-08-10 15:22:58 -07001436 cindex = tkey_extract_bits(mask_pfx(key, current_prefix_length),
1437 pos, bits);
Robert Olsson19baf832005-06-21 12:43:18 -07001438
1439 n = tnode_get_child(pn, cindex);
1440
1441 if (n == NULL) {
1442#ifdef CONFIG_IP_FIB_TRIE_STATS
1443 t->stats.null_node_hit++;
1444#endif
1445 goto backtrace;
1446 }
1447
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001448 if (IS_LEAF(n)) {
Ben Hutchings2e655572008-07-10 16:52:52 -07001449 ret = check_leaf(t, (struct leaf *)n, key, flp, res);
1450 if (ret > 0)
Olof Johansson91b9a272005-08-09 20:24:39 -07001451 goto backtrace;
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001452 goto found;
Olof Johansson91b9a272005-08-09 20:24:39 -07001453 }
1454
Olof Johansson91b9a272005-08-09 20:24:39 -07001455 cn = (struct tnode *)n;
1456
1457 /*
1458 * It's a tnode, and we can do some extra checks here if we
1459 * like, to avoid descending into a dead-end branch.
1460 * This tnode is in the parent's child array at index
1461 * key[p_pos..p_pos+p_bits] but potentially with some bits
1462 * chopped off, so in reality the index may be just a
1463 * subprefix, padded with zero at the end.
1464 * We can also take a look at any skipped bits in this
1465 * tnode - everything up to p_pos is supposed to be ok,
1466 * and the non-chopped bits of the index (se previous
1467 * paragraph) are also guaranteed ok, but the rest is
1468 * considered unknown.
1469 *
1470 * The skipped bits are key[pos+bits..cn->pos].
1471 */
1472
1473 /* If current_prefix_length < pos+bits, we are already doing
1474 * actual prefix matching, which means everything from
1475 * pos+(bits-chopped_off) onward must be zero along some
1476 * branch of this subtree - otherwise there is *no* valid
1477 * prefix present. Here we can only check the skipped
1478 * bits. Remember, since we have already indexed into the
1479 * parent's child array, we know that the bits we chopped of
1480 * *are* zero.
1481 */
1482
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001483 /* NOTA BENE: Checking only skipped bits
1484 for the new node here */
Olof Johansson91b9a272005-08-09 20:24:39 -07001485
1486 if (current_prefix_length < pos+bits) {
1487 if (tkey_extract_bits(cn->key, current_prefix_length,
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001488 cn->pos - current_prefix_length)
1489 || !(cn->child[0]))
Olof Johansson91b9a272005-08-09 20:24:39 -07001490 goto backtrace;
1491 }
1492
1493 /*
1494 * If chopped_off=0, the index is fully validated and we
1495 * only need to look at the skipped bits for this, the new,
1496 * tnode. What we actually want to do is to find out if
1497 * these skipped bits match our key perfectly, or if we will
1498 * have to count on finding a matching prefix further down,
1499 * because if we do, we would like to have some way of
1500 * verifying the existence of such a prefix at this point.
1501 */
1502
1503 /* The only thing we can do at this point is to verify that
1504 * any such matching prefix can indeed be a prefix to our
1505 * key, and if the bits in the node we are inspecting that
1506 * do not match our key are not ZERO, this cannot be true.
1507 * Thus, find out where there is a mismatch (before cn->pos)
1508 * and verify that all the mismatching bits are zero in the
1509 * new tnode's key.
1510 */
1511
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001512 /*
1513 * Note: We aren't very concerned about the piece of
1514 * the key that precede pn->pos+pn->bits, since these
1515 * have already been checked. The bits after cn->pos
1516 * aren't checked since these are by definition
1517 * "unknown" at this point. Thus, what we want to see
1518 * is if we are about to enter the "prefix matching"
1519 * state, and in that case verify that the skipped
1520 * bits that will prevail throughout this subtree are
1521 * zero, as they have to be if we are to find a
1522 * matching prefix.
Olof Johansson91b9a272005-08-09 20:24:39 -07001523 */
1524
Stephen Hemmingerab66b4a2007-08-10 15:22:58 -07001525 node_prefix = mask_pfx(cn->key, cn->pos);
1526 key_prefix = mask_pfx(key, cn->pos);
Olof Johansson91b9a272005-08-09 20:24:39 -07001527 pref_mismatch = key_prefix^node_prefix;
1528 mp = 0;
1529
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001530 /*
1531 * In short: If skipped bits in this node do not match
1532 * the search key, enter the "prefix matching"
1533 * state.directly.
Olof Johansson91b9a272005-08-09 20:24:39 -07001534 */
1535 if (pref_mismatch) {
1536 while (!(pref_mismatch & (1<<(KEYLENGTH-1)))) {
1537 mp++;
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001538 pref_mismatch = pref_mismatch << 1;
Olof Johansson91b9a272005-08-09 20:24:39 -07001539 }
1540 key_prefix = tkey_extract_bits(cn->key, mp, cn->pos-mp);
1541
1542 if (key_prefix != 0)
1543 goto backtrace;
1544
1545 if (current_prefix_length >= cn->pos)
1546 current_prefix_length = mp;
1547 }
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001548
Olof Johansson91b9a272005-08-09 20:24:39 -07001549 pn = (struct tnode *)n; /* Descend */
1550 chopped_off = 0;
1551 continue;
1552
Robert Olsson19baf832005-06-21 12:43:18 -07001553backtrace:
1554 chopped_off++;
1555
1556 /* As zero don't change the child key (cindex) */
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001557 while ((chopped_off <= pn->bits)
1558 && !(cindex & (1<<(chopped_off-1))))
Robert Olsson19baf832005-06-21 12:43:18 -07001559 chopped_off++;
Robert Olsson19baf832005-06-21 12:43:18 -07001560
1561 /* Decrease current_... with bits chopped off */
1562 if (current_prefix_length > pn->pos + pn->bits - chopped_off)
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001563 current_prefix_length = pn->pos + pn->bits
1564 - chopped_off;
Olof Johansson91b9a272005-08-09 20:24:39 -07001565
Robert Olsson19baf832005-06-21 12:43:18 -07001566 /*
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001567 * Either we do the actual chop off according or if we have
Robert Olsson19baf832005-06-21 12:43:18 -07001568 * chopped off all bits in this tnode walk up to our parent.
1569 */
1570
Olof Johansson91b9a272005-08-09 20:24:39 -07001571 if (chopped_off <= pn->bits) {
Robert Olsson19baf832005-06-21 12:43:18 -07001572 cindex &= ~(1 << (chopped_off-1));
Olof Johansson91b9a272005-08-09 20:24:39 -07001573 } else {
Stephen Hemminger06801912007-08-10 15:22:13 -07001574 struct tnode *parent = node_parent((struct node *) pn);
1575 if (!parent)
Robert Olsson19baf832005-06-21 12:43:18 -07001576 goto failed;
Olof Johansson91b9a272005-08-09 20:24:39 -07001577
Robert Olsson19baf832005-06-21 12:43:18 -07001578 /* Get Child's index */
Stephen Hemminger06801912007-08-10 15:22:13 -07001579 cindex = tkey_extract_bits(pn->key, parent->pos, parent->bits);
1580 pn = parent;
Robert Olsson19baf832005-06-21 12:43:18 -07001581 chopped_off = 0;
1582
1583#ifdef CONFIG_IP_FIB_TRIE_STATS
1584 t->stats.backtrack++;
1585#endif
1586 goto backtrace;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001587 }
Robert Olsson19baf832005-06-21 12:43:18 -07001588 }
1589failed:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001590 ret = 1;
Robert Olsson19baf832005-06-21 12:43:18 -07001591found:
Robert Olsson2373ce12005-08-25 13:01:29 -07001592 rcu_read_unlock();
Robert Olsson19baf832005-06-21 12:43:18 -07001593 return ret;
1594}
1595
Stephen Hemminger9195bef2008-01-22 21:56:34 -08001596/*
1597 * Remove the leaf and return parent.
1598 */
1599static void trie_leaf_remove(struct trie *t, struct leaf *l)
Robert Olsson19baf832005-06-21 12:43:18 -07001600{
Stephen Hemminger9195bef2008-01-22 21:56:34 -08001601 struct tnode *tp = node_parent((struct node *) l);
Robert Olsson19baf832005-06-21 12:43:18 -07001602
Stephen Hemminger9195bef2008-01-22 21:56:34 -08001603 pr_debug("entering trie_leaf_remove(%p)\n", l);
Robert Olsson19baf832005-06-21 12:43:18 -07001604
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001605 if (tp) {
Stephen Hemminger9195bef2008-01-22 21:56:34 -08001606 t_key cindex = tkey_extract_bits(l->key, tp->pos, tp->bits);
Robert Olsson19baf832005-06-21 12:43:18 -07001607 put_child(t, (struct tnode *)tp, cindex, NULL);
Robert Olsson2373ce12005-08-25 13:01:29 -07001608 rcu_assign_pointer(t->trie, trie_rebalance(t, tp));
Olof Johansson91b9a272005-08-09 20:24:39 -07001609 } else
Robert Olsson2373ce12005-08-25 13:01:29 -07001610 rcu_assign_pointer(t->trie, NULL);
Robert Olsson19baf832005-06-21 12:43:18 -07001611
Stephen Hemminger387a5482008-04-10 03:47:34 -07001612 free_leaf(l);
Robert Olsson19baf832005-06-21 12:43:18 -07001613}
1614
Robert Olssond562f1f2007-03-26 14:22:22 -07001615/*
1616 * Caller must hold RTNL.
1617 */
Thomas Graf4e902c52006-08-17 18:14:52 -07001618static int fn_trie_delete(struct fib_table *tb, struct fib_config *cfg)
Robert Olsson19baf832005-06-21 12:43:18 -07001619{
1620 struct trie *t = (struct trie *) tb->tb_data;
1621 u32 key, mask;
Thomas Graf4e902c52006-08-17 18:14:52 -07001622 int plen = cfg->fc_dst_len;
1623 u8 tos = cfg->fc_tos;
Robert Olsson19baf832005-06-21 12:43:18 -07001624 struct fib_alias *fa, *fa_to_delete;
1625 struct list_head *fa_head;
1626 struct leaf *l;
Olof Johansson91b9a272005-08-09 20:24:39 -07001627 struct leaf_info *li;
1628
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001629 if (plen > 32)
Robert Olsson19baf832005-06-21 12:43:18 -07001630 return -EINVAL;
1631
Thomas Graf4e902c52006-08-17 18:14:52 -07001632 key = ntohl(cfg->fc_dst);
Olof Johansson91b9a272005-08-09 20:24:39 -07001633 mask = ntohl(inet_make_mask(plen));
Robert Olsson19baf832005-06-21 12:43:18 -07001634
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001635 if (key & ~mask)
Robert Olsson19baf832005-06-21 12:43:18 -07001636 return -EINVAL;
1637
1638 key = key & mask;
1639 l = fib_find_node(t, key);
1640
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001641 if (!l)
Robert Olsson19baf832005-06-21 12:43:18 -07001642 return -ESRCH;
1643
1644 fa_head = get_fa_head(l, plen);
1645 fa = fib_find_alias(fa_head, tos, 0);
1646
1647 if (!fa)
1648 return -ESRCH;
1649
Stephen Hemminger0c7770c2005-08-23 21:59:41 -07001650 pr_debug("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t);
Robert Olsson19baf832005-06-21 12:43:18 -07001651
1652 fa_to_delete = NULL;
Julian Anastasov936f6f82008-01-28 21:18:06 -08001653 fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list);
1654 list_for_each_entry_continue(fa, fa_head, fa_list) {
Robert Olsson19baf832005-06-21 12:43:18 -07001655 struct fib_info *fi = fa->fa_info;
1656
1657 if (fa->fa_tos != tos)
1658 break;
1659
Thomas Graf4e902c52006-08-17 18:14:52 -07001660 if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) &&
1661 (cfg->fc_scope == RT_SCOPE_NOWHERE ||
1662 fa->fa_scope == cfg->fc_scope) &&
1663 (!cfg->fc_protocol ||
1664 fi->fib_protocol == cfg->fc_protocol) &&
1665 fib_nh_match(cfg, fi) == 0) {
Robert Olsson19baf832005-06-21 12:43:18 -07001666 fa_to_delete = fa;
1667 break;
1668 }
1669 }
1670
Olof Johansson91b9a272005-08-09 20:24:39 -07001671 if (!fa_to_delete)
1672 return -ESRCH;
Robert Olsson19baf832005-06-21 12:43:18 -07001673
Olof Johansson91b9a272005-08-09 20:24:39 -07001674 fa = fa_to_delete;
Thomas Graf4e902c52006-08-17 18:14:52 -07001675 rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id,
Milan Kocianb8f55832007-05-23 14:55:06 -07001676 &cfg->fc_nlinfo, 0);
Robert Olsson19baf832005-06-21 12:43:18 -07001677
Olof Johansson91b9a272005-08-09 20:24:39 -07001678 l = fib_find_node(t, key);
Robert Olsson772cb712005-09-19 15:31:18 -07001679 li = find_leaf_info(l, plen);
Robert Olsson19baf832005-06-21 12:43:18 -07001680
Robert Olsson2373ce12005-08-25 13:01:29 -07001681 list_del_rcu(&fa->fa_list);
Robert Olsson19baf832005-06-21 12:43:18 -07001682
Olof Johansson91b9a272005-08-09 20:24:39 -07001683 if (list_empty(fa_head)) {
Robert Olsson2373ce12005-08-25 13:01:29 -07001684 hlist_del_rcu(&li->hlist);
Olof Johansson91b9a272005-08-09 20:24:39 -07001685 free_leaf_info(li);
Robert Olsson2373ce12005-08-25 13:01:29 -07001686 }
Olof Johansson91b9a272005-08-09 20:24:39 -07001687
1688 if (hlist_empty(&l->list))
Stephen Hemminger9195bef2008-01-22 21:56:34 -08001689 trie_leaf_remove(t, l);
Olof Johansson91b9a272005-08-09 20:24:39 -07001690
1691 if (fa->fa_state & FA_S_ACCESSED)
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -07001692 rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
Olof Johansson91b9a272005-08-09 20:24:39 -07001693
Robert Olsson2373ce12005-08-25 13:01:29 -07001694 fib_release_info(fa->fa_info);
1695 alias_free_mem_rcu(fa);
Olof Johansson91b9a272005-08-09 20:24:39 -07001696 return 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001697}
1698
Stephen Hemmingeref3660c2008-04-10 03:46:12 -07001699static int trie_flush_list(struct list_head *head)
Robert Olsson19baf832005-06-21 12:43:18 -07001700{
1701 struct fib_alias *fa, *fa_node;
1702 int found = 0;
1703
1704 list_for_each_entry_safe(fa, fa_node, head, fa_list) {
1705 struct fib_info *fi = fa->fa_info;
Robert Olsson19baf832005-06-21 12:43:18 -07001706
Robert Olsson2373ce12005-08-25 13:01:29 -07001707 if (fi && (fi->fib_flags & RTNH_F_DEAD)) {
1708 list_del_rcu(&fa->fa_list);
1709 fib_release_info(fa->fa_info);
1710 alias_free_mem_rcu(fa);
Robert Olsson19baf832005-06-21 12:43:18 -07001711 found++;
1712 }
1713 }
1714 return found;
1715}
1716
Stephen Hemmingeref3660c2008-04-10 03:46:12 -07001717static int trie_flush_leaf(struct leaf *l)
Robert Olsson19baf832005-06-21 12:43:18 -07001718{
1719 int found = 0;
1720 struct hlist_head *lih = &l->list;
1721 struct hlist_node *node, *tmp;
1722 struct leaf_info *li = NULL;
1723
1724 hlist_for_each_entry_safe(li, node, tmp, lih, hlist) {
Stephen Hemmingeref3660c2008-04-10 03:46:12 -07001725 found += trie_flush_list(&li->falh);
Robert Olsson19baf832005-06-21 12:43:18 -07001726
1727 if (list_empty(&li->falh)) {
Robert Olsson2373ce12005-08-25 13:01:29 -07001728 hlist_del_rcu(&li->hlist);
Robert Olsson19baf832005-06-21 12:43:18 -07001729 free_leaf_info(li);
1730 }
1731 }
1732 return found;
1733}
1734
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001735/*
1736 * Scan for the next right leaf starting at node p->child[idx]
1737 * Since we have back pointer, no recursion necessary.
1738 */
1739static struct leaf *leaf_walk_rcu(struct tnode *p, struct node *c)
Robert Olsson19baf832005-06-21 12:43:18 -07001740{
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001741 do {
1742 t_key idx;
Robert Olsson19baf832005-06-21 12:43:18 -07001743
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001744 if (c)
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001745 idx = tkey_extract_bits(c->key, p->pos, p->bits) + 1;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001746 else
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001747 idx = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001748
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001749 while (idx < 1u << p->bits) {
1750 c = tnode_get_child_rcu(p, idx++);
Robert Olsson2373ce12005-08-25 13:01:29 -07001751 if (!c)
Olof Johansson91b9a272005-08-09 20:24:39 -07001752 continue;
Robert Olsson19baf832005-06-21 12:43:18 -07001753
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001754 if (IS_LEAF(c)) {
1755 prefetch(p->child[idx]);
1756 return (struct leaf *) c;
Robert Olsson19baf832005-06-21 12:43:18 -07001757 }
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001758
1759 /* Rescan start scanning in new node */
1760 p = (struct tnode *) c;
1761 idx = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001762 }
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001763
1764 /* Node empty, walk back up to parent */
Olof Johansson91b9a272005-08-09 20:24:39 -07001765 c = (struct node *) p;
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001766 } while ( (p = node_parent_rcu(c)) != NULL);
1767
1768 return NULL; /* Root of trie */
1769}
1770
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001771static struct leaf *trie_firstleaf(struct trie *t)
1772{
1773 struct tnode *n = (struct tnode *) rcu_dereference(t->trie);
1774
1775 if (!n)
1776 return NULL;
1777
1778 if (IS_LEAF(n)) /* trie is just a leaf */
1779 return (struct leaf *) n;
1780
1781 return leaf_walk_rcu(n, NULL);
1782}
1783
1784static struct leaf *trie_nextleaf(struct leaf *l)
1785{
1786 struct node *c = (struct node *) l;
1787 struct tnode *p = node_parent(c);
1788
1789 if (!p)
1790 return NULL; /* trie with just one leaf */
1791
1792 return leaf_walk_rcu(p, c);
Robert Olsson19baf832005-06-21 12:43:18 -07001793}
1794
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001795static struct leaf *trie_leafindex(struct trie *t, int index)
1796{
1797 struct leaf *l = trie_firstleaf(t);
1798
Stephen Hemmingerec28cf72008-02-11 21:12:49 -08001799 while (l && index-- > 0)
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001800 l = trie_nextleaf(l);
Stephen Hemmingerec28cf72008-02-11 21:12:49 -08001801
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001802 return l;
1803}
1804
1805
Robert Olssond562f1f2007-03-26 14:22:22 -07001806/*
1807 * Caller must hold RTNL.
1808 */
Robert Olsson19baf832005-06-21 12:43:18 -07001809static int fn_trie_flush(struct fib_table *tb)
1810{
1811 struct trie *t = (struct trie *) tb->tb_data;
Stephen Hemminger9195bef2008-01-22 21:56:34 -08001812 struct leaf *l, *ll = NULL;
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001813 int found = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001814
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001815 for (l = trie_firstleaf(t); l; l = trie_nextleaf(l)) {
Stephen Hemmingeref3660c2008-04-10 03:46:12 -07001816 found += trie_flush_leaf(l);
Robert Olsson19baf832005-06-21 12:43:18 -07001817
1818 if (ll && hlist_empty(&ll->list))
Stephen Hemminger9195bef2008-01-22 21:56:34 -08001819 trie_leaf_remove(t, ll);
Robert Olsson19baf832005-06-21 12:43:18 -07001820 ll = l;
1821 }
1822
1823 if (ll && hlist_empty(&ll->list))
Stephen Hemminger9195bef2008-01-22 21:56:34 -08001824 trie_leaf_remove(t, ll);
Robert Olsson19baf832005-06-21 12:43:18 -07001825
Stephen Hemminger0c7770c2005-08-23 21:59:41 -07001826 pr_debug("trie_flush found=%d\n", found);
Robert Olsson19baf832005-06-21 12:43:18 -07001827 return found;
1828}
1829
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001830static void fn_trie_select_default(struct fib_table *tb,
1831 const struct flowi *flp,
1832 struct fib_result *res)
Robert Olsson19baf832005-06-21 12:43:18 -07001833{
1834 struct trie *t = (struct trie *) tb->tb_data;
1835 int order, last_idx;
1836 struct fib_info *fi = NULL;
1837 struct fib_info *last_resort;
1838 struct fib_alias *fa = NULL;
1839 struct list_head *fa_head;
1840 struct leaf *l;
1841
1842 last_idx = -1;
1843 last_resort = NULL;
1844 order = -1;
1845
Robert Olsson2373ce12005-08-25 13:01:29 -07001846 rcu_read_lock();
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001847
Robert Olsson19baf832005-06-21 12:43:18 -07001848 l = fib_find_node(t, 0);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001849 if (!l)
Robert Olsson19baf832005-06-21 12:43:18 -07001850 goto out;
1851
1852 fa_head = get_fa_head(l, 0);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001853 if (!fa_head)
Robert Olsson19baf832005-06-21 12:43:18 -07001854 goto out;
1855
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001856 if (list_empty(fa_head))
Robert Olsson19baf832005-06-21 12:43:18 -07001857 goto out;
1858
Robert Olsson2373ce12005-08-25 13:01:29 -07001859 list_for_each_entry_rcu(fa, fa_head, fa_list) {
Robert Olsson19baf832005-06-21 12:43:18 -07001860 struct fib_info *next_fi = fa->fa_info;
Olof Johansson91b9a272005-08-09 20:24:39 -07001861
Robert Olsson19baf832005-06-21 12:43:18 -07001862 if (fa->fa_scope != res->scope ||
1863 fa->fa_type != RTN_UNICAST)
1864 continue;
Olof Johansson91b9a272005-08-09 20:24:39 -07001865
Robert Olsson19baf832005-06-21 12:43:18 -07001866 if (next_fi->fib_priority > res->fi->fib_priority)
1867 break;
1868 if (!next_fi->fib_nh[0].nh_gw ||
1869 next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
1870 continue;
1871 fa->fa_state |= FA_S_ACCESSED;
Olof Johansson91b9a272005-08-09 20:24:39 -07001872
Robert Olsson19baf832005-06-21 12:43:18 -07001873 if (fi == NULL) {
1874 if (next_fi != res->fi)
1875 break;
1876 } else if (!fib_detect_death(fi, order, &last_resort,
Denis V. Lunev971b8932007-12-08 00:32:23 -08001877 &last_idx, tb->tb_default)) {
Denis V. Luneva2bbe682007-12-08 00:31:44 -08001878 fib_result_assign(res, fi);
Denis V. Lunev971b8932007-12-08 00:32:23 -08001879 tb->tb_default = order;
Robert Olsson19baf832005-06-21 12:43:18 -07001880 goto out;
1881 }
1882 fi = next_fi;
1883 order++;
1884 }
1885 if (order <= 0 || fi == NULL) {
Denis V. Lunev971b8932007-12-08 00:32:23 -08001886 tb->tb_default = -1;
Robert Olsson19baf832005-06-21 12:43:18 -07001887 goto out;
1888 }
1889
Denis V. Lunev971b8932007-12-08 00:32:23 -08001890 if (!fib_detect_death(fi, order, &last_resort, &last_idx,
1891 tb->tb_default)) {
Denis V. Luneva2bbe682007-12-08 00:31:44 -08001892 fib_result_assign(res, fi);
Denis V. Lunev971b8932007-12-08 00:32:23 -08001893 tb->tb_default = order;
Robert Olsson19baf832005-06-21 12:43:18 -07001894 goto out;
1895 }
Denis V. Luneva2bbe682007-12-08 00:31:44 -08001896 if (last_idx >= 0)
1897 fib_result_assign(res, last_resort);
Denis V. Lunev971b8932007-12-08 00:32:23 -08001898 tb->tb_default = last_idx;
1899out:
Robert Olsson2373ce12005-08-25 13:01:29 -07001900 rcu_read_unlock();
Robert Olsson19baf832005-06-21 12:43:18 -07001901}
1902
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001903static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah,
1904 struct fib_table *tb,
Robert Olsson19baf832005-06-21 12:43:18 -07001905 struct sk_buff *skb, struct netlink_callback *cb)
1906{
1907 int i, s_i;
1908 struct fib_alias *fa;
Al Viro32ab5f82006-09-26 22:21:45 -07001909 __be32 xkey = htonl(key);
Robert Olsson19baf832005-06-21 12:43:18 -07001910
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001911 s_i = cb->args[5];
Robert Olsson19baf832005-06-21 12:43:18 -07001912 i = 0;
1913
Robert Olsson2373ce12005-08-25 13:01:29 -07001914 /* rcu_read_lock is hold by caller */
1915
1916 list_for_each_entry_rcu(fa, fah, fa_list) {
Robert Olsson19baf832005-06-21 12:43:18 -07001917 if (i < s_i) {
1918 i++;
1919 continue;
1920 }
Robert Olsson19baf832005-06-21 12:43:18 -07001921
1922 if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid,
1923 cb->nlh->nlmsg_seq,
1924 RTM_NEWROUTE,
1925 tb->tb_id,
1926 fa->fa_type,
1927 fa->fa_scope,
Thomas Grafbe403ea2006-08-17 18:15:17 -07001928 xkey,
Robert Olsson19baf832005-06-21 12:43:18 -07001929 plen,
1930 fa->fa_tos,
Stephen Hemminger64347f72008-01-22 21:55:01 -08001931 fa->fa_info, NLM_F_MULTI) < 0) {
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001932 cb->args[5] = i;
Robert Olsson19baf832005-06-21 12:43:18 -07001933 return -1;
Olof Johansson91b9a272005-08-09 20:24:39 -07001934 }
Robert Olsson19baf832005-06-21 12:43:18 -07001935 i++;
1936 }
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001937 cb->args[5] = i;
Robert Olsson19baf832005-06-21 12:43:18 -07001938 return skb->len;
1939}
1940
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001941static int fn_trie_dump_leaf(struct leaf *l, struct fib_table *tb,
1942 struct sk_buff *skb, struct netlink_callback *cb)
Robert Olsson19baf832005-06-21 12:43:18 -07001943{
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001944 struct leaf_info *li;
1945 struct hlist_node *node;
1946 int i, s_i;
Robert Olsson19baf832005-06-21 12:43:18 -07001947
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001948 s_i = cb->args[4];
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001949 i = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001950
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001951 /* rcu_read_lock is hold by caller */
1952 hlist_for_each_entry_rcu(li, node, &l->list, hlist) {
1953 if (i < s_i) {
1954 i++;
Robert Olsson19baf832005-06-21 12:43:18 -07001955 continue;
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001956 }
Robert Olsson19baf832005-06-21 12:43:18 -07001957
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001958 if (i > s_i)
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001959 cb->args[5] = 0;
Olof Johansson91b9a272005-08-09 20:24:39 -07001960
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001961 if (list_empty(&li->falh))
Robert Olsson19baf832005-06-21 12:43:18 -07001962 continue;
1963
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001964 if (fn_trie_dump_fa(l->key, li->plen, &li->falh, tb, skb, cb) < 0) {
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001965 cb->args[4] = i;
Robert Olsson19baf832005-06-21 12:43:18 -07001966 return -1;
1967 }
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001968 i++;
Robert Olsson19baf832005-06-21 12:43:18 -07001969 }
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001970
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001971 cb->args[4] = i;
Robert Olsson19baf832005-06-21 12:43:18 -07001972 return skb->len;
1973}
1974
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001975static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb,
1976 struct netlink_callback *cb)
Robert Olsson19baf832005-06-21 12:43:18 -07001977{
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001978 struct leaf *l;
Robert Olsson19baf832005-06-21 12:43:18 -07001979 struct trie *t = (struct trie *) tb->tb_data;
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08001980 t_key key = cb->args[2];
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001981 int count = cb->args[3];
Robert Olsson19baf832005-06-21 12:43:18 -07001982
Robert Olsson2373ce12005-08-25 13:01:29 -07001983 rcu_read_lock();
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08001984 /* Dump starting at last key.
1985 * Note: 0.0.0.0/0 (ie default) is first key.
1986 */
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001987 if (count == 0)
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08001988 l = trie_firstleaf(t);
1989 else {
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001990 /* Normally, continue from last key, but if that is missing
1991 * fallback to using slow rescan
1992 */
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08001993 l = fib_find_node(t, key);
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001994 if (!l)
1995 l = trie_leafindex(t, count);
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08001996 }
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001997
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08001998 while (l) {
1999 cb->args[2] = l->key;
Stephen Hemmingera88ee222008-01-22 21:56:11 -08002000 if (fn_trie_dump_leaf(l, tb, skb, cb) < 0) {
Stephen Hemminger71d67e62008-01-31 16:45:47 -08002001 cb->args[3] = count;
Stephen Hemmingera88ee222008-01-22 21:56:11 -08002002 rcu_read_unlock();
Stephen Hemmingera88ee222008-01-22 21:56:11 -08002003 return -1;
Robert Olsson19baf832005-06-21 12:43:18 -07002004 }
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08002005
Stephen Hemminger71d67e62008-01-31 16:45:47 -08002006 ++count;
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08002007 l = trie_nextleaf(l);
Stephen Hemminger71d67e62008-01-31 16:45:47 -08002008 memset(&cb->args[4], 0,
2009 sizeof(cb->args) - 4*sizeof(cb->args[0]));
Robert Olsson19baf832005-06-21 12:43:18 -07002010 }
Stephen Hemminger71d67e62008-01-31 16:45:47 -08002011 cb->args[3] = count;
Robert Olsson2373ce12005-08-25 13:01:29 -07002012 rcu_read_unlock();
Stephen Hemmingera88ee222008-01-22 21:56:11 -08002013
Robert Olsson19baf832005-06-21 12:43:18 -07002014 return skb->len;
Robert Olsson19baf832005-06-21 12:43:18 -07002015}
2016
Stephen Hemminger7f9b8052008-01-14 23:14:20 -08002017void __init fib_hash_init(void)
2018{
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08002019 fn_alias_kmem = kmem_cache_create("ip_fib_alias",
2020 sizeof(struct fib_alias),
Stephen Hemmingerbc3c8c12008-01-22 21:51:50 -08002021 0, SLAB_PANIC, NULL);
2022
2023 trie_leaf_kmem = kmem_cache_create("ip_fib_trie",
2024 max(sizeof(struct leaf),
2025 sizeof(struct leaf_info)),
2026 0, SLAB_PANIC, NULL);
Stephen Hemminger7f9b8052008-01-14 23:14:20 -08002027}
Robert Olsson19baf832005-06-21 12:43:18 -07002028
Stephen Hemminger7f9b8052008-01-14 23:14:20 -08002029
2030/* Fix more generic FIB names for init later */
2031struct fib_table *fib_hash_table(u32 id)
Robert Olsson19baf832005-06-21 12:43:18 -07002032{
2033 struct fib_table *tb;
2034 struct trie *t;
2035
Robert Olsson19baf832005-06-21 12:43:18 -07002036 tb = kmalloc(sizeof(struct fib_table) + sizeof(struct trie),
2037 GFP_KERNEL);
2038 if (tb == NULL)
2039 return NULL;
2040
2041 tb->tb_id = id;
Denis V. Lunev971b8932007-12-08 00:32:23 -08002042 tb->tb_default = -1;
Robert Olsson19baf832005-06-21 12:43:18 -07002043 tb->tb_lookup = fn_trie_lookup;
2044 tb->tb_insert = fn_trie_insert;
2045 tb->tb_delete = fn_trie_delete;
2046 tb->tb_flush = fn_trie_flush;
2047 tb->tb_select_default = fn_trie_select_default;
2048 tb->tb_dump = fn_trie_dump;
Robert Olsson19baf832005-06-21 12:43:18 -07002049
2050 t = (struct trie *) tb->tb_data;
Stephen Hemmingerc28a1cf2008-01-12 20:49:13 -08002051 memset(t, 0, sizeof(*t));
Robert Olsson19baf832005-06-21 12:43:18 -07002052
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002053 if (id == RT_TABLE_LOCAL)
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08002054 pr_info("IPv4 FIB: Using LC-trie version %s\n", VERSION);
Robert Olsson19baf832005-06-21 12:43:18 -07002055
2056 return tb;
2057}
2058
Robert Olsson19baf832005-06-21 12:43:18 -07002059#ifdef CONFIG_PROC_FS
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002060/* Depth first Trie walk iterator */
2061struct fib_trie_iter {
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002062 struct seq_net_private p;
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002063 struct fib_table *tb;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002064 struct tnode *tnode;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002065 unsigned index;
2066 unsigned depth;
2067};
Robert Olsson19baf832005-06-21 12:43:18 -07002068
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002069static struct node *fib_trie_get_next(struct fib_trie_iter *iter)
Robert Olsson19baf832005-06-21 12:43:18 -07002070{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002071 struct tnode *tn = iter->tnode;
2072 unsigned cindex = iter->index;
2073 struct tnode *p;
2074
Eric W. Biederman6640e692007-01-24 14:42:04 -08002075 /* A single entry routing table */
2076 if (!tn)
2077 return NULL;
2078
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002079 pr_debug("get_next iter={node=%p index=%d depth=%d}\n",
2080 iter->tnode, iter->index, iter->depth);
2081rescan:
2082 while (cindex < (1<<tn->bits)) {
Eric Dumazetb59cfbf2008-01-18 03:31:36 -08002083 struct node *n = tnode_get_child_rcu(tn, cindex);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002084
2085 if (n) {
2086 if (IS_LEAF(n)) {
2087 iter->tnode = tn;
2088 iter->index = cindex + 1;
2089 } else {
2090 /* push down one level */
2091 iter->tnode = (struct tnode *) n;
2092 iter->index = 0;
2093 ++iter->depth;
2094 }
2095 return n;
2096 }
2097
2098 ++cindex;
2099 }
2100
2101 /* Current node exhausted, pop back up */
Eric Dumazetb59cfbf2008-01-18 03:31:36 -08002102 p = node_parent_rcu((struct node *)tn);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002103 if (p) {
2104 cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1;
2105 tn = p;
2106 --iter->depth;
2107 goto rescan;
2108 }
2109
2110 /* got root? */
Robert Olsson19baf832005-06-21 12:43:18 -07002111 return NULL;
2112}
2113
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002114static struct node *fib_trie_get_first(struct fib_trie_iter *iter,
2115 struct trie *t)
Robert Olsson19baf832005-06-21 12:43:18 -07002116{
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002117 struct node *n;
Robert Olsson5ddf0eb2006-03-20 21:34:12 -08002118
Stephen Hemminger132adf52007-03-08 20:44:43 -08002119 if (!t)
Robert Olsson5ddf0eb2006-03-20 21:34:12 -08002120 return NULL;
2121
2122 n = rcu_dereference(t->trie);
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002123 if (!n)
Robert Olsson5ddf0eb2006-03-20 21:34:12 -08002124 return NULL;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002125
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002126 if (IS_TNODE(n)) {
2127 iter->tnode = (struct tnode *) n;
2128 iter->index = 0;
2129 iter->depth = 1;
2130 } else {
2131 iter->tnode = NULL;
2132 iter->index = 0;
2133 iter->depth = 0;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002134 }
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002135
2136 return n;
Robert Olsson19baf832005-06-21 12:43:18 -07002137}
2138
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002139static void trie_collect_stats(struct trie *t, struct trie_stat *s)
Robert Olsson19baf832005-06-21 12:43:18 -07002140{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002141 struct node *n;
2142 struct fib_trie_iter iter;
Robert Olsson19baf832005-06-21 12:43:18 -07002143
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002144 memset(s, 0, sizeof(*s));
Robert Olsson19baf832005-06-21 12:43:18 -07002145
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002146 rcu_read_lock();
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002147 for (n = fib_trie_get_first(&iter, t); n; n = fib_trie_get_next(&iter)) {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002148 if (IS_LEAF(n)) {
Stephen Hemminger93672292008-01-22 21:54:05 -08002149 struct leaf *l = (struct leaf *)n;
2150 struct leaf_info *li;
2151 struct hlist_node *tmp;
2152
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002153 s->leaves++;
2154 s->totdepth += iter.depth;
2155 if (iter.depth > s->maxdepth)
2156 s->maxdepth = iter.depth;
Stephen Hemminger93672292008-01-22 21:54:05 -08002157
2158 hlist_for_each_entry_rcu(li, tmp, &l->list, hlist)
2159 ++s->prefixes;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002160 } else {
2161 const struct tnode *tn = (const struct tnode *) n;
2162 int i;
Robert Olsson19baf832005-06-21 12:43:18 -07002163
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002164 s->tnodes++;
Stephen Hemminger132adf52007-03-08 20:44:43 -08002165 if (tn->bits < MAX_STAT_DEPTH)
Robert Olsson06ef9212006-03-20 21:35:01 -08002166 s->nodesizes[tn->bits]++;
2167
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002168 for (i = 0; i < (1<<tn->bits); i++)
2169 if (!tn->child[i])
2170 s->nullpointers++;
2171 }
2172 }
2173 rcu_read_unlock();
Robert Olsson19baf832005-06-21 12:43:18 -07002174}
2175
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002176/*
Robert Olsson19baf832005-06-21 12:43:18 -07002177 * This outputs /proc/net/fib_triestats
Robert Olsson19baf832005-06-21 12:43:18 -07002178 */
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002179static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat)
Robert Olsson19baf832005-06-21 12:43:18 -07002180{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002181 unsigned i, max, pointers, bytes, avdepth;
Robert Olsson19baf832005-06-21 12:43:18 -07002182
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002183 if (stat->leaves)
2184 avdepth = stat->totdepth*100 / stat->leaves;
2185 else
2186 avdepth = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07002187
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08002188 seq_printf(seq, "\tAver depth: %u.%02d\n",
2189 avdepth / 100, avdepth % 100);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002190 seq_printf(seq, "\tMax depth: %u\n", stat->maxdepth);
Robert Olsson19baf832005-06-21 12:43:18 -07002191
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002192 seq_printf(seq, "\tLeaves: %u\n", stat->leaves);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002193 bytes = sizeof(struct leaf) * stat->leaves;
Stephen Hemminger93672292008-01-22 21:54:05 -08002194
2195 seq_printf(seq, "\tPrefixes: %u\n", stat->prefixes);
2196 bytes += sizeof(struct leaf_info) * stat->prefixes;
2197
Stephen Hemminger187b5182008-01-12 20:55:55 -08002198 seq_printf(seq, "\tInternal nodes: %u\n\t", stat->tnodes);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002199 bytes += sizeof(struct tnode) * stat->tnodes;
Robert Olsson19baf832005-06-21 12:43:18 -07002200
Robert Olsson06ef9212006-03-20 21:35:01 -08002201 max = MAX_STAT_DEPTH;
2202 while (max > 0 && stat->nodesizes[max-1] == 0)
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002203 max--;
Robert Olsson19baf832005-06-21 12:43:18 -07002204
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002205 pointers = 0;
2206 for (i = 1; i <= max; i++)
2207 if (stat->nodesizes[i] != 0) {
Stephen Hemminger187b5182008-01-12 20:55:55 -08002208 seq_printf(seq, " %u: %u", i, stat->nodesizes[i]);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002209 pointers += (1<<i) * stat->nodesizes[i];
2210 }
2211 seq_putc(seq, '\n');
Stephen Hemminger187b5182008-01-12 20:55:55 -08002212 seq_printf(seq, "\tPointers: %u\n", pointers);
Robert Olsson19baf832005-06-21 12:43:18 -07002213
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002214 bytes += sizeof(struct node *) * pointers;
Stephen Hemminger187b5182008-01-12 20:55:55 -08002215 seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers);
2216 seq_printf(seq, "Total size: %u kB\n", (bytes + 1023) / 1024);
Stephen Hemminger66a2f7f2008-01-12 21:23:17 -08002217}
Robert Olsson19baf832005-06-21 12:43:18 -07002218
2219#ifdef CONFIG_IP_FIB_TRIE_STATS
Stephen Hemminger66a2f7f2008-01-12 21:23:17 -08002220static void trie_show_usage(struct seq_file *seq,
2221 const struct trie_use_stats *stats)
2222{
2223 seq_printf(seq, "\nCounters:\n---------\n");
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08002224 seq_printf(seq, "gets = %u\n", stats->gets);
2225 seq_printf(seq, "backtracks = %u\n", stats->backtrack);
2226 seq_printf(seq, "semantic match passed = %u\n",
2227 stats->semantic_match_passed);
2228 seq_printf(seq, "semantic match miss = %u\n",
2229 stats->semantic_match_miss);
2230 seq_printf(seq, "null node hit= %u\n", stats->null_node_hit);
2231 seq_printf(seq, "skipped node resize = %u\n\n",
2232 stats->resize_node_skipped);
Robert Olsson19baf832005-06-21 12:43:18 -07002233}
Stephen Hemminger66a2f7f2008-01-12 21:23:17 -08002234#endif /* CONFIG_IP_FIB_TRIE_STATS */
2235
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002236static void fib_table_print(struct seq_file *seq, struct fib_table *tb)
Stephen Hemmingerd717a9a2008-01-14 23:11:54 -08002237{
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002238 if (tb->tb_id == RT_TABLE_LOCAL)
2239 seq_puts(seq, "Local:\n");
2240 else if (tb->tb_id == RT_TABLE_MAIN)
2241 seq_puts(seq, "Main:\n");
2242 else
2243 seq_printf(seq, "Id %d:\n", tb->tb_id);
Stephen Hemmingerd717a9a2008-01-14 23:11:54 -08002244}
Robert Olsson19baf832005-06-21 12:43:18 -07002245
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002246
Robert Olsson19baf832005-06-21 12:43:18 -07002247static int fib_triestat_seq_show(struct seq_file *seq, void *v)
2248{
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002249 struct net *net = (struct net *)seq->private;
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002250 unsigned int h;
Eric W. Biederman877a9bf2007-12-07 00:47:47 -08002251
Stephen Hemmingerd717a9a2008-01-14 23:11:54 -08002252 seq_printf(seq,
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08002253 "Basic info: size of leaf:"
2254 " %Zd bytes, size of tnode: %Zd bytes.\n",
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002255 sizeof(struct leaf), sizeof(struct tnode));
Olof Johansson91b9a272005-08-09 20:24:39 -07002256
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002257 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
2258 struct hlist_head *head = &net->ipv4.fib_table_hash[h];
2259 struct hlist_node *node;
2260 struct fib_table *tb;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002261
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002262 hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
2263 struct trie *t = (struct trie *) tb->tb_data;
2264 struct trie_stat stat;
2265
2266 if (!t)
2267 continue;
2268
2269 fib_table_print(seq, tb);
2270
2271 trie_collect_stats(t, &stat);
2272 trie_show_stats(seq, &stat);
2273#ifdef CONFIG_IP_FIB_TRIE_STATS
2274 trie_show_usage(seq, &t->stats);
2275#endif
2276 }
2277 }
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002278
Robert Olsson19baf832005-06-21 12:43:18 -07002279 return 0;
2280}
2281
Robert Olsson19baf832005-06-21 12:43:18 -07002282static int fib_triestat_seq_open(struct inode *inode, struct file *file)
2283{
Pavel Emelyanovde05c552008-07-18 04:07:21 -07002284 return single_open_net(inode, file, fib_triestat_seq_show);
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002285}
2286
Arjan van de Ven9a321442007-02-12 00:55:35 -08002287static const struct file_operations fib_triestat_fops = {
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002288 .owner = THIS_MODULE,
2289 .open = fib_triestat_seq_open,
2290 .read = seq_read,
2291 .llseek = seq_lseek,
Pavel Emelyanovb6fcbdb2008-07-18 04:07:44 -07002292 .release = single_release_net,
Robert Olsson19baf832005-06-21 12:43:18 -07002293};
2294
YOSHIFUJI Hideaki12188542008-03-26 02:36:06 +09002295static struct node *fib_trie_get_idx(struct seq_file *seq, loff_t pos)
Robert Olsson19baf832005-06-21 12:43:18 -07002296{
YOSHIFUJI Hideaki12188542008-03-26 02:36:06 +09002297 struct fib_trie_iter *iter = seq->private;
2298 struct net *net = seq_file_net(seq);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002299 loff_t idx = 0;
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002300 unsigned int h;
Robert Olsson19baf832005-06-21 12:43:18 -07002301
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002302 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
2303 struct hlist_head *head = &net->ipv4.fib_table_hash[h];
2304 struct hlist_node *node;
2305 struct fib_table *tb;
2306
2307 hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
2308 struct node *n;
2309
2310 for (n = fib_trie_get_first(iter,
2311 (struct trie *) tb->tb_data);
2312 n; n = fib_trie_get_next(iter))
2313 if (pos == idx++) {
2314 iter->tb = tb;
2315 return n;
2316 }
2317 }
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002318 }
Robert Olsson19baf832005-06-21 12:43:18 -07002319
Robert Olsson19baf832005-06-21 12:43:18 -07002320 return NULL;
2321}
2322
2323static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos)
Stephen Hemmingerc95aaf92008-01-12 21:25:02 -08002324 __acquires(RCU)
Robert Olsson19baf832005-06-21 12:43:18 -07002325{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002326 rcu_read_lock();
YOSHIFUJI Hideaki12188542008-03-26 02:36:06 +09002327 return fib_trie_get_idx(seq, *pos);
Robert Olsson19baf832005-06-21 12:43:18 -07002328}
2329
2330static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2331{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002332 struct fib_trie_iter *iter = seq->private;
YOSHIFUJI Hideaki12188542008-03-26 02:36:06 +09002333 struct net *net = seq_file_net(seq);
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002334 struct fib_table *tb = iter->tb;
2335 struct hlist_node *tb_node;
2336 unsigned int h;
2337 struct node *n;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002338
Robert Olsson19baf832005-06-21 12:43:18 -07002339 ++*pos;
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002340 /* next node in same table */
2341 n = fib_trie_get_next(iter);
2342 if (n)
2343 return n;
Olof Johansson91b9a272005-08-09 20:24:39 -07002344
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002345 /* walk rest of this hash chain */
2346 h = tb->tb_id & (FIB_TABLE_HASHSZ - 1);
2347 while ( (tb_node = rcu_dereference(tb->tb_hlist.next)) ) {
2348 tb = hlist_entry(tb_node, struct fib_table, tb_hlist);
2349 n = fib_trie_get_first(iter, (struct trie *) tb->tb_data);
2350 if (n)
2351 goto found;
2352 }
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002353
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002354 /* new hash chain */
2355 while (++h < FIB_TABLE_HASHSZ) {
2356 struct hlist_head *head = &net->ipv4.fib_table_hash[h];
2357 hlist_for_each_entry_rcu(tb, tb_node, head, tb_hlist) {
2358 n = fib_trie_get_first(iter, (struct trie *) tb->tb_data);
2359 if (n)
2360 goto found;
2361 }
2362 }
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002363 return NULL;
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002364
2365found:
2366 iter->tb = tb;
2367 return n;
Robert Olsson19baf832005-06-21 12:43:18 -07002368}
2369
2370static void fib_trie_seq_stop(struct seq_file *seq, void *v)
Stephen Hemmingerc95aaf92008-01-12 21:25:02 -08002371 __releases(RCU)
Robert Olsson19baf832005-06-21 12:43:18 -07002372{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002373 rcu_read_unlock();
Robert Olsson19baf832005-06-21 12:43:18 -07002374}
2375
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002376static void seq_indent(struct seq_file *seq, int n)
2377{
2378 while (n-- > 0) seq_puts(seq, " ");
2379}
Robert Olsson19baf832005-06-21 12:43:18 -07002380
Eric Dumazet28d36e32008-01-14 23:09:56 -08002381static inline const char *rtn_scope(char *buf, size_t len, enum rt_scope_t s)
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002382{
Stephen Hemminger132adf52007-03-08 20:44:43 -08002383 switch (s) {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002384 case RT_SCOPE_UNIVERSE: return "universe";
2385 case RT_SCOPE_SITE: return "site";
2386 case RT_SCOPE_LINK: return "link";
2387 case RT_SCOPE_HOST: return "host";
2388 case RT_SCOPE_NOWHERE: return "nowhere";
2389 default:
Eric Dumazet28d36e32008-01-14 23:09:56 -08002390 snprintf(buf, len, "scope=%d", s);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002391 return buf;
2392 }
2393}
2394
2395static const char *rtn_type_names[__RTN_MAX] = {
2396 [RTN_UNSPEC] = "UNSPEC",
2397 [RTN_UNICAST] = "UNICAST",
2398 [RTN_LOCAL] = "LOCAL",
2399 [RTN_BROADCAST] = "BROADCAST",
2400 [RTN_ANYCAST] = "ANYCAST",
2401 [RTN_MULTICAST] = "MULTICAST",
2402 [RTN_BLACKHOLE] = "BLACKHOLE",
2403 [RTN_UNREACHABLE] = "UNREACHABLE",
2404 [RTN_PROHIBIT] = "PROHIBIT",
2405 [RTN_THROW] = "THROW",
2406 [RTN_NAT] = "NAT",
2407 [RTN_XRESOLVE] = "XRESOLVE",
2408};
2409
Eric Dumazet28d36e32008-01-14 23:09:56 -08002410static inline const char *rtn_type(char *buf, size_t len, unsigned t)
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002411{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002412 if (t < __RTN_MAX && rtn_type_names[t])
2413 return rtn_type_names[t];
Eric Dumazet28d36e32008-01-14 23:09:56 -08002414 snprintf(buf, len, "type %u", t);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002415 return buf;
2416}
2417
2418/* Pretty print the trie */
Robert Olsson19baf832005-06-21 12:43:18 -07002419static int fib_trie_seq_show(struct seq_file *seq, void *v)
2420{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002421 const struct fib_trie_iter *iter = seq->private;
2422 struct node *n = v;
Robert Olsson19baf832005-06-21 12:43:18 -07002423
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002424 if (!node_parent_rcu(n))
2425 fib_table_print(seq, iter->tb);
Robert Olsson095b8502007-01-26 19:06:01 -08002426
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002427 if (IS_TNODE(n)) {
2428 struct tnode *tn = (struct tnode *) n;
Stephen Hemmingerab66b4a2007-08-10 15:22:58 -07002429 __be32 prf = htonl(mask_pfx(tn->key, tn->pos));
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002430
Robert Olsson1d25cd62005-09-19 15:29:52 -07002431 seq_indent(seq, iter->depth-1);
Harvey Harrison673d57e2008-10-31 00:53:57 -07002432 seq_printf(seq, " +-- %pI4/%d %d %d %d\n",
2433 &prf, tn->pos, tn->bits, tn->full_children,
Robert Olsson1d25cd62005-09-19 15:29:52 -07002434 tn->empty_children);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002435
Olof Johansson91b9a272005-08-09 20:24:39 -07002436 } else {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002437 struct leaf *l = (struct leaf *) n;
Stephen Hemminger13280422008-01-22 21:54:37 -08002438 struct leaf_info *li;
2439 struct hlist_node *node;
Al Viro32ab5f82006-09-26 22:21:45 -07002440 __be32 val = htonl(l->key);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002441
2442 seq_indent(seq, iter->depth);
Harvey Harrison673d57e2008-10-31 00:53:57 -07002443 seq_printf(seq, " |-- %pI4\n", &val);
Eric Dumazet28d36e32008-01-14 23:09:56 -08002444
Stephen Hemminger13280422008-01-22 21:54:37 -08002445 hlist_for_each_entry_rcu(li, node, &l->list, hlist) {
2446 struct fib_alias *fa;
Eric Dumazet28d36e32008-01-14 23:09:56 -08002447
Stephen Hemminger13280422008-01-22 21:54:37 -08002448 list_for_each_entry_rcu(fa, &li->falh, fa_list) {
2449 char buf1[32], buf2[32];
Eric Dumazet28d36e32008-01-14 23:09:56 -08002450
Stephen Hemminger13280422008-01-22 21:54:37 -08002451 seq_indent(seq, iter->depth+1);
2452 seq_printf(seq, " /%d %s %s", li->plen,
2453 rtn_scope(buf1, sizeof(buf1),
2454 fa->fa_scope),
2455 rtn_type(buf2, sizeof(buf2),
2456 fa->fa_type));
2457 if (fa->fa_tos)
Denis V. Lunevb9c4d822008-02-05 02:58:45 -08002458 seq_printf(seq, " tos=%d", fa->fa_tos);
Stephen Hemminger13280422008-01-22 21:54:37 -08002459 seq_putc(seq, '\n');
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002460 }
2461 }
Robert Olsson19baf832005-06-21 12:43:18 -07002462 }
2463
2464 return 0;
2465}
2466
Stephen Hemmingerf6908082007-03-12 14:34:29 -07002467static const struct seq_operations fib_trie_seq_ops = {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002468 .start = fib_trie_seq_start,
2469 .next = fib_trie_seq_next,
2470 .stop = fib_trie_seq_stop,
2471 .show = fib_trie_seq_show,
Robert Olsson19baf832005-06-21 12:43:18 -07002472};
2473
2474static int fib_trie_seq_open(struct inode *inode, struct file *file)
2475{
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002476 return seq_open_net(inode, file, &fib_trie_seq_ops,
2477 sizeof(struct fib_trie_iter));
Robert Olsson19baf832005-06-21 12:43:18 -07002478}
2479
Arjan van de Ven9a321442007-02-12 00:55:35 -08002480static const struct file_operations fib_trie_fops = {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002481 .owner = THIS_MODULE,
2482 .open = fib_trie_seq_open,
2483 .read = seq_read,
2484 .llseek = seq_lseek,
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002485 .release = seq_release_net,
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002486};
2487
Stephen Hemminger8315f5d2008-02-11 21:14:39 -08002488struct fib_route_iter {
2489 struct seq_net_private p;
2490 struct trie *main_trie;
2491 loff_t pos;
2492 t_key key;
2493};
2494
2495static struct leaf *fib_route_get_idx(struct fib_route_iter *iter, loff_t pos)
2496{
2497 struct leaf *l = NULL;
2498 struct trie *t = iter->main_trie;
2499
2500 /* use cache location of last found key */
2501 if (iter->pos > 0 && pos >= iter->pos && (l = fib_find_node(t, iter->key)))
2502 pos -= iter->pos;
2503 else {
2504 iter->pos = 0;
2505 l = trie_firstleaf(t);
2506 }
2507
2508 while (l && pos-- > 0) {
2509 iter->pos++;
2510 l = trie_nextleaf(l);
2511 }
2512
2513 if (l)
2514 iter->key = pos; /* remember it */
2515 else
2516 iter->pos = 0; /* forget it */
2517
2518 return l;
2519}
2520
2521static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos)
2522 __acquires(RCU)
2523{
2524 struct fib_route_iter *iter = seq->private;
2525 struct fib_table *tb;
2526
2527 rcu_read_lock();
YOSHIFUJI Hideaki12188542008-03-26 02:36:06 +09002528 tb = fib_get_table(seq_file_net(seq), RT_TABLE_MAIN);
Stephen Hemminger8315f5d2008-02-11 21:14:39 -08002529 if (!tb)
2530 return NULL;
2531
2532 iter->main_trie = (struct trie *) tb->tb_data;
2533 if (*pos == 0)
2534 return SEQ_START_TOKEN;
2535 else
2536 return fib_route_get_idx(iter, *pos - 1);
2537}
2538
2539static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2540{
2541 struct fib_route_iter *iter = seq->private;
2542 struct leaf *l = v;
2543
2544 ++*pos;
2545 if (v == SEQ_START_TOKEN) {
2546 iter->pos = 0;
2547 l = trie_firstleaf(iter->main_trie);
2548 } else {
2549 iter->pos++;
2550 l = trie_nextleaf(l);
2551 }
2552
2553 if (l)
2554 iter->key = l->key;
2555 else
2556 iter->pos = 0;
2557 return l;
2558}
2559
2560static void fib_route_seq_stop(struct seq_file *seq, void *v)
2561 __releases(RCU)
2562{
2563 rcu_read_unlock();
2564}
2565
Al Viro32ab5f82006-09-26 22:21:45 -07002566static unsigned fib_flag_trans(int type, __be32 mask, const struct fib_info *fi)
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002567{
2568 static unsigned type2flags[RTN_MAX + 1] = {
2569 [7] = RTF_REJECT, [8] = RTF_REJECT,
2570 };
2571 unsigned flags = type2flags[type];
2572
2573 if (fi && fi->fib_nh->nh_gw)
2574 flags |= RTF_GATEWAY;
Al Viro32ab5f82006-09-26 22:21:45 -07002575 if (mask == htonl(0xFFFFFFFF))
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002576 flags |= RTF_HOST;
2577 flags |= RTF_UP;
2578 return flags;
2579}
2580
2581/*
2582 * This outputs /proc/net/route.
2583 * The format of the file is not supposed to be changed
2584 * and needs to be same as fib_hash output to avoid breaking
2585 * legacy utilities
2586 */
2587static int fib_route_seq_show(struct seq_file *seq, void *v)
2588{
2589 struct leaf *l = v;
Stephen Hemminger13280422008-01-22 21:54:37 -08002590 struct leaf_info *li;
2591 struct hlist_node *node;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002592
2593 if (v == SEQ_START_TOKEN) {
2594 seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway "
2595 "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU"
2596 "\tWindow\tIRTT");
2597 return 0;
2598 }
2599
Stephen Hemminger13280422008-01-22 21:54:37 -08002600 hlist_for_each_entry_rcu(li, node, &l->list, hlist) {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002601 struct fib_alias *fa;
Al Viro32ab5f82006-09-26 22:21:45 -07002602 __be32 mask, prefix;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002603
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002604 mask = inet_make_mask(li->plen);
2605 prefix = htonl(l->key);
2606
2607 list_for_each_entry_rcu(fa, &li->falh, fa_list) {
Herbert Xu1371e372005-10-15 09:42:39 +10002608 const struct fib_info *fi = fa->fa_info;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002609 unsigned flags = fib_flag_trans(fa->fa_type, mask, fi);
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002610 int len;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002611
2612 if (fa->fa_type == RTN_BROADCAST
2613 || fa->fa_type == RTN_MULTICAST)
2614 continue;
2615
2616 if (fi)
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002617 seq_printf(seq,
2618 "%s\t%08X\t%08X\t%04X\t%d\t%u\t"
2619 "%d\t%08X\t%d\t%u\t%u%n",
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002620 fi->fib_dev ? fi->fib_dev->name : "*",
2621 prefix,
2622 fi->fib_nh->nh_gw, flags, 0, 0,
2623 fi->fib_priority,
2624 mask,
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08002625 (fi->fib_advmss ?
2626 fi->fib_advmss + 40 : 0),
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002627 fi->fib_window,
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002628 fi->fib_rtt >> 3, &len);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002629 else
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002630 seq_printf(seq,
2631 "*\t%08X\t%08X\t%04X\t%d\t%u\t"
2632 "%d\t%08X\t%d\t%u\t%u%n",
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002633 prefix, 0, flags, 0, 0, 0,
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002634 mask, 0, 0, 0, &len);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002635
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002636 seq_printf(seq, "%*s\n", 127 - len, "");
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002637 }
2638 }
2639
2640 return 0;
2641}
2642
Stephen Hemmingerf6908082007-03-12 14:34:29 -07002643static const struct seq_operations fib_route_seq_ops = {
Stephen Hemminger8315f5d2008-02-11 21:14:39 -08002644 .start = fib_route_seq_start,
2645 .next = fib_route_seq_next,
2646 .stop = fib_route_seq_stop,
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002647 .show = fib_route_seq_show,
2648};
2649
2650static int fib_route_seq_open(struct inode *inode, struct file *file)
2651{
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002652 return seq_open_net(inode, file, &fib_route_seq_ops,
Stephen Hemminger8315f5d2008-02-11 21:14:39 -08002653 sizeof(struct fib_route_iter));
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002654}
2655
Arjan van de Ven9a321442007-02-12 00:55:35 -08002656static const struct file_operations fib_route_fops = {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002657 .owner = THIS_MODULE,
2658 .open = fib_route_seq_open,
2659 .read = seq_read,
2660 .llseek = seq_lseek,
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002661 .release = seq_release_net,
Robert Olsson19baf832005-06-21 12:43:18 -07002662};
2663
Denis V. Lunev61a02652008-01-10 03:21:09 -08002664int __net_init fib_proc_init(struct net *net)
Robert Olsson19baf832005-06-21 12:43:18 -07002665{
Denis V. Lunev61a02652008-01-10 03:21:09 -08002666 if (!proc_net_fops_create(net, "fib_trie", S_IRUGO, &fib_trie_fops))
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002667 goto out1;
2668
Denis V. Lunev61a02652008-01-10 03:21:09 -08002669 if (!proc_net_fops_create(net, "fib_triestat", S_IRUGO,
2670 &fib_triestat_fops))
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002671 goto out2;
2672
Denis V. Lunev61a02652008-01-10 03:21:09 -08002673 if (!proc_net_fops_create(net, "route", S_IRUGO, &fib_route_fops))
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002674 goto out3;
2675
Robert Olsson19baf832005-06-21 12:43:18 -07002676 return 0;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002677
2678out3:
Denis V. Lunev61a02652008-01-10 03:21:09 -08002679 proc_net_remove(net, "fib_triestat");
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002680out2:
Denis V. Lunev61a02652008-01-10 03:21:09 -08002681 proc_net_remove(net, "fib_trie");
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002682out1:
2683 return -ENOMEM;
Robert Olsson19baf832005-06-21 12:43:18 -07002684}
2685
Denis V. Lunev61a02652008-01-10 03:21:09 -08002686void __net_exit fib_proc_exit(struct net *net)
Robert Olsson19baf832005-06-21 12:43:18 -07002687{
Denis V. Lunev61a02652008-01-10 03:21:09 -08002688 proc_net_remove(net, "fib_trie");
2689 proc_net_remove(net, "fib_triestat");
2690 proc_net_remove(net, "route");
Robert Olsson19baf832005-06-21 12:43:18 -07002691}
2692
2693#endif /* CONFIG_PROC_FS */