blob: d58b49115386f0ca0fd3ebddb2b0684728c117e6 [file] [log] [blame]
Robert Olsson19baf832005-06-21 12:43:18 -07001/*
2 * This program is free software; you can redistribute it and/or
3 * modify it under the terms of the GNU General Public License
4 * as published by the Free Software Foundation; either version
5 * 2 of the License, or (at your option) any later version.
6 *
7 * Robert Olsson <robert.olsson@its.uu.se> Uppsala Universitet
8 * & Swedish University of Agricultural Sciences.
9 *
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090010 * Jens Laas <jens.laas@data.slu.se> Swedish University of
Robert Olsson19baf832005-06-21 12:43:18 -070011 * Agricultural Sciences.
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090012 *
Robert Olsson19baf832005-06-21 12:43:18 -070013 * Hans Liss <hans.liss@its.uu.se> Uppsala Universitet
14 *
15 * This work is based on the LPC-trie which is originally descibed in:
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090016 *
Robert Olsson19baf832005-06-21 12:43:18 -070017 * An experimental study of compression methods for dynamic tries
18 * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002.
19 * http://www.nada.kth.se/~snilsson/public/papers/dyntrie2/
20 *
21 *
22 * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson
23 * IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999
24 *
Robert Olsson19baf832005-06-21 12:43:18 -070025 *
26 * Code from fib_hash has been reused which includes the following header:
27 *
28 *
29 * INET An implementation of the TCP/IP protocol suite for the LINUX
30 * operating system. INET is implemented using the BSD Socket
31 * interface as the means of communication with the user level.
32 *
33 * IPv4 FIB: lookup engine and maintenance routines.
34 *
35 *
36 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
37 *
38 * This program is free software; you can redistribute it and/or
39 * modify it under the terms of the GNU General Public License
40 * as published by the Free Software Foundation; either version
41 * 2 of the License, or (at your option) any later version.
Robert Olssonfd966252005-12-22 11:25:10 -080042 *
43 * Substantial contributions to this work comes from:
44 *
45 * David S. Miller, <davem@davemloft.net>
46 * Stephen Hemminger <shemminger@osdl.org>
47 * Paul E. McKenney <paulmck@us.ibm.com>
48 * Patrick McHardy <kaber@trash.net>
Robert Olsson19baf832005-06-21 12:43:18 -070049 */
50
Robert Olsson05eee482007-03-19 16:27:37 -070051#define VERSION "0.408"
Robert Olsson19baf832005-06-21 12:43:18 -070052
Robert Olsson19baf832005-06-21 12:43:18 -070053#include <asm/uaccess.h>
54#include <asm/system.h>
Jiri Slaby1977f032007-10-18 23:40:25 -070055#include <linux/bitops.h>
Robert Olsson19baf832005-06-21 12:43:18 -070056#include <linux/types.h>
57#include <linux/kernel.h>
Robert Olsson19baf832005-06-21 12:43:18 -070058#include <linux/mm.h>
59#include <linux/string.h>
60#include <linux/socket.h>
61#include <linux/sockios.h>
62#include <linux/errno.h>
63#include <linux/in.h>
64#include <linux/inet.h>
Stephen Hemmingercd8787a2006-01-03 14:38:34 -080065#include <linux/inetdevice.h>
Robert Olsson19baf832005-06-21 12:43:18 -070066#include <linux/netdevice.h>
67#include <linux/if_arp.h>
68#include <linux/proc_fs.h>
Robert Olsson2373ce12005-08-25 13:01:29 -070069#include <linux/rcupdate.h>
Robert Olsson19baf832005-06-21 12:43:18 -070070#include <linux/skbuff.h>
71#include <linux/netlink.h>
72#include <linux/init.h>
73#include <linux/list.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020074#include <net/net_namespace.h>
Robert Olsson19baf832005-06-21 12:43:18 -070075#include <net/ip.h>
76#include <net/protocol.h>
77#include <net/route.h>
78#include <net/tcp.h>
79#include <net/sock.h>
80#include <net/ip_fib.h>
81#include "fib_lookup.h"
82
Robert Olsson06ef9212006-03-20 21:35:01 -080083#define MAX_STAT_DEPTH 32
Robert Olsson19baf832005-06-21 12:43:18 -070084
Robert Olsson19baf832005-06-21 12:43:18 -070085#define KEYLENGTH (8*sizeof(t_key))
Robert Olsson19baf832005-06-21 12:43:18 -070086
Robert Olsson19baf832005-06-21 12:43:18 -070087typedef unsigned int t_key;
88
89#define T_TNODE 0
90#define T_LEAF 1
91#define NODE_TYPE_MASK 0x1UL
Robert Olsson2373ce12005-08-25 13:01:29 -070092#define NODE_TYPE(node) ((node)->parent & NODE_TYPE_MASK)
93
Olof Johansson91b9a272005-08-09 20:24:39 -070094#define IS_TNODE(n) (!(n->parent & T_LEAF))
95#define IS_LEAF(n) (n->parent & T_LEAF)
Robert Olsson19baf832005-06-21 12:43:18 -070096
97struct node {
Olof Johansson91b9a272005-08-09 20:24:39 -070098 unsigned long parent;
Eric Dumazet8d9654442008-01-13 22:31:44 -080099 t_key key;
Robert Olsson19baf832005-06-21 12:43:18 -0700100};
101
102struct leaf {
Olof Johansson91b9a272005-08-09 20:24:39 -0700103 unsigned long parent;
Eric Dumazet8d9654442008-01-13 22:31:44 -0800104 t_key key;
Robert Olsson19baf832005-06-21 12:43:18 -0700105 struct hlist_head list;
Robert Olsson2373ce12005-08-25 13:01:29 -0700106 struct rcu_head rcu;
Robert Olsson19baf832005-06-21 12:43:18 -0700107};
108
109struct leaf_info {
110 struct hlist_node hlist;
Robert Olsson2373ce12005-08-25 13:01:29 -0700111 struct rcu_head rcu;
Robert Olsson19baf832005-06-21 12:43:18 -0700112 int plen;
113 struct list_head falh;
114};
115
116struct tnode {
Olof Johansson91b9a272005-08-09 20:24:39 -0700117 unsigned long parent;
Eric Dumazet8d9654442008-01-13 22:31:44 -0800118 t_key key;
Eric Dumazet112d8cf2008-01-12 21:27:41 -0800119 unsigned char pos; /* 2log(KEYLENGTH) bits needed */
120 unsigned char bits; /* 2log(KEYLENGTH) bits needed */
Eric Dumazet8d9654442008-01-13 22:31:44 -0800121 unsigned int full_children; /* KEYLENGTH bits needed */
122 unsigned int empty_children; /* KEYLENGTH bits needed */
Stephen Hemminger15be75c2008-04-10 02:56:38 -0700123 union {
124 struct rcu_head rcu;
125 struct work_struct work;
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700126 struct tnode *tnode_free;
Stephen Hemminger15be75c2008-04-10 02:56:38 -0700127 };
Olof Johansson91b9a272005-08-09 20:24:39 -0700128 struct node *child[0];
Robert Olsson19baf832005-06-21 12:43:18 -0700129};
130
131#ifdef CONFIG_IP_FIB_TRIE_STATS
132struct trie_use_stats {
133 unsigned int gets;
134 unsigned int backtrack;
135 unsigned int semantic_match_passed;
136 unsigned int semantic_match_miss;
137 unsigned int null_node_hit;
Robert Olsson2f368952005-07-05 15:02:40 -0700138 unsigned int resize_node_skipped;
Robert Olsson19baf832005-06-21 12:43:18 -0700139};
140#endif
141
142struct trie_stat {
143 unsigned int totdepth;
144 unsigned int maxdepth;
145 unsigned int tnodes;
146 unsigned int leaves;
147 unsigned int nullpointers;
Stephen Hemminger93672292008-01-22 21:54:05 -0800148 unsigned int prefixes;
Robert Olsson06ef9212006-03-20 21:35:01 -0800149 unsigned int nodesizes[MAX_STAT_DEPTH];
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700150};
Robert Olsson19baf832005-06-21 12:43:18 -0700151
152struct trie {
Olof Johansson91b9a272005-08-09 20:24:39 -0700153 struct node *trie;
Robert Olsson19baf832005-06-21 12:43:18 -0700154#ifdef CONFIG_IP_FIB_TRIE_STATS
155 struct trie_use_stats stats;
156#endif
Robert Olsson19baf832005-06-21 12:43:18 -0700157};
158
Robert Olsson19baf832005-06-21 12:43:18 -0700159static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n);
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800160static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n,
161 int wasfull);
Robert Olsson19baf832005-06-21 12:43:18 -0700162static struct node *resize(struct trie *t, struct tnode *tn);
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700163static struct tnode *inflate(struct trie *t, struct tnode *tn);
164static struct tnode *halve(struct trie *t, struct tnode *tn);
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700165/* tnodes to free after resize(); protected by RTNL */
166static struct tnode *tnode_free_head;
Jarek Poplawskic3059472009-07-14 08:33:08 +0000167static size_t tnode_free_size;
168
169/*
170 * synchronize_rcu after call_rcu for that many pages; it should be especially
171 * useful before resizing the root node with PREEMPT_NONE configs; the value was
172 * obtained experimentally, aiming to avoid visible slowdown.
173 */
174static const int sync_pages = 128;
Robert Olsson19baf832005-06-21 12:43:18 -0700175
Christoph Lametere18b8902006-12-06 20:33:20 -0800176static struct kmem_cache *fn_alias_kmem __read_mostly;
Stephen Hemmingerbc3c8c12008-01-22 21:51:50 -0800177static struct kmem_cache *trie_leaf_kmem __read_mostly;
Robert Olsson19baf832005-06-21 12:43:18 -0700178
Stephen Hemminger06801912007-08-10 15:22:13 -0700179static inline struct tnode *node_parent(struct node *node)
180{
Eric Dumazetb59cfbf2008-01-18 03:31:36 -0800181 return (struct tnode *)(node->parent & ~NODE_TYPE_MASK);
182}
Stephen Hemminger06801912007-08-10 15:22:13 -0700183
Eric Dumazetb59cfbf2008-01-18 03:31:36 -0800184static inline struct tnode *node_parent_rcu(struct node *node)
185{
186 struct tnode *ret = node_parent(node);
187
Stephen Hemminger06801912007-08-10 15:22:13 -0700188 return rcu_dereference(ret);
189}
190
Stephen Hemminger6440cc92008-03-22 17:59:58 -0700191/* Same as rcu_assign_pointer
192 * but that macro() assumes that value is a pointer.
193 */
Stephen Hemminger06801912007-08-10 15:22:13 -0700194static inline void node_set_parent(struct node *node, struct tnode *ptr)
195{
Stephen Hemminger6440cc92008-03-22 17:59:58 -0700196 smp_wmb();
197 node->parent = (unsigned long)ptr | NODE_TYPE(node);
Stephen Hemminger06801912007-08-10 15:22:13 -0700198}
Robert Olsson2373ce12005-08-25 13:01:29 -0700199
Eric Dumazetb59cfbf2008-01-18 03:31:36 -0800200static inline struct node *tnode_get_child(struct tnode *tn, unsigned int i)
Robert Olsson19baf832005-06-21 12:43:18 -0700201{
Eric Dumazetb59cfbf2008-01-18 03:31:36 -0800202 BUG_ON(i >= 1U << tn->bits);
Robert Olsson19baf832005-06-21 12:43:18 -0700203
Eric Dumazetb59cfbf2008-01-18 03:31:36 -0800204 return tn->child[i];
205}
206
207static inline struct node *tnode_get_child_rcu(struct tnode *tn, unsigned int i)
208{
209 struct node *ret = tnode_get_child(tn, i);
210
211 return rcu_dereference(ret);
Robert Olsson19baf832005-06-21 12:43:18 -0700212}
213
Stephen Hemmignerbb435b82005-08-09 20:25:39 -0700214static inline int tnode_child_length(const struct tnode *tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700215{
Olof Johansson91b9a272005-08-09 20:24:39 -0700216 return 1 << tn->bits;
Robert Olsson19baf832005-06-21 12:43:18 -0700217}
218
Stephen Hemmingerab66b4a2007-08-10 15:22:58 -0700219static inline t_key mask_pfx(t_key k, unsigned short l)
220{
221 return (l == 0) ? 0 : k >> (KEYLENGTH-l) << (KEYLENGTH-l);
222}
223
Robert Olsson19baf832005-06-21 12:43:18 -0700224static inline t_key tkey_extract_bits(t_key a, int offset, int bits)
225{
Olof Johansson91b9a272005-08-09 20:24:39 -0700226 if (offset < KEYLENGTH)
Robert Olsson19baf832005-06-21 12:43:18 -0700227 return ((t_key)(a << offset)) >> (KEYLENGTH - bits);
Olof Johansson91b9a272005-08-09 20:24:39 -0700228 else
Robert Olsson19baf832005-06-21 12:43:18 -0700229 return 0;
230}
231
232static inline int tkey_equals(t_key a, t_key b)
233{
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700234 return a == b;
Robert Olsson19baf832005-06-21 12:43:18 -0700235}
236
237static inline int tkey_sub_equals(t_key a, int offset, int bits, t_key b)
238{
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700239 if (bits == 0 || offset >= KEYLENGTH)
240 return 1;
Olof Johansson91b9a272005-08-09 20:24:39 -0700241 bits = bits > KEYLENGTH ? KEYLENGTH : bits;
242 return ((a ^ b) << offset) >> (KEYLENGTH - bits) == 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700243}
Robert Olsson19baf832005-06-21 12:43:18 -0700244
245static inline int tkey_mismatch(t_key a, int offset, t_key b)
246{
247 t_key diff = a ^ b;
248 int i = offset;
249
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700250 if (!diff)
251 return 0;
252 while ((diff << i) >> (KEYLENGTH-1) == 0)
Robert Olsson19baf832005-06-21 12:43:18 -0700253 i++;
254 return i;
255}
256
Robert Olsson19baf832005-06-21 12:43:18 -0700257/*
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900258 To understand this stuff, an understanding of keys and all their bits is
259 necessary. Every node in the trie has a key associated with it, but not
Robert Olsson19baf832005-06-21 12:43:18 -0700260 all of the bits in that key are significant.
261
262 Consider a node 'n' and its parent 'tp'.
263
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900264 If n is a leaf, every bit in its key is significant. Its presence is
265 necessitated by path compression, since during a tree traversal (when
266 searching for a leaf - unless we are doing an insertion) we will completely
267 ignore all skipped bits we encounter. Thus we need to verify, at the end of
268 a potentially successful search, that we have indeed been walking the
Robert Olsson19baf832005-06-21 12:43:18 -0700269 correct key path.
270
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900271 Note that we can never "miss" the correct key in the tree if present by
272 following the wrong path. Path compression ensures that segments of the key
273 that are the same for all keys with a given prefix are skipped, but the
274 skipped part *is* identical for each node in the subtrie below the skipped
275 bit! trie_insert() in this implementation takes care of that - note the
Robert Olsson19baf832005-06-21 12:43:18 -0700276 call to tkey_sub_equals() in trie_insert().
277
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900278 if n is an internal node - a 'tnode' here, the various parts of its key
Robert Olsson19baf832005-06-21 12:43:18 -0700279 have many different meanings.
280
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900281 Example:
Robert Olsson19baf832005-06-21 12:43:18 -0700282 _________________________________________________________________
283 | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C |
284 -----------------------------------------------------------------
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900285 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
Robert Olsson19baf832005-06-21 12:43:18 -0700286
287 _________________________________________________________________
288 | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u |
289 -----------------------------------------------------------------
290 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
291
292 tp->pos = 7
293 tp->bits = 3
294 n->pos = 15
Olof Johansson91b9a272005-08-09 20:24:39 -0700295 n->bits = 4
Robert Olsson19baf832005-06-21 12:43:18 -0700296
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900297 First, let's just ignore the bits that come before the parent tp, that is
298 the bits from 0 to (tp->pos-1). They are *known* but at this point we do
Robert Olsson19baf832005-06-21 12:43:18 -0700299 not use them for anything.
300
301 The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900302 index into the parent's child array. That is, they will be used to find
Robert Olsson19baf832005-06-21 12:43:18 -0700303 'n' among tp's children.
304
305 The bits from (tp->pos + tp->bits) to (n->pos - 1) - "S" - are skipped bits
306 for the node n.
307
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900308 All the bits we have seen so far are significant to the node n. The rest
Robert Olsson19baf832005-06-21 12:43:18 -0700309 of the bits are really not needed or indeed known in n->key.
310
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900311 The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into
Robert Olsson19baf832005-06-21 12:43:18 -0700312 n's child array, and will of course be different for each child.
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900313
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700314
Robert Olsson19baf832005-06-21 12:43:18 -0700315 The rest of the bits, from (n->pos + n->bits) onward, are completely unknown
316 at this point.
317
318*/
319
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700320static inline void check_tnode(const struct tnode *tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700321{
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700322 WARN_ON(tn && tn->pos+tn->bits > 32);
Robert Olsson19baf832005-06-21 12:43:18 -0700323}
324
Denis V. Lunevf5026fa2007-12-13 09:47:57 -0800325static const int halve_threshold = 25;
326static const int inflate_threshold = 50;
Jarek Poplawski345aa032009-07-07 19:39:16 -0700327static const int halve_threshold_root = 15;
328static const int inflate_threshold_root = 25;
Robert Olsson19baf832005-06-21 12:43:18 -0700329
Jarek Poplawskibe916cd2009-07-14 09:41:00 +0000330static int inflate_threshold_root_fix;
331#define INFLATE_FIX_MAX 10 /* a comment in resize() */
Robert Olsson2373ce12005-08-25 13:01:29 -0700332
333static void __alias_free_mem(struct rcu_head *head)
334{
335 struct fib_alias *fa = container_of(head, struct fib_alias, rcu);
336 kmem_cache_free(fn_alias_kmem, fa);
337}
338
339static inline void alias_free_mem_rcu(struct fib_alias *fa)
340{
341 call_rcu(&fa->rcu, __alias_free_mem);
342}
343
344static void __leaf_free_rcu(struct rcu_head *head)
345{
Stephen Hemmingerbc3c8c12008-01-22 21:51:50 -0800346 struct leaf *l = container_of(head, struct leaf, rcu);
347 kmem_cache_free(trie_leaf_kmem, l);
Robert Olsson2373ce12005-08-25 13:01:29 -0700348}
349
Stephen Hemminger387a5482008-04-10 03:47:34 -0700350static inline void free_leaf(struct leaf *l)
351{
352 call_rcu_bh(&l->rcu, __leaf_free_rcu);
353}
354
Robert Olsson2373ce12005-08-25 13:01:29 -0700355static void __leaf_info_free_rcu(struct rcu_head *head)
356{
357 kfree(container_of(head, struct leaf_info, rcu));
358}
359
360static inline void free_leaf_info(struct leaf_info *leaf)
361{
362 call_rcu(&leaf->rcu, __leaf_info_free_rcu);
363}
364
Eric Dumazet8d9654442008-01-13 22:31:44 -0800365static struct tnode *tnode_alloc(size_t size)
Robert Olsson2373ce12005-08-25 13:01:29 -0700366{
Robert Olsson2373ce12005-08-25 13:01:29 -0700367 if (size <= PAGE_SIZE)
Eric Dumazet8d9654442008-01-13 22:31:44 -0800368 return kzalloc(size, GFP_KERNEL);
Stephen Hemminger15be75c2008-04-10 02:56:38 -0700369 else
370 return __vmalloc(size, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL);
371}
Robert Olsson2373ce12005-08-25 13:01:29 -0700372
Stephen Hemminger15be75c2008-04-10 02:56:38 -0700373static void __tnode_vfree(struct work_struct *arg)
374{
375 struct tnode *tn = container_of(arg, struct tnode, work);
376 vfree(tn);
Robert Olsson2373ce12005-08-25 13:01:29 -0700377}
378
379static void __tnode_free_rcu(struct rcu_head *head)
380{
381 struct tnode *tn = container_of(head, struct tnode, rcu);
Eric Dumazet8d9654442008-01-13 22:31:44 -0800382 size_t size = sizeof(struct tnode) +
383 (sizeof(struct node *) << tn->bits);
Robert Olsson2373ce12005-08-25 13:01:29 -0700384
385 if (size <= PAGE_SIZE)
386 kfree(tn);
Stephen Hemminger15be75c2008-04-10 02:56:38 -0700387 else {
388 INIT_WORK(&tn->work, __tnode_vfree);
389 schedule_work(&tn->work);
390 }
Robert Olsson2373ce12005-08-25 13:01:29 -0700391}
392
393static inline void tnode_free(struct tnode *tn)
394{
Stephen Hemminger387a5482008-04-10 03:47:34 -0700395 if (IS_LEAF(tn))
396 free_leaf((struct leaf *) tn);
397 else
Robert Olsson550e29b2006-04-04 12:53:35 -0700398 call_rcu(&tn->rcu, __tnode_free_rcu);
Robert Olsson2373ce12005-08-25 13:01:29 -0700399}
400
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700401static void tnode_free_safe(struct tnode *tn)
402{
403 BUG_ON(IS_LEAF(tn));
Jarek Poplawski7b855762009-06-18 00:28:51 -0700404 tn->tnode_free = tnode_free_head;
405 tnode_free_head = tn;
Jarek Poplawskic3059472009-07-14 08:33:08 +0000406 tnode_free_size += sizeof(struct tnode) +
407 (sizeof(struct node *) << tn->bits);
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700408}
409
410static void tnode_free_flush(void)
411{
412 struct tnode *tn;
413
414 while ((tn = tnode_free_head)) {
415 tnode_free_head = tn->tnode_free;
416 tn->tnode_free = NULL;
417 tnode_free(tn);
418 }
Jarek Poplawskic3059472009-07-14 08:33:08 +0000419
420 if (tnode_free_size >= PAGE_SIZE * sync_pages) {
421 tnode_free_size = 0;
422 synchronize_rcu();
423 }
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700424}
425
Robert Olsson19baf832005-06-21 12:43:18 -0700426static struct leaf *leaf_new(void)
427{
Stephen Hemmingerbc3c8c12008-01-22 21:51:50 -0800428 struct leaf *l = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700429 if (l) {
Robert Olsson2373ce12005-08-25 13:01:29 -0700430 l->parent = T_LEAF;
Robert Olsson19baf832005-06-21 12:43:18 -0700431 INIT_HLIST_HEAD(&l->list);
432 }
433 return l;
434}
435
436static struct leaf_info *leaf_info_new(int plen)
437{
438 struct leaf_info *li = kmalloc(sizeof(struct leaf_info), GFP_KERNEL);
Robert Olsson2373ce12005-08-25 13:01:29 -0700439 if (li) {
440 li->plen = plen;
441 INIT_LIST_HEAD(&li->falh);
Patrick McHardyf0e36f82005-07-05 14:44:55 -0700442 }
Robert Olsson2373ce12005-08-25 13:01:29 -0700443 return li;
Patrick McHardyf0e36f82005-07-05 14:44:55 -0700444}
445
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800446static struct tnode *tnode_new(t_key key, int pos, int bits)
Robert Olsson19baf832005-06-21 12:43:18 -0700447{
Eric Dumazet8d9654442008-01-13 22:31:44 -0800448 size_t sz = sizeof(struct tnode) + (sizeof(struct node *) << bits);
Patrick McHardyf0e36f82005-07-05 14:44:55 -0700449 struct tnode *tn = tnode_alloc(sz);
Robert Olsson19baf832005-06-21 12:43:18 -0700450
Olof Johansson91b9a272005-08-09 20:24:39 -0700451 if (tn) {
Robert Olsson2373ce12005-08-25 13:01:29 -0700452 tn->parent = T_TNODE;
Robert Olsson19baf832005-06-21 12:43:18 -0700453 tn->pos = pos;
454 tn->bits = bits;
455 tn->key = key;
456 tn->full_children = 0;
457 tn->empty_children = 1<<bits;
458 }
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700459
Eric Dumazet8d9654442008-01-13 22:31:44 -0800460 pr_debug("AT %p s=%u %lu\n", tn, (unsigned int) sizeof(struct tnode),
461 (unsigned long) (sizeof(struct node) << bits));
Robert Olsson19baf832005-06-21 12:43:18 -0700462 return tn;
463}
464
Robert Olsson19baf832005-06-21 12:43:18 -0700465/*
466 * Check whether a tnode 'n' is "full", i.e. it is an internal node
467 * and no bits are skipped. See discussion in dyntree paper p. 6
468 */
469
Stephen Hemmignerbb435b82005-08-09 20:25:39 -0700470static inline int tnode_full(const struct tnode *tn, const struct node *n)
Robert Olsson19baf832005-06-21 12:43:18 -0700471{
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700472 if (n == NULL || IS_LEAF(n))
Robert Olsson19baf832005-06-21 12:43:18 -0700473 return 0;
474
475 return ((struct tnode *) n)->pos == tn->pos + tn->bits;
476}
477
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800478static inline void put_child(struct trie *t, struct tnode *tn, int i,
479 struct node *n)
Robert Olsson19baf832005-06-21 12:43:18 -0700480{
481 tnode_put_child_reorg(tn, i, n, -1);
482}
483
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700484 /*
Robert Olsson19baf832005-06-21 12:43:18 -0700485 * Add a child at position i overwriting the old value.
486 * Update the value of full_children and empty_children.
487 */
488
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800489static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n,
490 int wasfull)
Robert Olsson19baf832005-06-21 12:43:18 -0700491{
Robert Olsson2373ce12005-08-25 13:01:29 -0700492 struct node *chi = tn->child[i];
Robert Olsson19baf832005-06-21 12:43:18 -0700493 int isfull;
494
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700495 BUG_ON(i >= 1<<tn->bits);
496
Robert Olsson19baf832005-06-21 12:43:18 -0700497 /* update emptyChildren */
498 if (n == NULL && chi != NULL)
499 tn->empty_children++;
500 else if (n != NULL && chi == NULL)
501 tn->empty_children--;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700502
Robert Olsson19baf832005-06-21 12:43:18 -0700503 /* update fullChildren */
Olof Johansson91b9a272005-08-09 20:24:39 -0700504 if (wasfull == -1)
Robert Olsson19baf832005-06-21 12:43:18 -0700505 wasfull = tnode_full(tn, chi);
506
507 isfull = tnode_full(tn, n);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700508 if (wasfull && !isfull)
Robert Olsson19baf832005-06-21 12:43:18 -0700509 tn->full_children--;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700510 else if (!wasfull && isfull)
Robert Olsson19baf832005-06-21 12:43:18 -0700511 tn->full_children++;
Olof Johansson91b9a272005-08-09 20:24:39 -0700512
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700513 if (n)
Stephen Hemminger06801912007-08-10 15:22:13 -0700514 node_set_parent(n, tn);
Robert Olsson19baf832005-06-21 12:43:18 -0700515
Robert Olsson2373ce12005-08-25 13:01:29 -0700516 rcu_assign_pointer(tn->child[i], n);
Robert Olsson19baf832005-06-21 12:43:18 -0700517}
518
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700519static struct node *resize(struct trie *t, struct tnode *tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700520{
521 int i;
Robert Olsson2f368952005-07-05 15:02:40 -0700522 int err = 0;
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700523 struct tnode *old_tn;
Robert Olssone6308be2005-10-04 13:01:58 -0700524 int inflate_threshold_use;
525 int halve_threshold_use;
Robert Olsson05eee482007-03-19 16:27:37 -0700526 int max_resize;
Robert Olsson19baf832005-06-21 12:43:18 -0700527
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900528 if (!tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700529 return NULL;
530
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700531 pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n",
532 tn, inflate_threshold, halve_threshold);
Robert Olsson19baf832005-06-21 12:43:18 -0700533
534 /* No children */
535 if (tn->empty_children == tnode_child_length(tn)) {
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700536 tnode_free_safe(tn);
Robert Olsson19baf832005-06-21 12:43:18 -0700537 return NULL;
538 }
539 /* One child */
540 if (tn->empty_children == tnode_child_length(tn) - 1)
541 for (i = 0; i < tnode_child_length(tn); i++) {
Olof Johansson91b9a272005-08-09 20:24:39 -0700542 struct node *n;
Robert Olsson19baf832005-06-21 12:43:18 -0700543
Olof Johansson91b9a272005-08-09 20:24:39 -0700544 n = tn->child[i];
Robert Olsson2373ce12005-08-25 13:01:29 -0700545 if (!n)
Olof Johansson91b9a272005-08-09 20:24:39 -0700546 continue;
Olof Johansson91b9a272005-08-09 20:24:39 -0700547
548 /* compress one level */
Stephen Hemminger06801912007-08-10 15:22:13 -0700549 node_set_parent(n, NULL);
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700550 tnode_free_safe(tn);
Olof Johansson91b9a272005-08-09 20:24:39 -0700551 return n;
Robert Olsson19baf832005-06-21 12:43:18 -0700552 }
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700553 /*
Robert Olsson19baf832005-06-21 12:43:18 -0700554 * Double as long as the resulting node has a number of
555 * nonempty nodes that are above the threshold.
556 */
557
558 /*
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700559 * From "Implementing a dynamic compressed trie" by Stefan Nilsson of
560 * the Helsinki University of Technology and Matti Tikkanen of Nokia
Robert Olsson19baf832005-06-21 12:43:18 -0700561 * Telecommunications, page 6:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700562 * "A node is doubled if the ratio of non-empty children to all
Robert Olsson19baf832005-06-21 12:43:18 -0700563 * children in the *doubled* node is at least 'high'."
564 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700565 * 'high' in this instance is the variable 'inflate_threshold'. It
566 * is expressed as a percentage, so we multiply it with
567 * tnode_child_length() and instead of multiplying by 2 (since the
568 * child array will be doubled by inflate()) and multiplying
569 * the left-hand side by 100 (to handle the percentage thing) we
Robert Olsson19baf832005-06-21 12:43:18 -0700570 * multiply the left-hand side by 50.
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700571 *
572 * The left-hand side may look a bit weird: tnode_child_length(tn)
573 * - tn->empty_children is of course the number of non-null children
574 * in the current node. tn->full_children is the number of "full"
Robert Olsson19baf832005-06-21 12:43:18 -0700575 * children, that is non-null tnodes with a skip value of 0.
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700576 * All of those will be doubled in the resulting inflated tnode, so
Robert Olsson19baf832005-06-21 12:43:18 -0700577 * we just count them one extra time here.
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700578 *
Robert Olsson19baf832005-06-21 12:43:18 -0700579 * A clearer way to write this would be:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700580 *
Robert Olsson19baf832005-06-21 12:43:18 -0700581 * to_be_doubled = tn->full_children;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700582 * not_to_be_doubled = tnode_child_length(tn) - tn->empty_children -
Robert Olsson19baf832005-06-21 12:43:18 -0700583 * tn->full_children;
584 *
585 * new_child_length = tnode_child_length(tn) * 2;
586 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700587 * new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) /
Robert Olsson19baf832005-06-21 12:43:18 -0700588 * new_child_length;
589 * if (new_fill_factor >= inflate_threshold)
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700590 *
591 * ...and so on, tho it would mess up the while () loop.
592 *
Robert Olsson19baf832005-06-21 12:43:18 -0700593 * anyway,
594 * 100 * (not_to_be_doubled + 2*to_be_doubled) / new_child_length >=
595 * inflate_threshold
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700596 *
Robert Olsson19baf832005-06-21 12:43:18 -0700597 * avoid a division:
598 * 100 * (not_to_be_doubled + 2*to_be_doubled) >=
599 * inflate_threshold * new_child_length
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700600 *
Robert Olsson19baf832005-06-21 12:43:18 -0700601 * expand not_to_be_doubled and to_be_doubled, and shorten:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700602 * 100 * (tnode_child_length(tn) - tn->empty_children +
Olof Johansson91b9a272005-08-09 20:24:39 -0700603 * tn->full_children) >= inflate_threshold * new_child_length
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700604 *
Robert Olsson19baf832005-06-21 12:43:18 -0700605 * expand new_child_length:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700606 * 100 * (tnode_child_length(tn) - tn->empty_children +
Olof Johansson91b9a272005-08-09 20:24:39 -0700607 * tn->full_children) >=
Robert Olsson19baf832005-06-21 12:43:18 -0700608 * inflate_threshold * tnode_child_length(tn) * 2
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700609 *
Robert Olsson19baf832005-06-21 12:43:18 -0700610 * shorten again:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700611 * 50 * (tn->full_children + tnode_child_length(tn) -
Olof Johansson91b9a272005-08-09 20:24:39 -0700612 * tn->empty_children) >= inflate_threshold *
Robert Olsson19baf832005-06-21 12:43:18 -0700613 * tnode_child_length(tn)
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700614 *
Robert Olsson19baf832005-06-21 12:43:18 -0700615 */
616
617 check_tnode(tn);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700618
Robert Olssone6308be2005-10-04 13:01:58 -0700619 /* Keep root node larger */
620
Stephen Hemminger132adf52007-03-08 20:44:43 -0800621 if (!tn->parent)
Jarek Poplawskibe916cd2009-07-14 09:41:00 +0000622 inflate_threshold_use = inflate_threshold_root +
623 inflate_threshold_root_fix;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900624 else
Robert Olssone6308be2005-10-04 13:01:58 -0700625 inflate_threshold_use = inflate_threshold;
626
Robert Olsson2f368952005-07-05 15:02:40 -0700627 err = 0;
Robert Olsson05eee482007-03-19 16:27:37 -0700628 max_resize = 10;
629 while ((tn->full_children > 0 && max_resize-- &&
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800630 50 * (tn->full_children + tnode_child_length(tn)
631 - tn->empty_children)
632 >= inflate_threshold_use * tnode_child_length(tn))) {
Robert Olsson19baf832005-06-21 12:43:18 -0700633
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700634 old_tn = tn;
635 tn = inflate(t, tn);
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800636
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700637 if (IS_ERR(tn)) {
638 tn = old_tn;
Robert Olsson2f368952005-07-05 15:02:40 -0700639#ifdef CONFIG_IP_FIB_TRIE_STATS
640 t->stats.resize_node_skipped++;
641#endif
642 break;
643 }
Robert Olsson19baf832005-06-21 12:43:18 -0700644 }
645
Robert Olsson05eee482007-03-19 16:27:37 -0700646 if (max_resize < 0) {
Jarek Poplawskibe916cd2009-07-14 09:41:00 +0000647 if (!tn->parent) {
648 /*
649 * It was observed that during large updates even
650 * inflate_threshold_root = 35 might be needed to avoid
651 * this warning; but it should be temporary, so let's
652 * try to handle this automatically.
653 */
654 if (inflate_threshold_root_fix < INFLATE_FIX_MAX)
655 inflate_threshold_root_fix++;
656 else
657 pr_warning("Fix inflate_threshold_root."
658 " Now=%d size=%d bits fix=%d\n",
659 inflate_threshold_root, tn->bits,
660 inflate_threshold_root_fix);
661 } else {
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800662 pr_warning("Fix inflate_threshold."
663 " Now=%d size=%d bits\n",
664 inflate_threshold, tn->bits);
Jarek Poplawskibe916cd2009-07-14 09:41:00 +0000665 }
666 } else if (max_resize > 3 && !tn->parent && inflate_threshold_root_fix)
667 inflate_threshold_root_fix--;
Robert Olsson05eee482007-03-19 16:27:37 -0700668
Robert Olsson19baf832005-06-21 12:43:18 -0700669 check_tnode(tn);
670
671 /*
672 * Halve as long as the number of empty children in this
673 * node is above threshold.
674 */
Robert Olsson2f368952005-07-05 15:02:40 -0700675
Robert Olssone6308be2005-10-04 13:01:58 -0700676
677 /* Keep root node larger */
678
Stephen Hemminger132adf52007-03-08 20:44:43 -0800679 if (!tn->parent)
Robert Olssone6308be2005-10-04 13:01:58 -0700680 halve_threshold_use = halve_threshold_root;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900681 else
Robert Olssone6308be2005-10-04 13:01:58 -0700682 halve_threshold_use = halve_threshold;
683
Robert Olsson2f368952005-07-05 15:02:40 -0700684 err = 0;
Robert Olsson05eee482007-03-19 16:27:37 -0700685 max_resize = 10;
686 while (tn->bits > 1 && max_resize-- &&
Robert Olsson19baf832005-06-21 12:43:18 -0700687 100 * (tnode_child_length(tn) - tn->empty_children) <
Robert Olssone6308be2005-10-04 13:01:58 -0700688 halve_threshold_use * tnode_child_length(tn)) {
Robert Olsson19baf832005-06-21 12:43:18 -0700689
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700690 old_tn = tn;
691 tn = halve(t, tn);
692 if (IS_ERR(tn)) {
693 tn = old_tn;
Robert Olsson2f368952005-07-05 15:02:40 -0700694#ifdef CONFIG_IP_FIB_TRIE_STATS
695 t->stats.resize_node_skipped++;
696#endif
697 break;
698 }
699 }
700
Robert Olsson05eee482007-03-19 16:27:37 -0700701 if (max_resize < 0) {
702 if (!tn->parent)
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800703 pr_warning("Fix halve_threshold_root."
704 " Now=%d size=%d bits\n",
705 halve_threshold_root, tn->bits);
Robert Olsson05eee482007-03-19 16:27:37 -0700706 else
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800707 pr_warning("Fix halve_threshold."
708 " Now=%d size=%d bits\n",
709 halve_threshold, tn->bits);
Robert Olsson05eee482007-03-19 16:27:37 -0700710 }
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700711
Robert Olsson19baf832005-06-21 12:43:18 -0700712 /* Only one child remains */
Robert Olsson19baf832005-06-21 12:43:18 -0700713 if (tn->empty_children == tnode_child_length(tn) - 1)
714 for (i = 0; i < tnode_child_length(tn); i++) {
Olof Johansson91b9a272005-08-09 20:24:39 -0700715 struct node *n;
716
Olof Johansson91b9a272005-08-09 20:24:39 -0700717 n = tn->child[i];
Robert Olsson2373ce12005-08-25 13:01:29 -0700718 if (!n)
Olof Johansson91b9a272005-08-09 20:24:39 -0700719 continue;
Olof Johansson91b9a272005-08-09 20:24:39 -0700720
721 /* compress one level */
722
Stephen Hemminger06801912007-08-10 15:22:13 -0700723 node_set_parent(n, NULL);
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700724 tnode_free_safe(tn);
Olof Johansson91b9a272005-08-09 20:24:39 -0700725 return n;
Robert Olsson19baf832005-06-21 12:43:18 -0700726 }
727
728 return (struct node *) tn;
729}
730
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700731static struct tnode *inflate(struct trie *t, struct tnode *tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700732{
Robert Olsson19baf832005-06-21 12:43:18 -0700733 struct tnode *oldtnode = tn;
734 int olen = tnode_child_length(tn);
735 int i;
736
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700737 pr_debug("In inflate\n");
Robert Olsson19baf832005-06-21 12:43:18 -0700738
739 tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1);
740
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700741 if (!tn)
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700742 return ERR_PTR(-ENOMEM);
Robert Olsson2f368952005-07-05 15:02:40 -0700743
744 /*
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700745 * Preallocate and store tnodes before the actual work so we
746 * don't get into an inconsistent state if memory allocation
747 * fails. In case of failure we return the oldnode and inflate
Robert Olsson2f368952005-07-05 15:02:40 -0700748 * of tnode is ignored.
749 */
Olof Johansson91b9a272005-08-09 20:24:39 -0700750
751 for (i = 0; i < olen; i++) {
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800752 struct tnode *inode;
Robert Olsson2f368952005-07-05 15:02:40 -0700753
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800754 inode = (struct tnode *) tnode_get_child(oldtnode, i);
Robert Olsson2f368952005-07-05 15:02:40 -0700755 if (inode &&
756 IS_TNODE(inode) &&
757 inode->pos == oldtnode->pos + oldtnode->bits &&
758 inode->bits > 1) {
759 struct tnode *left, *right;
Stephen Hemmingerab66b4a2007-08-10 15:22:58 -0700760 t_key m = ~0U << (KEYLENGTH - 1) >> inode->pos;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700761
Robert Olsson2f368952005-07-05 15:02:40 -0700762 left = tnode_new(inode->key&(~m), inode->pos + 1,
763 inode->bits - 1);
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700764 if (!left)
765 goto nomem;
Olof Johansson91b9a272005-08-09 20:24:39 -0700766
Robert Olsson2f368952005-07-05 15:02:40 -0700767 right = tnode_new(inode->key|m, inode->pos + 1,
768 inode->bits - 1);
769
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900770 if (!right) {
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700771 tnode_free(left);
772 goto nomem;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900773 }
Robert Olsson2f368952005-07-05 15:02:40 -0700774
775 put_child(t, tn, 2*i, (struct node *) left);
776 put_child(t, tn, 2*i+1, (struct node *) right);
777 }
778 }
779
Olof Johansson91b9a272005-08-09 20:24:39 -0700780 for (i = 0; i < olen; i++) {
Stephen Hemmingerc95aaf92008-01-12 21:25:02 -0800781 struct tnode *inode;
Robert Olsson19baf832005-06-21 12:43:18 -0700782 struct node *node = tnode_get_child(oldtnode, i);
Olof Johansson91b9a272005-08-09 20:24:39 -0700783 struct tnode *left, *right;
784 int size, j;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700785
Robert Olsson19baf832005-06-21 12:43:18 -0700786 /* An empty child */
787 if (node == NULL)
788 continue;
789
790 /* A leaf or an internal node with skipped bits */
791
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700792 if (IS_LEAF(node) || ((struct tnode *) node)->pos >
Robert Olsson19baf832005-06-21 12:43:18 -0700793 tn->pos + tn->bits - 1) {
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800794 if (tkey_extract_bits(node->key,
795 oldtnode->pos + oldtnode->bits,
796 1) == 0)
Robert Olsson19baf832005-06-21 12:43:18 -0700797 put_child(t, tn, 2*i, node);
798 else
799 put_child(t, tn, 2*i+1, node);
800 continue;
801 }
802
803 /* An internal node with two children */
804 inode = (struct tnode *) node;
805
806 if (inode->bits == 1) {
807 put_child(t, tn, 2*i, inode->child[0]);
808 put_child(t, tn, 2*i+1, inode->child[1]);
809
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700810 tnode_free_safe(inode);
Olof Johansson91b9a272005-08-09 20:24:39 -0700811 continue;
Robert Olsson19baf832005-06-21 12:43:18 -0700812 }
813
Olof Johansson91b9a272005-08-09 20:24:39 -0700814 /* An internal node with more than two children */
Robert Olsson19baf832005-06-21 12:43:18 -0700815
Olof Johansson91b9a272005-08-09 20:24:39 -0700816 /* We will replace this node 'inode' with two new
817 * ones, 'left' and 'right', each with half of the
818 * original children. The two new nodes will have
819 * a position one bit further down the key and this
820 * means that the "significant" part of their keys
821 * (see the discussion near the top of this file)
822 * will differ by one bit, which will be "0" in
823 * left's key and "1" in right's key. Since we are
824 * moving the key position by one step, the bit that
825 * we are moving away from - the bit at position
826 * (inode->pos) - is the one that will differ between
827 * left and right. So... we synthesize that bit in the
828 * two new keys.
829 * The mask 'm' below will be a single "one" bit at
830 * the position (inode->pos)
831 */
Robert Olsson19baf832005-06-21 12:43:18 -0700832
Olof Johansson91b9a272005-08-09 20:24:39 -0700833 /* Use the old key, but set the new significant
834 * bit to zero.
835 */
Robert Olsson19baf832005-06-21 12:43:18 -0700836
Olof Johansson91b9a272005-08-09 20:24:39 -0700837 left = (struct tnode *) tnode_get_child(tn, 2*i);
838 put_child(t, tn, 2*i, NULL);
Robert Olsson19baf832005-06-21 12:43:18 -0700839
Olof Johansson91b9a272005-08-09 20:24:39 -0700840 BUG_ON(!left);
Robert Olsson2f368952005-07-05 15:02:40 -0700841
Olof Johansson91b9a272005-08-09 20:24:39 -0700842 right = (struct tnode *) tnode_get_child(tn, 2*i+1);
843 put_child(t, tn, 2*i+1, NULL);
Robert Olsson2f368952005-07-05 15:02:40 -0700844
Olof Johansson91b9a272005-08-09 20:24:39 -0700845 BUG_ON(!right);
Robert Olsson2f368952005-07-05 15:02:40 -0700846
Olof Johansson91b9a272005-08-09 20:24:39 -0700847 size = tnode_child_length(left);
848 for (j = 0; j < size; j++) {
849 put_child(t, left, j, inode->child[j]);
850 put_child(t, right, j, inode->child[j + size]);
Robert Olsson19baf832005-06-21 12:43:18 -0700851 }
Olof Johansson91b9a272005-08-09 20:24:39 -0700852 put_child(t, tn, 2*i, resize(t, left));
853 put_child(t, tn, 2*i+1, resize(t, right));
854
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700855 tnode_free_safe(inode);
Robert Olsson19baf832005-06-21 12:43:18 -0700856 }
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700857 tnode_free_safe(oldtnode);
Robert Olsson19baf832005-06-21 12:43:18 -0700858 return tn;
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700859nomem:
860 {
861 int size = tnode_child_length(tn);
862 int j;
863
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700864 for (j = 0; j < size; j++)
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700865 if (tn->child[j])
866 tnode_free((struct tnode *)tn->child[j]);
867
868 tnode_free(tn);
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700869
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700870 return ERR_PTR(-ENOMEM);
871 }
Robert Olsson19baf832005-06-21 12:43:18 -0700872}
873
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700874static struct tnode *halve(struct trie *t, struct tnode *tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700875{
876 struct tnode *oldtnode = tn;
877 struct node *left, *right;
878 int i;
879 int olen = tnode_child_length(tn);
880
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700881 pr_debug("In halve\n");
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700882
883 tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1);
Robert Olsson19baf832005-06-21 12:43:18 -0700884
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700885 if (!tn)
886 return ERR_PTR(-ENOMEM);
Robert Olsson2f368952005-07-05 15:02:40 -0700887
888 /*
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700889 * Preallocate and store tnodes before the actual work so we
890 * don't get into an inconsistent state if memory allocation
891 * fails. In case of failure we return the oldnode and halve
Robert Olsson2f368952005-07-05 15:02:40 -0700892 * of tnode is ignored.
893 */
894
Olof Johansson91b9a272005-08-09 20:24:39 -0700895 for (i = 0; i < olen; i += 2) {
Robert Olsson2f368952005-07-05 15:02:40 -0700896 left = tnode_get_child(oldtnode, i);
897 right = tnode_get_child(oldtnode, i+1);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700898
Robert Olsson2f368952005-07-05 15:02:40 -0700899 /* Two nonempty children */
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700900 if (left && right) {
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700901 struct tnode *newn;
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700902
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700903 newn = tnode_new(left->key, tn->pos + tn->bits, 1);
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700904
905 if (!newn)
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700906 goto nomem;
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700907
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700908 put_child(t, tn, i/2, (struct node *)newn);
Robert Olsson2f368952005-07-05 15:02:40 -0700909 }
Robert Olsson2f368952005-07-05 15:02:40 -0700910
Robert Olsson2f368952005-07-05 15:02:40 -0700911 }
Robert Olsson19baf832005-06-21 12:43:18 -0700912
Olof Johansson91b9a272005-08-09 20:24:39 -0700913 for (i = 0; i < olen; i += 2) {
914 struct tnode *newBinNode;
915
Robert Olsson19baf832005-06-21 12:43:18 -0700916 left = tnode_get_child(oldtnode, i);
917 right = tnode_get_child(oldtnode, i+1);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700918
Robert Olsson19baf832005-06-21 12:43:18 -0700919 /* At least one of the children is empty */
920 if (left == NULL) {
921 if (right == NULL) /* Both are empty */
922 continue;
923 put_child(t, tn, i/2, right);
Olof Johansson91b9a272005-08-09 20:24:39 -0700924 continue;
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700925 }
Olof Johansson91b9a272005-08-09 20:24:39 -0700926
927 if (right == NULL) {
Robert Olsson19baf832005-06-21 12:43:18 -0700928 put_child(t, tn, i/2, left);
Olof Johansson91b9a272005-08-09 20:24:39 -0700929 continue;
930 }
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700931
Robert Olsson19baf832005-06-21 12:43:18 -0700932 /* Two nonempty children */
Olof Johansson91b9a272005-08-09 20:24:39 -0700933 newBinNode = (struct tnode *) tnode_get_child(tn, i/2);
934 put_child(t, tn, i/2, NULL);
Olof Johansson91b9a272005-08-09 20:24:39 -0700935 put_child(t, newBinNode, 0, left);
936 put_child(t, newBinNode, 1, right);
937 put_child(t, tn, i/2, resize(t, newBinNode));
Robert Olsson19baf832005-06-21 12:43:18 -0700938 }
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700939 tnode_free_safe(oldtnode);
Robert Olsson19baf832005-06-21 12:43:18 -0700940 return tn;
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700941nomem:
942 {
943 int size = tnode_child_length(tn);
944 int j;
945
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700946 for (j = 0; j < size; j++)
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700947 if (tn->child[j])
948 tnode_free((struct tnode *)tn->child[j]);
949
950 tnode_free(tn);
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700951
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700952 return ERR_PTR(-ENOMEM);
953 }
Robert Olsson19baf832005-06-21 12:43:18 -0700954}
955
Robert Olsson772cb712005-09-19 15:31:18 -0700956/* readside must use rcu_read_lock currently dump routines
Robert Olsson2373ce12005-08-25 13:01:29 -0700957 via get_fa_head and dump */
958
Robert Olsson772cb712005-09-19 15:31:18 -0700959static struct leaf_info *find_leaf_info(struct leaf *l, int plen)
Robert Olsson19baf832005-06-21 12:43:18 -0700960{
Robert Olsson772cb712005-09-19 15:31:18 -0700961 struct hlist_head *head = &l->list;
Robert Olsson19baf832005-06-21 12:43:18 -0700962 struct hlist_node *node;
963 struct leaf_info *li;
964
Robert Olsson2373ce12005-08-25 13:01:29 -0700965 hlist_for_each_entry_rcu(li, node, head, hlist)
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700966 if (li->plen == plen)
Robert Olsson19baf832005-06-21 12:43:18 -0700967 return li;
Olof Johansson91b9a272005-08-09 20:24:39 -0700968
Robert Olsson19baf832005-06-21 12:43:18 -0700969 return NULL;
970}
971
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800972static inline struct list_head *get_fa_head(struct leaf *l, int plen)
Robert Olsson19baf832005-06-21 12:43:18 -0700973{
Robert Olsson772cb712005-09-19 15:31:18 -0700974 struct leaf_info *li = find_leaf_info(l, plen);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700975
Olof Johansson91b9a272005-08-09 20:24:39 -0700976 if (!li)
977 return NULL;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700978
Olof Johansson91b9a272005-08-09 20:24:39 -0700979 return &li->falh;
Robert Olsson19baf832005-06-21 12:43:18 -0700980}
981
982static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new)
983{
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900984 struct leaf_info *li = NULL, *last = NULL;
985 struct hlist_node *node;
Robert Olsson19baf832005-06-21 12:43:18 -0700986
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900987 if (hlist_empty(head)) {
988 hlist_add_head_rcu(&new->hlist, head);
989 } else {
990 hlist_for_each_entry(li, node, head, hlist) {
991 if (new->plen > li->plen)
992 break;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700993
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900994 last = li;
995 }
996 if (last)
997 hlist_add_after_rcu(&last->hlist, &new->hlist);
998 else
999 hlist_add_before_rcu(&new->hlist, &li->hlist);
1000 }
Robert Olsson19baf832005-06-21 12:43:18 -07001001}
1002
Robert Olsson2373ce12005-08-25 13:01:29 -07001003/* rcu_read_lock needs to be hold by caller from readside */
1004
Robert Olsson19baf832005-06-21 12:43:18 -07001005static struct leaf *
1006fib_find_node(struct trie *t, u32 key)
1007{
1008 int pos;
1009 struct tnode *tn;
1010 struct node *n;
1011
1012 pos = 0;
Robert Olsson2373ce12005-08-25 13:01:29 -07001013 n = rcu_dereference(t->trie);
Robert Olsson19baf832005-06-21 12:43:18 -07001014
1015 while (n != NULL && NODE_TYPE(n) == T_TNODE) {
1016 tn = (struct tnode *) n;
Olof Johansson91b9a272005-08-09 20:24:39 -07001017
Robert Olsson19baf832005-06-21 12:43:18 -07001018 check_tnode(tn);
Olof Johansson91b9a272005-08-09 20:24:39 -07001019
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001020 if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
Olof Johansson91b9a272005-08-09 20:24:39 -07001021 pos = tn->pos + tn->bits;
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001022 n = tnode_get_child_rcu(tn,
1023 tkey_extract_bits(key,
1024 tn->pos,
1025 tn->bits));
Olof Johansson91b9a272005-08-09 20:24:39 -07001026 } else
Robert Olsson19baf832005-06-21 12:43:18 -07001027 break;
1028 }
1029 /* Case we have found a leaf. Compare prefixes */
1030
Olof Johansson91b9a272005-08-09 20:24:39 -07001031 if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key))
1032 return (struct leaf *)n;
1033
Robert Olsson19baf832005-06-21 12:43:18 -07001034 return NULL;
1035}
1036
Jarek Poplawski7b855762009-06-18 00:28:51 -07001037static void trie_rebalance(struct trie *t, struct tnode *tn)
Robert Olsson19baf832005-06-21 12:43:18 -07001038{
Robert Olsson19baf832005-06-21 12:43:18 -07001039 int wasfull;
Robert Olsson3ed18d72009-05-21 15:20:59 -07001040 t_key cindex, key;
Stephen Hemminger06801912007-08-10 15:22:13 -07001041 struct tnode *tp;
Robert Olsson19baf832005-06-21 12:43:18 -07001042
Robert Olsson3ed18d72009-05-21 15:20:59 -07001043 key = tn->key;
1044
Stephen Hemminger06801912007-08-10 15:22:13 -07001045 while (tn != NULL && (tp = node_parent((struct node *)tn)) != NULL) {
Robert Olsson19baf832005-06-21 12:43:18 -07001046 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1047 wasfull = tnode_full(tp, tnode_get_child(tp, cindex));
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001048 tn = (struct tnode *) resize(t, (struct tnode *)tn);
1049
1050 tnode_put_child_reorg((struct tnode *)tp, cindex,
1051 (struct node *)tn, wasfull);
Olof Johansson91b9a272005-08-09 20:24:39 -07001052
Stephen Hemminger06801912007-08-10 15:22:13 -07001053 tp = node_parent((struct node *) tn);
Jarek Poplawski008440e2009-06-30 12:47:19 -07001054 if (!tp)
1055 rcu_assign_pointer(t->trie, (struct node *)tn);
1056
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -07001057 tnode_free_flush();
Stephen Hemminger06801912007-08-10 15:22:13 -07001058 if (!tp)
Robert Olsson19baf832005-06-21 12:43:18 -07001059 break;
Stephen Hemminger06801912007-08-10 15:22:13 -07001060 tn = tp;
Robert Olsson19baf832005-06-21 12:43:18 -07001061 }
Stephen Hemminger06801912007-08-10 15:22:13 -07001062
Robert Olsson19baf832005-06-21 12:43:18 -07001063 /* Handle last (top) tnode */
Jarek Poplawski7b855762009-06-18 00:28:51 -07001064 if (IS_TNODE(tn))
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001065 tn = (struct tnode *)resize(t, (struct tnode *)tn);
Robert Olsson19baf832005-06-21 12:43:18 -07001066
Jarek Poplawski7b855762009-06-18 00:28:51 -07001067 rcu_assign_pointer(t->trie, (struct node *)tn);
1068 tnode_free_flush();
1069
1070 return;
Robert Olsson19baf832005-06-21 12:43:18 -07001071}
1072
Robert Olsson2373ce12005-08-25 13:01:29 -07001073/* only used from updater-side */
1074
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001075static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
Robert Olsson19baf832005-06-21 12:43:18 -07001076{
1077 int pos, newpos;
1078 struct tnode *tp = NULL, *tn = NULL;
1079 struct node *n;
1080 struct leaf *l;
1081 int missbit;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001082 struct list_head *fa_head = NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07001083 struct leaf_info *li;
1084 t_key cindex;
1085
1086 pos = 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001087 n = t->trie;
Robert Olsson19baf832005-06-21 12:43:18 -07001088
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001089 /* If we point to NULL, stop. Either the tree is empty and we should
1090 * just put a new leaf in if, or we have reached an empty child slot,
Robert Olsson19baf832005-06-21 12:43:18 -07001091 * and we should just put our new leaf in that.
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001092 * If we point to a T_TNODE, check if it matches our key. Note that
1093 * a T_TNODE might be skipping any number of bits - its 'pos' need
Robert Olsson19baf832005-06-21 12:43:18 -07001094 * not be the parent's 'pos'+'bits'!
1095 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001096 * If it does match the current key, get pos/bits from it, extract
Robert Olsson19baf832005-06-21 12:43:18 -07001097 * the index from our key, push the T_TNODE and walk the tree.
1098 *
1099 * If it doesn't, we have to replace it with a new T_TNODE.
1100 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001101 * If we point to a T_LEAF, it might or might not have the same key
1102 * as we do. If it does, just change the value, update the T_LEAF's
1103 * value, and return it.
Robert Olsson19baf832005-06-21 12:43:18 -07001104 * If it doesn't, we need to replace it with a T_TNODE.
1105 */
1106
1107 while (n != NULL && NODE_TYPE(n) == T_TNODE) {
1108 tn = (struct tnode *) n;
Olof Johansson91b9a272005-08-09 20:24:39 -07001109
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001110 check_tnode(tn);
Olof Johansson91b9a272005-08-09 20:24:39 -07001111
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001112 if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
Robert Olsson19baf832005-06-21 12:43:18 -07001113 tp = tn;
Olof Johansson91b9a272005-08-09 20:24:39 -07001114 pos = tn->pos + tn->bits;
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001115 n = tnode_get_child(tn,
1116 tkey_extract_bits(key,
1117 tn->pos,
1118 tn->bits));
Robert Olsson19baf832005-06-21 12:43:18 -07001119
Stephen Hemminger06801912007-08-10 15:22:13 -07001120 BUG_ON(n && node_parent(n) != tn);
Olof Johansson91b9a272005-08-09 20:24:39 -07001121 } else
Robert Olsson19baf832005-06-21 12:43:18 -07001122 break;
1123 }
1124
1125 /*
1126 * n ----> NULL, LEAF or TNODE
1127 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001128 * tp is n's (parent) ----> NULL or TNODE
Robert Olsson19baf832005-06-21 12:43:18 -07001129 */
1130
Olof Johansson91b9a272005-08-09 20:24:39 -07001131 BUG_ON(tp && IS_LEAF(tp));
Robert Olsson19baf832005-06-21 12:43:18 -07001132
1133 /* Case 1: n is a leaf. Compare prefixes */
1134
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001135 if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) {
Stephen Hemmingerc95aaf92008-01-12 21:25:02 -08001136 l = (struct leaf *) n;
Robert Olsson19baf832005-06-21 12:43:18 -07001137 li = leaf_info_new(plen);
Olof Johansson91b9a272005-08-09 20:24:39 -07001138
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001139 if (!li)
1140 return NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07001141
1142 fa_head = &li->falh;
1143 insert_leaf_info(&l->list, li);
1144 goto done;
1145 }
Robert Olsson19baf832005-06-21 12:43:18 -07001146 l = leaf_new();
1147
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001148 if (!l)
1149 return NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07001150
1151 l->key = key;
1152 li = leaf_info_new(plen);
1153
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001154 if (!li) {
Stephen Hemminger387a5482008-04-10 03:47:34 -07001155 free_leaf(l);
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001156 return NULL;
Robert Olssonf835e472005-06-28 15:00:39 -07001157 }
Robert Olsson19baf832005-06-21 12:43:18 -07001158
1159 fa_head = &li->falh;
1160 insert_leaf_info(&l->list, li);
1161
Robert Olsson19baf832005-06-21 12:43:18 -07001162 if (t->trie && n == NULL) {
Olof Johansson91b9a272005-08-09 20:24:39 -07001163 /* Case 2: n is NULL, and will just insert a new leaf */
Robert Olsson19baf832005-06-21 12:43:18 -07001164
Stephen Hemminger06801912007-08-10 15:22:13 -07001165 node_set_parent((struct node *)l, tp);
Robert Olsson19baf832005-06-21 12:43:18 -07001166
Olof Johansson91b9a272005-08-09 20:24:39 -07001167 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1168 put_child(t, (struct tnode *)tp, cindex, (struct node *)l);
1169 } else {
1170 /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001171 /*
1172 * Add a new tnode here
Robert Olsson19baf832005-06-21 12:43:18 -07001173 * first tnode need some special handling
1174 */
1175
1176 if (tp)
Olof Johansson91b9a272005-08-09 20:24:39 -07001177 pos = tp->pos+tp->bits;
Robert Olsson19baf832005-06-21 12:43:18 -07001178 else
Olof Johansson91b9a272005-08-09 20:24:39 -07001179 pos = 0;
1180
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001181 if (n) {
Robert Olsson19baf832005-06-21 12:43:18 -07001182 newpos = tkey_mismatch(key, pos, n->key);
1183 tn = tnode_new(n->key, newpos, 1);
Olof Johansson91b9a272005-08-09 20:24:39 -07001184 } else {
Robert Olsson19baf832005-06-21 12:43:18 -07001185 newpos = 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001186 tn = tnode_new(key, newpos, 1); /* First tnode */
Robert Olsson19baf832005-06-21 12:43:18 -07001187 }
Robert Olsson19baf832005-06-21 12:43:18 -07001188
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001189 if (!tn) {
Robert Olssonf835e472005-06-28 15:00:39 -07001190 free_leaf_info(li);
Stephen Hemminger387a5482008-04-10 03:47:34 -07001191 free_leaf(l);
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001192 return NULL;
Olof Johansson91b9a272005-08-09 20:24:39 -07001193 }
1194
Stephen Hemminger06801912007-08-10 15:22:13 -07001195 node_set_parent((struct node *)tn, tp);
Robert Olsson19baf832005-06-21 12:43:18 -07001196
Olof Johansson91b9a272005-08-09 20:24:39 -07001197 missbit = tkey_extract_bits(key, newpos, 1);
Robert Olsson19baf832005-06-21 12:43:18 -07001198 put_child(t, tn, missbit, (struct node *)l);
1199 put_child(t, tn, 1-missbit, n);
1200
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001201 if (tp) {
Robert Olsson19baf832005-06-21 12:43:18 -07001202 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001203 put_child(t, (struct tnode *)tp, cindex,
1204 (struct node *)tn);
Olof Johansson91b9a272005-08-09 20:24:39 -07001205 } else {
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001206 rcu_assign_pointer(t->trie, (struct node *)tn);
Robert Olsson19baf832005-06-21 12:43:18 -07001207 tp = tn;
1208 }
1209 }
Olof Johansson91b9a272005-08-09 20:24:39 -07001210
1211 if (tp && tp->pos + tp->bits > 32)
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001212 pr_warning("fib_trie"
1213 " tp=%p pos=%d, bits=%d, key=%0x plen=%d\n",
1214 tp, tp->pos, tp->bits, key, plen);
Olof Johansson91b9a272005-08-09 20:24:39 -07001215
Robert Olsson19baf832005-06-21 12:43:18 -07001216 /* Rebalance the trie */
Robert Olsson2373ce12005-08-25 13:01:29 -07001217
Jarek Poplawski7b855762009-06-18 00:28:51 -07001218 trie_rebalance(t, tp);
Robert Olssonf835e472005-06-28 15:00:39 -07001219done:
Robert Olsson19baf832005-06-21 12:43:18 -07001220 return fa_head;
1221}
1222
Robert Olssond562f1f2007-03-26 14:22:22 -07001223/*
1224 * Caller must hold RTNL.
1225 */
Thomas Graf4e902c52006-08-17 18:14:52 -07001226static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg)
Robert Olsson19baf832005-06-21 12:43:18 -07001227{
1228 struct trie *t = (struct trie *) tb->tb_data;
1229 struct fib_alias *fa, *new_fa;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001230 struct list_head *fa_head = NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07001231 struct fib_info *fi;
Thomas Graf4e902c52006-08-17 18:14:52 -07001232 int plen = cfg->fc_dst_len;
1233 u8 tos = cfg->fc_tos;
Robert Olsson19baf832005-06-21 12:43:18 -07001234 u32 key, mask;
1235 int err;
1236 struct leaf *l;
1237
1238 if (plen > 32)
1239 return -EINVAL;
1240
Thomas Graf4e902c52006-08-17 18:14:52 -07001241 key = ntohl(cfg->fc_dst);
Robert Olsson19baf832005-06-21 12:43:18 -07001242
Patrick McHardy2dfe55b2006-08-10 23:08:33 -07001243 pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen);
Robert Olsson19baf832005-06-21 12:43:18 -07001244
Olof Johansson91b9a272005-08-09 20:24:39 -07001245 mask = ntohl(inet_make_mask(plen));
Robert Olsson19baf832005-06-21 12:43:18 -07001246
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001247 if (key & ~mask)
Robert Olsson19baf832005-06-21 12:43:18 -07001248 return -EINVAL;
1249
1250 key = key & mask;
1251
Thomas Graf4e902c52006-08-17 18:14:52 -07001252 fi = fib_create_info(cfg);
1253 if (IS_ERR(fi)) {
1254 err = PTR_ERR(fi);
Robert Olsson19baf832005-06-21 12:43:18 -07001255 goto err;
Thomas Graf4e902c52006-08-17 18:14:52 -07001256 }
Robert Olsson19baf832005-06-21 12:43:18 -07001257
1258 l = fib_find_node(t, key);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001259 fa = NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07001260
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001261 if (l) {
Robert Olsson19baf832005-06-21 12:43:18 -07001262 fa_head = get_fa_head(l, plen);
1263 fa = fib_find_alias(fa_head, tos, fi->fib_priority);
1264 }
1265
1266 /* Now fa, if non-NULL, points to the first fib alias
1267 * with the same keys [prefix,tos,priority], if such key already
1268 * exists or to the node before which we will insert new one.
1269 *
1270 * If fa is NULL, we will need to allocate a new one and
1271 * insert to the head of f.
1272 *
1273 * If f is NULL, no fib node matched the destination key
1274 * and we need to allocate a new one of those as well.
1275 */
1276
Julian Anastasov936f6f82008-01-28 21:18:06 -08001277 if (fa && fa->fa_tos == tos &&
1278 fa->fa_info->fib_priority == fi->fib_priority) {
1279 struct fib_alias *fa_first, *fa_match;
Robert Olsson19baf832005-06-21 12:43:18 -07001280
1281 err = -EEXIST;
Thomas Graf4e902c52006-08-17 18:14:52 -07001282 if (cfg->fc_nlflags & NLM_F_EXCL)
Robert Olsson19baf832005-06-21 12:43:18 -07001283 goto out;
1284
Julian Anastasov936f6f82008-01-28 21:18:06 -08001285 /* We have 2 goals:
1286 * 1. Find exact match for type, scope, fib_info to avoid
1287 * duplicate routes
1288 * 2. Find next 'fa' (or head), NLM_F_APPEND inserts before it
1289 */
1290 fa_match = NULL;
1291 fa_first = fa;
1292 fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list);
1293 list_for_each_entry_continue(fa, fa_head, fa_list) {
1294 if (fa->fa_tos != tos)
1295 break;
1296 if (fa->fa_info->fib_priority != fi->fib_priority)
1297 break;
1298 if (fa->fa_type == cfg->fc_type &&
1299 fa->fa_scope == cfg->fc_scope &&
1300 fa->fa_info == fi) {
1301 fa_match = fa;
1302 break;
1303 }
1304 }
1305
Thomas Graf4e902c52006-08-17 18:14:52 -07001306 if (cfg->fc_nlflags & NLM_F_REPLACE) {
Robert Olsson19baf832005-06-21 12:43:18 -07001307 struct fib_info *fi_drop;
1308 u8 state;
1309
Julian Anastasov936f6f82008-01-28 21:18:06 -08001310 fa = fa_first;
1311 if (fa_match) {
1312 if (fa == fa_match)
1313 err = 0;
Joonwoo Park67250332008-01-18 03:45:18 -08001314 goto out;
Julian Anastasov936f6f82008-01-28 21:18:06 -08001315 }
Robert Olsson2373ce12005-08-25 13:01:29 -07001316 err = -ENOBUFS;
Christoph Lametere94b1762006-12-06 20:33:17 -08001317 new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
Robert Olsson2373ce12005-08-25 13:01:29 -07001318 if (new_fa == NULL)
1319 goto out;
Robert Olsson19baf832005-06-21 12:43:18 -07001320
1321 fi_drop = fa->fa_info;
Robert Olsson2373ce12005-08-25 13:01:29 -07001322 new_fa->fa_tos = fa->fa_tos;
1323 new_fa->fa_info = fi;
Thomas Graf4e902c52006-08-17 18:14:52 -07001324 new_fa->fa_type = cfg->fc_type;
1325 new_fa->fa_scope = cfg->fc_scope;
Robert Olsson19baf832005-06-21 12:43:18 -07001326 state = fa->fa_state;
Julian Anastasov936f6f82008-01-28 21:18:06 -08001327 new_fa->fa_state = state & ~FA_S_ACCESSED;
Robert Olsson19baf832005-06-21 12:43:18 -07001328
Robert Olsson2373ce12005-08-25 13:01:29 -07001329 list_replace_rcu(&fa->fa_list, &new_fa->fa_list);
1330 alias_free_mem_rcu(fa);
Robert Olsson19baf832005-06-21 12:43:18 -07001331
1332 fib_release_info(fi_drop);
1333 if (state & FA_S_ACCESSED)
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -07001334 rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
Milan Kocianb8f55832007-05-23 14:55:06 -07001335 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen,
1336 tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE);
Robert Olsson19baf832005-06-21 12:43:18 -07001337
Olof Johansson91b9a272005-08-09 20:24:39 -07001338 goto succeeded;
Robert Olsson19baf832005-06-21 12:43:18 -07001339 }
1340 /* Error if we find a perfect match which
1341 * uses the same scope, type, and nexthop
1342 * information.
1343 */
Julian Anastasov936f6f82008-01-28 21:18:06 -08001344 if (fa_match)
1345 goto out;
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001346
Thomas Graf4e902c52006-08-17 18:14:52 -07001347 if (!(cfg->fc_nlflags & NLM_F_APPEND))
Julian Anastasov936f6f82008-01-28 21:18:06 -08001348 fa = fa_first;
Robert Olsson19baf832005-06-21 12:43:18 -07001349 }
1350 err = -ENOENT;
Thomas Graf4e902c52006-08-17 18:14:52 -07001351 if (!(cfg->fc_nlflags & NLM_F_CREATE))
Robert Olsson19baf832005-06-21 12:43:18 -07001352 goto out;
1353
1354 err = -ENOBUFS;
Christoph Lametere94b1762006-12-06 20:33:17 -08001355 new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
Robert Olsson19baf832005-06-21 12:43:18 -07001356 if (new_fa == NULL)
1357 goto out;
1358
1359 new_fa->fa_info = fi;
1360 new_fa->fa_tos = tos;
Thomas Graf4e902c52006-08-17 18:14:52 -07001361 new_fa->fa_type = cfg->fc_type;
1362 new_fa->fa_scope = cfg->fc_scope;
Robert Olsson19baf832005-06-21 12:43:18 -07001363 new_fa->fa_state = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001364 /*
1365 * Insert new entry to the list.
1366 */
1367
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001368 if (!fa_head) {
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001369 fa_head = fib_insert_node(t, key, plen);
1370 if (unlikely(!fa_head)) {
1371 err = -ENOMEM;
Robert Olssonf835e472005-06-28 15:00:39 -07001372 goto out_free_new_fa;
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001373 }
Robert Olssonf835e472005-06-28 15:00:39 -07001374 }
Robert Olsson19baf832005-06-21 12:43:18 -07001375
Robert Olsson2373ce12005-08-25 13:01:29 -07001376 list_add_tail_rcu(&new_fa->fa_list,
1377 (fa ? &fa->fa_list : fa_head));
Robert Olsson19baf832005-06-21 12:43:18 -07001378
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -07001379 rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
Thomas Graf4e902c52006-08-17 18:14:52 -07001380 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id,
Milan Kocianb8f55832007-05-23 14:55:06 -07001381 &cfg->fc_nlinfo, 0);
Robert Olsson19baf832005-06-21 12:43:18 -07001382succeeded:
1383 return 0;
Robert Olssonf835e472005-06-28 15:00:39 -07001384
1385out_free_new_fa:
1386 kmem_cache_free(fn_alias_kmem, new_fa);
Robert Olsson19baf832005-06-21 12:43:18 -07001387out:
1388 fib_release_info(fi);
Olof Johansson91b9a272005-08-09 20:24:39 -07001389err:
Robert Olsson19baf832005-06-21 12:43:18 -07001390 return err;
1391}
1392
Robert Olsson772cb712005-09-19 15:31:18 -07001393/* should be called with rcu_read_lock */
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001394static int check_leaf(struct trie *t, struct leaf *l,
1395 t_key key, const struct flowi *flp,
1396 struct fib_result *res)
Robert Olsson19baf832005-06-21 12:43:18 -07001397{
Robert Olsson19baf832005-06-21 12:43:18 -07001398 struct leaf_info *li;
1399 struct hlist_head *hhead = &l->list;
1400 struct hlist_node *node;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001401
Robert Olsson2373ce12005-08-25 13:01:29 -07001402 hlist_for_each_entry_rcu(li, node, hhead, hlist) {
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001403 int err;
1404 int plen = li->plen;
1405 __be32 mask = inet_make_mask(plen);
1406
Al Viro888454c2006-09-19 13:42:46 -07001407 if (l->key != (key & ntohl(mask)))
Robert Olsson19baf832005-06-21 12:43:18 -07001408 continue;
1409
Rami Rosene204a342009-05-18 01:19:12 +00001410 err = fib_semantic_match(&li->falh, flp, res, plen);
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001411
Robert Olsson19baf832005-06-21 12:43:18 -07001412#ifdef CONFIG_IP_FIB_TRIE_STATS
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001413 if (err <= 0)
Robert Olsson19baf832005-06-21 12:43:18 -07001414 t->stats.semantic_match_passed++;
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001415 else
1416 t->stats.semantic_match_miss++;
Robert Olsson19baf832005-06-21 12:43:18 -07001417#endif
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001418 if (err <= 0)
Ben Hutchings2e655572008-07-10 16:52:52 -07001419 return err;
Robert Olsson19baf832005-06-21 12:43:18 -07001420 }
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001421
Ben Hutchings2e655572008-07-10 16:52:52 -07001422 return 1;
Robert Olsson19baf832005-06-21 12:43:18 -07001423}
1424
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001425static int fn_trie_lookup(struct fib_table *tb, const struct flowi *flp,
1426 struct fib_result *res)
Robert Olsson19baf832005-06-21 12:43:18 -07001427{
1428 struct trie *t = (struct trie *) tb->tb_data;
Ben Hutchings2e655572008-07-10 16:52:52 -07001429 int ret;
Robert Olsson19baf832005-06-21 12:43:18 -07001430 struct node *n;
1431 struct tnode *pn;
1432 int pos, bits;
Olof Johansson91b9a272005-08-09 20:24:39 -07001433 t_key key = ntohl(flp->fl4_dst);
Robert Olsson19baf832005-06-21 12:43:18 -07001434 int chopped_off;
1435 t_key cindex = 0;
1436 int current_prefix_length = KEYLENGTH;
Olof Johansson91b9a272005-08-09 20:24:39 -07001437 struct tnode *cn;
1438 t_key node_prefix, key_prefix, pref_mismatch;
1439 int mp;
1440
Robert Olsson2373ce12005-08-25 13:01:29 -07001441 rcu_read_lock();
Robert Olsson19baf832005-06-21 12:43:18 -07001442
Robert Olsson2373ce12005-08-25 13:01:29 -07001443 n = rcu_dereference(t->trie);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001444 if (!n)
Robert Olsson19baf832005-06-21 12:43:18 -07001445 goto failed;
1446
1447#ifdef CONFIG_IP_FIB_TRIE_STATS
1448 t->stats.gets++;
1449#endif
1450
1451 /* Just a leaf? */
1452 if (IS_LEAF(n)) {
Ben Hutchings2e655572008-07-10 16:52:52 -07001453 ret = check_leaf(t, (struct leaf *)n, key, flp, res);
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001454 goto found;
Robert Olsson19baf832005-06-21 12:43:18 -07001455 }
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001456
Robert Olsson19baf832005-06-21 12:43:18 -07001457 pn = (struct tnode *) n;
1458 chopped_off = 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001459
Olof Johansson91b9a272005-08-09 20:24:39 -07001460 while (pn) {
Robert Olsson19baf832005-06-21 12:43:18 -07001461 pos = pn->pos;
1462 bits = pn->bits;
1463
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001464 if (!chopped_off)
Stephen Hemmingerab66b4a2007-08-10 15:22:58 -07001465 cindex = tkey_extract_bits(mask_pfx(key, current_prefix_length),
1466 pos, bits);
Robert Olsson19baf832005-06-21 12:43:18 -07001467
Jarek Poplawskib902e572009-07-14 11:20:32 +00001468 n = tnode_get_child_rcu(pn, cindex);
Robert Olsson19baf832005-06-21 12:43:18 -07001469
1470 if (n == NULL) {
1471#ifdef CONFIG_IP_FIB_TRIE_STATS
1472 t->stats.null_node_hit++;
1473#endif
1474 goto backtrace;
1475 }
1476
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001477 if (IS_LEAF(n)) {
Ben Hutchings2e655572008-07-10 16:52:52 -07001478 ret = check_leaf(t, (struct leaf *)n, key, flp, res);
1479 if (ret > 0)
Olof Johansson91b9a272005-08-09 20:24:39 -07001480 goto backtrace;
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001481 goto found;
Olof Johansson91b9a272005-08-09 20:24:39 -07001482 }
1483
Olof Johansson91b9a272005-08-09 20:24:39 -07001484 cn = (struct tnode *)n;
1485
1486 /*
1487 * It's a tnode, and we can do some extra checks here if we
1488 * like, to avoid descending into a dead-end branch.
1489 * This tnode is in the parent's child array at index
1490 * key[p_pos..p_pos+p_bits] but potentially with some bits
1491 * chopped off, so in reality the index may be just a
1492 * subprefix, padded with zero at the end.
1493 * We can also take a look at any skipped bits in this
1494 * tnode - everything up to p_pos is supposed to be ok,
1495 * and the non-chopped bits of the index (se previous
1496 * paragraph) are also guaranteed ok, but the rest is
1497 * considered unknown.
1498 *
1499 * The skipped bits are key[pos+bits..cn->pos].
1500 */
1501
1502 /* If current_prefix_length < pos+bits, we are already doing
1503 * actual prefix matching, which means everything from
1504 * pos+(bits-chopped_off) onward must be zero along some
1505 * branch of this subtree - otherwise there is *no* valid
1506 * prefix present. Here we can only check the skipped
1507 * bits. Remember, since we have already indexed into the
1508 * parent's child array, we know that the bits we chopped of
1509 * *are* zero.
1510 */
1511
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001512 /* NOTA BENE: Checking only skipped bits
1513 for the new node here */
Olof Johansson91b9a272005-08-09 20:24:39 -07001514
1515 if (current_prefix_length < pos+bits) {
1516 if (tkey_extract_bits(cn->key, current_prefix_length,
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001517 cn->pos - current_prefix_length)
1518 || !(cn->child[0]))
Olof Johansson91b9a272005-08-09 20:24:39 -07001519 goto backtrace;
1520 }
1521
1522 /*
1523 * If chopped_off=0, the index is fully validated and we
1524 * only need to look at the skipped bits for this, the new,
1525 * tnode. What we actually want to do is to find out if
1526 * these skipped bits match our key perfectly, or if we will
1527 * have to count on finding a matching prefix further down,
1528 * because if we do, we would like to have some way of
1529 * verifying the existence of such a prefix at this point.
1530 */
1531
1532 /* The only thing we can do at this point is to verify that
1533 * any such matching prefix can indeed be a prefix to our
1534 * key, and if the bits in the node we are inspecting that
1535 * do not match our key are not ZERO, this cannot be true.
1536 * Thus, find out where there is a mismatch (before cn->pos)
1537 * and verify that all the mismatching bits are zero in the
1538 * new tnode's key.
1539 */
1540
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001541 /*
1542 * Note: We aren't very concerned about the piece of
1543 * the key that precede pn->pos+pn->bits, since these
1544 * have already been checked. The bits after cn->pos
1545 * aren't checked since these are by definition
1546 * "unknown" at this point. Thus, what we want to see
1547 * is if we are about to enter the "prefix matching"
1548 * state, and in that case verify that the skipped
1549 * bits that will prevail throughout this subtree are
1550 * zero, as they have to be if we are to find a
1551 * matching prefix.
Olof Johansson91b9a272005-08-09 20:24:39 -07001552 */
1553
Stephen Hemmingerab66b4a2007-08-10 15:22:58 -07001554 node_prefix = mask_pfx(cn->key, cn->pos);
1555 key_prefix = mask_pfx(key, cn->pos);
Olof Johansson91b9a272005-08-09 20:24:39 -07001556 pref_mismatch = key_prefix^node_prefix;
1557 mp = 0;
1558
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001559 /*
1560 * In short: If skipped bits in this node do not match
1561 * the search key, enter the "prefix matching"
1562 * state.directly.
Olof Johansson91b9a272005-08-09 20:24:39 -07001563 */
1564 if (pref_mismatch) {
1565 while (!(pref_mismatch & (1<<(KEYLENGTH-1)))) {
1566 mp++;
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001567 pref_mismatch = pref_mismatch << 1;
Olof Johansson91b9a272005-08-09 20:24:39 -07001568 }
1569 key_prefix = tkey_extract_bits(cn->key, mp, cn->pos-mp);
1570
1571 if (key_prefix != 0)
1572 goto backtrace;
1573
1574 if (current_prefix_length >= cn->pos)
1575 current_prefix_length = mp;
1576 }
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001577
Olof Johansson91b9a272005-08-09 20:24:39 -07001578 pn = (struct tnode *)n; /* Descend */
1579 chopped_off = 0;
1580 continue;
1581
Robert Olsson19baf832005-06-21 12:43:18 -07001582backtrace:
1583 chopped_off++;
1584
1585 /* As zero don't change the child key (cindex) */
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001586 while ((chopped_off <= pn->bits)
1587 && !(cindex & (1<<(chopped_off-1))))
Robert Olsson19baf832005-06-21 12:43:18 -07001588 chopped_off++;
Robert Olsson19baf832005-06-21 12:43:18 -07001589
1590 /* Decrease current_... with bits chopped off */
1591 if (current_prefix_length > pn->pos + pn->bits - chopped_off)
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001592 current_prefix_length = pn->pos + pn->bits
1593 - chopped_off;
Olof Johansson91b9a272005-08-09 20:24:39 -07001594
Robert Olsson19baf832005-06-21 12:43:18 -07001595 /*
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001596 * Either we do the actual chop off according or if we have
Robert Olsson19baf832005-06-21 12:43:18 -07001597 * chopped off all bits in this tnode walk up to our parent.
1598 */
1599
Olof Johansson91b9a272005-08-09 20:24:39 -07001600 if (chopped_off <= pn->bits) {
Robert Olsson19baf832005-06-21 12:43:18 -07001601 cindex &= ~(1 << (chopped_off-1));
Olof Johansson91b9a272005-08-09 20:24:39 -07001602 } else {
Jarek Poplawskib902e572009-07-14 11:20:32 +00001603 struct tnode *parent = node_parent_rcu((struct node *) pn);
Stephen Hemminger06801912007-08-10 15:22:13 -07001604 if (!parent)
Robert Olsson19baf832005-06-21 12:43:18 -07001605 goto failed;
Olof Johansson91b9a272005-08-09 20:24:39 -07001606
Robert Olsson19baf832005-06-21 12:43:18 -07001607 /* Get Child's index */
Stephen Hemminger06801912007-08-10 15:22:13 -07001608 cindex = tkey_extract_bits(pn->key, parent->pos, parent->bits);
1609 pn = parent;
Robert Olsson19baf832005-06-21 12:43:18 -07001610 chopped_off = 0;
1611
1612#ifdef CONFIG_IP_FIB_TRIE_STATS
1613 t->stats.backtrack++;
1614#endif
1615 goto backtrace;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001616 }
Robert Olsson19baf832005-06-21 12:43:18 -07001617 }
1618failed:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001619 ret = 1;
Robert Olsson19baf832005-06-21 12:43:18 -07001620found:
Robert Olsson2373ce12005-08-25 13:01:29 -07001621 rcu_read_unlock();
Robert Olsson19baf832005-06-21 12:43:18 -07001622 return ret;
1623}
1624
Stephen Hemminger9195bef2008-01-22 21:56:34 -08001625/*
1626 * Remove the leaf and return parent.
1627 */
1628static void trie_leaf_remove(struct trie *t, struct leaf *l)
Robert Olsson19baf832005-06-21 12:43:18 -07001629{
Stephen Hemminger9195bef2008-01-22 21:56:34 -08001630 struct tnode *tp = node_parent((struct node *) l);
Robert Olsson19baf832005-06-21 12:43:18 -07001631
Stephen Hemminger9195bef2008-01-22 21:56:34 -08001632 pr_debug("entering trie_leaf_remove(%p)\n", l);
Robert Olsson19baf832005-06-21 12:43:18 -07001633
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001634 if (tp) {
Stephen Hemminger9195bef2008-01-22 21:56:34 -08001635 t_key cindex = tkey_extract_bits(l->key, tp->pos, tp->bits);
Robert Olsson19baf832005-06-21 12:43:18 -07001636 put_child(t, (struct tnode *)tp, cindex, NULL);
Jarek Poplawski7b855762009-06-18 00:28:51 -07001637 trie_rebalance(t, tp);
Olof Johansson91b9a272005-08-09 20:24:39 -07001638 } else
Robert Olsson2373ce12005-08-25 13:01:29 -07001639 rcu_assign_pointer(t->trie, NULL);
Robert Olsson19baf832005-06-21 12:43:18 -07001640
Stephen Hemminger387a5482008-04-10 03:47:34 -07001641 free_leaf(l);
Robert Olsson19baf832005-06-21 12:43:18 -07001642}
1643
Robert Olssond562f1f2007-03-26 14:22:22 -07001644/*
1645 * Caller must hold RTNL.
1646 */
Thomas Graf4e902c52006-08-17 18:14:52 -07001647static int fn_trie_delete(struct fib_table *tb, struct fib_config *cfg)
Robert Olsson19baf832005-06-21 12:43:18 -07001648{
1649 struct trie *t = (struct trie *) tb->tb_data;
1650 u32 key, mask;
Thomas Graf4e902c52006-08-17 18:14:52 -07001651 int plen = cfg->fc_dst_len;
1652 u8 tos = cfg->fc_tos;
Robert Olsson19baf832005-06-21 12:43:18 -07001653 struct fib_alias *fa, *fa_to_delete;
1654 struct list_head *fa_head;
1655 struct leaf *l;
Olof Johansson91b9a272005-08-09 20:24:39 -07001656 struct leaf_info *li;
1657
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001658 if (plen > 32)
Robert Olsson19baf832005-06-21 12:43:18 -07001659 return -EINVAL;
1660
Thomas Graf4e902c52006-08-17 18:14:52 -07001661 key = ntohl(cfg->fc_dst);
Olof Johansson91b9a272005-08-09 20:24:39 -07001662 mask = ntohl(inet_make_mask(plen));
Robert Olsson19baf832005-06-21 12:43:18 -07001663
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001664 if (key & ~mask)
Robert Olsson19baf832005-06-21 12:43:18 -07001665 return -EINVAL;
1666
1667 key = key & mask;
1668 l = fib_find_node(t, key);
1669
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001670 if (!l)
Robert Olsson19baf832005-06-21 12:43:18 -07001671 return -ESRCH;
1672
1673 fa_head = get_fa_head(l, plen);
1674 fa = fib_find_alias(fa_head, tos, 0);
1675
1676 if (!fa)
1677 return -ESRCH;
1678
Stephen Hemminger0c7770c2005-08-23 21:59:41 -07001679 pr_debug("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t);
Robert Olsson19baf832005-06-21 12:43:18 -07001680
1681 fa_to_delete = NULL;
Julian Anastasov936f6f82008-01-28 21:18:06 -08001682 fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list);
1683 list_for_each_entry_continue(fa, fa_head, fa_list) {
Robert Olsson19baf832005-06-21 12:43:18 -07001684 struct fib_info *fi = fa->fa_info;
1685
1686 if (fa->fa_tos != tos)
1687 break;
1688
Thomas Graf4e902c52006-08-17 18:14:52 -07001689 if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) &&
1690 (cfg->fc_scope == RT_SCOPE_NOWHERE ||
1691 fa->fa_scope == cfg->fc_scope) &&
1692 (!cfg->fc_protocol ||
1693 fi->fib_protocol == cfg->fc_protocol) &&
1694 fib_nh_match(cfg, fi) == 0) {
Robert Olsson19baf832005-06-21 12:43:18 -07001695 fa_to_delete = fa;
1696 break;
1697 }
1698 }
1699
Olof Johansson91b9a272005-08-09 20:24:39 -07001700 if (!fa_to_delete)
1701 return -ESRCH;
Robert Olsson19baf832005-06-21 12:43:18 -07001702
Olof Johansson91b9a272005-08-09 20:24:39 -07001703 fa = fa_to_delete;
Thomas Graf4e902c52006-08-17 18:14:52 -07001704 rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id,
Milan Kocianb8f55832007-05-23 14:55:06 -07001705 &cfg->fc_nlinfo, 0);
Robert Olsson19baf832005-06-21 12:43:18 -07001706
Olof Johansson91b9a272005-08-09 20:24:39 -07001707 l = fib_find_node(t, key);
Robert Olsson772cb712005-09-19 15:31:18 -07001708 li = find_leaf_info(l, plen);
Robert Olsson19baf832005-06-21 12:43:18 -07001709
Robert Olsson2373ce12005-08-25 13:01:29 -07001710 list_del_rcu(&fa->fa_list);
Robert Olsson19baf832005-06-21 12:43:18 -07001711
Olof Johansson91b9a272005-08-09 20:24:39 -07001712 if (list_empty(fa_head)) {
Robert Olsson2373ce12005-08-25 13:01:29 -07001713 hlist_del_rcu(&li->hlist);
Olof Johansson91b9a272005-08-09 20:24:39 -07001714 free_leaf_info(li);
Robert Olsson2373ce12005-08-25 13:01:29 -07001715 }
Olof Johansson91b9a272005-08-09 20:24:39 -07001716
1717 if (hlist_empty(&l->list))
Stephen Hemminger9195bef2008-01-22 21:56:34 -08001718 trie_leaf_remove(t, l);
Olof Johansson91b9a272005-08-09 20:24:39 -07001719
1720 if (fa->fa_state & FA_S_ACCESSED)
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -07001721 rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
Olof Johansson91b9a272005-08-09 20:24:39 -07001722
Robert Olsson2373ce12005-08-25 13:01:29 -07001723 fib_release_info(fa->fa_info);
1724 alias_free_mem_rcu(fa);
Olof Johansson91b9a272005-08-09 20:24:39 -07001725 return 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001726}
1727
Stephen Hemmingeref3660c2008-04-10 03:46:12 -07001728static int trie_flush_list(struct list_head *head)
Robert Olsson19baf832005-06-21 12:43:18 -07001729{
1730 struct fib_alias *fa, *fa_node;
1731 int found = 0;
1732
1733 list_for_each_entry_safe(fa, fa_node, head, fa_list) {
1734 struct fib_info *fi = fa->fa_info;
Robert Olsson19baf832005-06-21 12:43:18 -07001735
Robert Olsson2373ce12005-08-25 13:01:29 -07001736 if (fi && (fi->fib_flags & RTNH_F_DEAD)) {
1737 list_del_rcu(&fa->fa_list);
1738 fib_release_info(fa->fa_info);
1739 alias_free_mem_rcu(fa);
Robert Olsson19baf832005-06-21 12:43:18 -07001740 found++;
1741 }
1742 }
1743 return found;
1744}
1745
Stephen Hemmingeref3660c2008-04-10 03:46:12 -07001746static int trie_flush_leaf(struct leaf *l)
Robert Olsson19baf832005-06-21 12:43:18 -07001747{
1748 int found = 0;
1749 struct hlist_head *lih = &l->list;
1750 struct hlist_node *node, *tmp;
1751 struct leaf_info *li = NULL;
1752
1753 hlist_for_each_entry_safe(li, node, tmp, lih, hlist) {
Stephen Hemmingeref3660c2008-04-10 03:46:12 -07001754 found += trie_flush_list(&li->falh);
Robert Olsson19baf832005-06-21 12:43:18 -07001755
1756 if (list_empty(&li->falh)) {
Robert Olsson2373ce12005-08-25 13:01:29 -07001757 hlist_del_rcu(&li->hlist);
Robert Olsson19baf832005-06-21 12:43:18 -07001758 free_leaf_info(li);
1759 }
1760 }
1761 return found;
1762}
1763
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001764/*
1765 * Scan for the next right leaf starting at node p->child[idx]
1766 * Since we have back pointer, no recursion necessary.
1767 */
1768static struct leaf *leaf_walk_rcu(struct tnode *p, struct node *c)
Robert Olsson19baf832005-06-21 12:43:18 -07001769{
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001770 do {
1771 t_key idx;
Robert Olsson19baf832005-06-21 12:43:18 -07001772
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001773 if (c)
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001774 idx = tkey_extract_bits(c->key, p->pos, p->bits) + 1;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001775 else
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001776 idx = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001777
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001778 while (idx < 1u << p->bits) {
1779 c = tnode_get_child_rcu(p, idx++);
Robert Olsson2373ce12005-08-25 13:01:29 -07001780 if (!c)
Olof Johansson91b9a272005-08-09 20:24:39 -07001781 continue;
Robert Olsson19baf832005-06-21 12:43:18 -07001782
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001783 if (IS_LEAF(c)) {
1784 prefetch(p->child[idx]);
1785 return (struct leaf *) c;
Robert Olsson19baf832005-06-21 12:43:18 -07001786 }
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001787
1788 /* Rescan start scanning in new node */
1789 p = (struct tnode *) c;
1790 idx = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001791 }
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001792
1793 /* Node empty, walk back up to parent */
Olof Johansson91b9a272005-08-09 20:24:39 -07001794 c = (struct node *) p;
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001795 } while ( (p = node_parent_rcu(c)) != NULL);
1796
1797 return NULL; /* Root of trie */
1798}
1799
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001800static struct leaf *trie_firstleaf(struct trie *t)
1801{
1802 struct tnode *n = (struct tnode *) rcu_dereference(t->trie);
1803
1804 if (!n)
1805 return NULL;
1806
1807 if (IS_LEAF(n)) /* trie is just a leaf */
1808 return (struct leaf *) n;
1809
1810 return leaf_walk_rcu(n, NULL);
1811}
1812
1813static struct leaf *trie_nextleaf(struct leaf *l)
1814{
1815 struct node *c = (struct node *) l;
Jarek Poplawskib902e572009-07-14 11:20:32 +00001816 struct tnode *p = node_parent_rcu(c);
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001817
1818 if (!p)
1819 return NULL; /* trie with just one leaf */
1820
1821 return leaf_walk_rcu(p, c);
Robert Olsson19baf832005-06-21 12:43:18 -07001822}
1823
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001824static struct leaf *trie_leafindex(struct trie *t, int index)
1825{
1826 struct leaf *l = trie_firstleaf(t);
1827
Stephen Hemmingerec28cf72008-02-11 21:12:49 -08001828 while (l && index-- > 0)
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001829 l = trie_nextleaf(l);
Stephen Hemmingerec28cf72008-02-11 21:12:49 -08001830
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001831 return l;
1832}
1833
1834
Robert Olssond562f1f2007-03-26 14:22:22 -07001835/*
1836 * Caller must hold RTNL.
1837 */
Robert Olsson19baf832005-06-21 12:43:18 -07001838static int fn_trie_flush(struct fib_table *tb)
1839{
1840 struct trie *t = (struct trie *) tb->tb_data;
Stephen Hemminger9195bef2008-01-22 21:56:34 -08001841 struct leaf *l, *ll = NULL;
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001842 int found = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001843
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001844 for (l = trie_firstleaf(t); l; l = trie_nextleaf(l)) {
Stephen Hemmingeref3660c2008-04-10 03:46:12 -07001845 found += trie_flush_leaf(l);
Robert Olsson19baf832005-06-21 12:43:18 -07001846
1847 if (ll && hlist_empty(&ll->list))
Stephen Hemminger9195bef2008-01-22 21:56:34 -08001848 trie_leaf_remove(t, ll);
Robert Olsson19baf832005-06-21 12:43:18 -07001849 ll = l;
1850 }
1851
1852 if (ll && hlist_empty(&ll->list))
Stephen Hemminger9195bef2008-01-22 21:56:34 -08001853 trie_leaf_remove(t, ll);
Robert Olsson19baf832005-06-21 12:43:18 -07001854
Stephen Hemminger0c7770c2005-08-23 21:59:41 -07001855 pr_debug("trie_flush found=%d\n", found);
Robert Olsson19baf832005-06-21 12:43:18 -07001856 return found;
1857}
1858
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001859static void fn_trie_select_default(struct fib_table *tb,
1860 const struct flowi *flp,
1861 struct fib_result *res)
Robert Olsson19baf832005-06-21 12:43:18 -07001862{
1863 struct trie *t = (struct trie *) tb->tb_data;
1864 int order, last_idx;
1865 struct fib_info *fi = NULL;
1866 struct fib_info *last_resort;
1867 struct fib_alias *fa = NULL;
1868 struct list_head *fa_head;
1869 struct leaf *l;
1870
1871 last_idx = -1;
1872 last_resort = NULL;
1873 order = -1;
1874
Robert Olsson2373ce12005-08-25 13:01:29 -07001875 rcu_read_lock();
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001876
Robert Olsson19baf832005-06-21 12:43:18 -07001877 l = fib_find_node(t, 0);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001878 if (!l)
Robert Olsson19baf832005-06-21 12:43:18 -07001879 goto out;
1880
1881 fa_head = get_fa_head(l, 0);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001882 if (!fa_head)
Robert Olsson19baf832005-06-21 12:43:18 -07001883 goto out;
1884
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001885 if (list_empty(fa_head))
Robert Olsson19baf832005-06-21 12:43:18 -07001886 goto out;
1887
Robert Olsson2373ce12005-08-25 13:01:29 -07001888 list_for_each_entry_rcu(fa, fa_head, fa_list) {
Robert Olsson19baf832005-06-21 12:43:18 -07001889 struct fib_info *next_fi = fa->fa_info;
Olof Johansson91b9a272005-08-09 20:24:39 -07001890
Robert Olsson19baf832005-06-21 12:43:18 -07001891 if (fa->fa_scope != res->scope ||
1892 fa->fa_type != RTN_UNICAST)
1893 continue;
Olof Johansson91b9a272005-08-09 20:24:39 -07001894
Robert Olsson19baf832005-06-21 12:43:18 -07001895 if (next_fi->fib_priority > res->fi->fib_priority)
1896 break;
1897 if (!next_fi->fib_nh[0].nh_gw ||
1898 next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
1899 continue;
1900 fa->fa_state |= FA_S_ACCESSED;
Olof Johansson91b9a272005-08-09 20:24:39 -07001901
Robert Olsson19baf832005-06-21 12:43:18 -07001902 if (fi == NULL) {
1903 if (next_fi != res->fi)
1904 break;
1905 } else if (!fib_detect_death(fi, order, &last_resort,
Denis V. Lunev971b8932007-12-08 00:32:23 -08001906 &last_idx, tb->tb_default)) {
Denis V. Luneva2bbe682007-12-08 00:31:44 -08001907 fib_result_assign(res, fi);
Denis V. Lunev971b8932007-12-08 00:32:23 -08001908 tb->tb_default = order;
Robert Olsson19baf832005-06-21 12:43:18 -07001909 goto out;
1910 }
1911 fi = next_fi;
1912 order++;
1913 }
1914 if (order <= 0 || fi == NULL) {
Denis V. Lunev971b8932007-12-08 00:32:23 -08001915 tb->tb_default = -1;
Robert Olsson19baf832005-06-21 12:43:18 -07001916 goto out;
1917 }
1918
Denis V. Lunev971b8932007-12-08 00:32:23 -08001919 if (!fib_detect_death(fi, order, &last_resort, &last_idx,
1920 tb->tb_default)) {
Denis V. Luneva2bbe682007-12-08 00:31:44 -08001921 fib_result_assign(res, fi);
Denis V. Lunev971b8932007-12-08 00:32:23 -08001922 tb->tb_default = order;
Robert Olsson19baf832005-06-21 12:43:18 -07001923 goto out;
1924 }
Denis V. Luneva2bbe682007-12-08 00:31:44 -08001925 if (last_idx >= 0)
1926 fib_result_assign(res, last_resort);
Denis V. Lunev971b8932007-12-08 00:32:23 -08001927 tb->tb_default = last_idx;
1928out:
Robert Olsson2373ce12005-08-25 13:01:29 -07001929 rcu_read_unlock();
Robert Olsson19baf832005-06-21 12:43:18 -07001930}
1931
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001932static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah,
1933 struct fib_table *tb,
Robert Olsson19baf832005-06-21 12:43:18 -07001934 struct sk_buff *skb, struct netlink_callback *cb)
1935{
1936 int i, s_i;
1937 struct fib_alias *fa;
Al Viro32ab5f82006-09-26 22:21:45 -07001938 __be32 xkey = htonl(key);
Robert Olsson19baf832005-06-21 12:43:18 -07001939
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001940 s_i = cb->args[5];
Robert Olsson19baf832005-06-21 12:43:18 -07001941 i = 0;
1942
Robert Olsson2373ce12005-08-25 13:01:29 -07001943 /* rcu_read_lock is hold by caller */
1944
1945 list_for_each_entry_rcu(fa, fah, fa_list) {
Robert Olsson19baf832005-06-21 12:43:18 -07001946 if (i < s_i) {
1947 i++;
1948 continue;
1949 }
Robert Olsson19baf832005-06-21 12:43:18 -07001950
1951 if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid,
1952 cb->nlh->nlmsg_seq,
1953 RTM_NEWROUTE,
1954 tb->tb_id,
1955 fa->fa_type,
1956 fa->fa_scope,
Thomas Grafbe403ea2006-08-17 18:15:17 -07001957 xkey,
Robert Olsson19baf832005-06-21 12:43:18 -07001958 plen,
1959 fa->fa_tos,
Stephen Hemminger64347f72008-01-22 21:55:01 -08001960 fa->fa_info, NLM_F_MULTI) < 0) {
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001961 cb->args[5] = i;
Robert Olsson19baf832005-06-21 12:43:18 -07001962 return -1;
Olof Johansson91b9a272005-08-09 20:24:39 -07001963 }
Robert Olsson19baf832005-06-21 12:43:18 -07001964 i++;
1965 }
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001966 cb->args[5] = i;
Robert Olsson19baf832005-06-21 12:43:18 -07001967 return skb->len;
1968}
1969
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001970static int fn_trie_dump_leaf(struct leaf *l, struct fib_table *tb,
1971 struct sk_buff *skb, struct netlink_callback *cb)
Robert Olsson19baf832005-06-21 12:43:18 -07001972{
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001973 struct leaf_info *li;
1974 struct hlist_node *node;
1975 int i, s_i;
Robert Olsson19baf832005-06-21 12:43:18 -07001976
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001977 s_i = cb->args[4];
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001978 i = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001979
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001980 /* rcu_read_lock is hold by caller */
1981 hlist_for_each_entry_rcu(li, node, &l->list, hlist) {
1982 if (i < s_i) {
1983 i++;
Robert Olsson19baf832005-06-21 12:43:18 -07001984 continue;
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001985 }
Robert Olsson19baf832005-06-21 12:43:18 -07001986
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001987 if (i > s_i)
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001988 cb->args[5] = 0;
Olof Johansson91b9a272005-08-09 20:24:39 -07001989
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001990 if (list_empty(&li->falh))
Robert Olsson19baf832005-06-21 12:43:18 -07001991 continue;
1992
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001993 if (fn_trie_dump_fa(l->key, li->plen, &li->falh, tb, skb, cb) < 0) {
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001994 cb->args[4] = i;
Robert Olsson19baf832005-06-21 12:43:18 -07001995 return -1;
1996 }
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001997 i++;
Robert Olsson19baf832005-06-21 12:43:18 -07001998 }
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001999
Stephen Hemminger71d67e62008-01-31 16:45:47 -08002000 cb->args[4] = i;
Robert Olsson19baf832005-06-21 12:43:18 -07002001 return skb->len;
2002}
2003
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08002004static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb,
2005 struct netlink_callback *cb)
Robert Olsson19baf832005-06-21 12:43:18 -07002006{
Stephen Hemmingera88ee222008-01-22 21:56:11 -08002007 struct leaf *l;
Robert Olsson19baf832005-06-21 12:43:18 -07002008 struct trie *t = (struct trie *) tb->tb_data;
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08002009 t_key key = cb->args[2];
Stephen Hemminger71d67e62008-01-31 16:45:47 -08002010 int count = cb->args[3];
Robert Olsson19baf832005-06-21 12:43:18 -07002011
Robert Olsson2373ce12005-08-25 13:01:29 -07002012 rcu_read_lock();
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08002013 /* Dump starting at last key.
2014 * Note: 0.0.0.0/0 (ie default) is first key.
2015 */
Stephen Hemminger71d67e62008-01-31 16:45:47 -08002016 if (count == 0)
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08002017 l = trie_firstleaf(t);
2018 else {
Stephen Hemminger71d67e62008-01-31 16:45:47 -08002019 /* Normally, continue from last key, but if that is missing
2020 * fallback to using slow rescan
2021 */
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08002022 l = fib_find_node(t, key);
Stephen Hemminger71d67e62008-01-31 16:45:47 -08002023 if (!l)
2024 l = trie_leafindex(t, count);
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08002025 }
Stephen Hemmingera88ee222008-01-22 21:56:11 -08002026
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08002027 while (l) {
2028 cb->args[2] = l->key;
Stephen Hemmingera88ee222008-01-22 21:56:11 -08002029 if (fn_trie_dump_leaf(l, tb, skb, cb) < 0) {
Stephen Hemminger71d67e62008-01-31 16:45:47 -08002030 cb->args[3] = count;
Stephen Hemmingera88ee222008-01-22 21:56:11 -08002031 rcu_read_unlock();
Stephen Hemmingera88ee222008-01-22 21:56:11 -08002032 return -1;
Robert Olsson19baf832005-06-21 12:43:18 -07002033 }
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08002034
Stephen Hemminger71d67e62008-01-31 16:45:47 -08002035 ++count;
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08002036 l = trie_nextleaf(l);
Stephen Hemminger71d67e62008-01-31 16:45:47 -08002037 memset(&cb->args[4], 0,
2038 sizeof(cb->args) - 4*sizeof(cb->args[0]));
Robert Olsson19baf832005-06-21 12:43:18 -07002039 }
Stephen Hemminger71d67e62008-01-31 16:45:47 -08002040 cb->args[3] = count;
Robert Olsson2373ce12005-08-25 13:01:29 -07002041 rcu_read_unlock();
Stephen Hemmingera88ee222008-01-22 21:56:11 -08002042
Robert Olsson19baf832005-06-21 12:43:18 -07002043 return skb->len;
Robert Olsson19baf832005-06-21 12:43:18 -07002044}
2045
Stephen Hemminger7f9b8052008-01-14 23:14:20 -08002046void __init fib_hash_init(void)
2047{
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08002048 fn_alias_kmem = kmem_cache_create("ip_fib_alias",
2049 sizeof(struct fib_alias),
Stephen Hemmingerbc3c8c12008-01-22 21:51:50 -08002050 0, SLAB_PANIC, NULL);
2051
2052 trie_leaf_kmem = kmem_cache_create("ip_fib_trie",
2053 max(sizeof(struct leaf),
2054 sizeof(struct leaf_info)),
2055 0, SLAB_PANIC, NULL);
Stephen Hemminger7f9b8052008-01-14 23:14:20 -08002056}
Robert Olsson19baf832005-06-21 12:43:18 -07002057
Stephen Hemminger7f9b8052008-01-14 23:14:20 -08002058
2059/* Fix more generic FIB names for init later */
2060struct fib_table *fib_hash_table(u32 id)
Robert Olsson19baf832005-06-21 12:43:18 -07002061{
2062 struct fib_table *tb;
2063 struct trie *t;
2064
Robert Olsson19baf832005-06-21 12:43:18 -07002065 tb = kmalloc(sizeof(struct fib_table) + sizeof(struct trie),
2066 GFP_KERNEL);
2067 if (tb == NULL)
2068 return NULL;
2069
2070 tb->tb_id = id;
Denis V. Lunev971b8932007-12-08 00:32:23 -08002071 tb->tb_default = -1;
Robert Olsson19baf832005-06-21 12:43:18 -07002072 tb->tb_lookup = fn_trie_lookup;
2073 tb->tb_insert = fn_trie_insert;
2074 tb->tb_delete = fn_trie_delete;
2075 tb->tb_flush = fn_trie_flush;
2076 tb->tb_select_default = fn_trie_select_default;
2077 tb->tb_dump = fn_trie_dump;
Robert Olsson19baf832005-06-21 12:43:18 -07002078
2079 t = (struct trie *) tb->tb_data;
Stephen Hemmingerc28a1cf2008-01-12 20:49:13 -08002080 memset(t, 0, sizeof(*t));
Robert Olsson19baf832005-06-21 12:43:18 -07002081
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002082 if (id == RT_TABLE_LOCAL)
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08002083 pr_info("IPv4 FIB: Using LC-trie version %s\n", VERSION);
Robert Olsson19baf832005-06-21 12:43:18 -07002084
2085 return tb;
2086}
2087
Robert Olsson19baf832005-06-21 12:43:18 -07002088#ifdef CONFIG_PROC_FS
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002089/* Depth first Trie walk iterator */
2090struct fib_trie_iter {
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002091 struct seq_net_private p;
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002092 struct fib_table *tb;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002093 struct tnode *tnode;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002094 unsigned index;
2095 unsigned depth;
2096};
Robert Olsson19baf832005-06-21 12:43:18 -07002097
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002098static struct node *fib_trie_get_next(struct fib_trie_iter *iter)
Robert Olsson19baf832005-06-21 12:43:18 -07002099{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002100 struct tnode *tn = iter->tnode;
2101 unsigned cindex = iter->index;
2102 struct tnode *p;
2103
Eric W. Biederman6640e692007-01-24 14:42:04 -08002104 /* A single entry routing table */
2105 if (!tn)
2106 return NULL;
2107
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002108 pr_debug("get_next iter={node=%p index=%d depth=%d}\n",
2109 iter->tnode, iter->index, iter->depth);
2110rescan:
2111 while (cindex < (1<<tn->bits)) {
Eric Dumazetb59cfbf2008-01-18 03:31:36 -08002112 struct node *n = tnode_get_child_rcu(tn, cindex);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002113
2114 if (n) {
2115 if (IS_LEAF(n)) {
2116 iter->tnode = tn;
2117 iter->index = cindex + 1;
2118 } else {
2119 /* push down one level */
2120 iter->tnode = (struct tnode *) n;
2121 iter->index = 0;
2122 ++iter->depth;
2123 }
2124 return n;
2125 }
2126
2127 ++cindex;
2128 }
2129
2130 /* Current node exhausted, pop back up */
Eric Dumazetb59cfbf2008-01-18 03:31:36 -08002131 p = node_parent_rcu((struct node *)tn);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002132 if (p) {
2133 cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1;
2134 tn = p;
2135 --iter->depth;
2136 goto rescan;
2137 }
2138
2139 /* got root? */
Robert Olsson19baf832005-06-21 12:43:18 -07002140 return NULL;
2141}
2142
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002143static struct node *fib_trie_get_first(struct fib_trie_iter *iter,
2144 struct trie *t)
Robert Olsson19baf832005-06-21 12:43:18 -07002145{
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002146 struct node *n;
Robert Olsson5ddf0eb2006-03-20 21:34:12 -08002147
Stephen Hemminger132adf52007-03-08 20:44:43 -08002148 if (!t)
Robert Olsson5ddf0eb2006-03-20 21:34:12 -08002149 return NULL;
2150
2151 n = rcu_dereference(t->trie);
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002152 if (!n)
Robert Olsson5ddf0eb2006-03-20 21:34:12 -08002153 return NULL;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002154
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002155 if (IS_TNODE(n)) {
2156 iter->tnode = (struct tnode *) n;
2157 iter->index = 0;
2158 iter->depth = 1;
2159 } else {
2160 iter->tnode = NULL;
2161 iter->index = 0;
2162 iter->depth = 0;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002163 }
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002164
2165 return n;
Robert Olsson19baf832005-06-21 12:43:18 -07002166}
2167
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002168static void trie_collect_stats(struct trie *t, struct trie_stat *s)
Robert Olsson19baf832005-06-21 12:43:18 -07002169{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002170 struct node *n;
2171 struct fib_trie_iter iter;
Robert Olsson19baf832005-06-21 12:43:18 -07002172
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002173 memset(s, 0, sizeof(*s));
Robert Olsson19baf832005-06-21 12:43:18 -07002174
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002175 rcu_read_lock();
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002176 for (n = fib_trie_get_first(&iter, t); n; n = fib_trie_get_next(&iter)) {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002177 if (IS_LEAF(n)) {
Stephen Hemminger93672292008-01-22 21:54:05 -08002178 struct leaf *l = (struct leaf *)n;
2179 struct leaf_info *li;
2180 struct hlist_node *tmp;
2181
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002182 s->leaves++;
2183 s->totdepth += iter.depth;
2184 if (iter.depth > s->maxdepth)
2185 s->maxdepth = iter.depth;
Stephen Hemminger93672292008-01-22 21:54:05 -08002186
2187 hlist_for_each_entry_rcu(li, tmp, &l->list, hlist)
2188 ++s->prefixes;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002189 } else {
2190 const struct tnode *tn = (const struct tnode *) n;
2191 int i;
Robert Olsson19baf832005-06-21 12:43:18 -07002192
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002193 s->tnodes++;
Stephen Hemminger132adf52007-03-08 20:44:43 -08002194 if (tn->bits < MAX_STAT_DEPTH)
Robert Olsson06ef9212006-03-20 21:35:01 -08002195 s->nodesizes[tn->bits]++;
2196
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002197 for (i = 0; i < (1<<tn->bits); i++)
2198 if (!tn->child[i])
2199 s->nullpointers++;
2200 }
2201 }
2202 rcu_read_unlock();
Robert Olsson19baf832005-06-21 12:43:18 -07002203}
2204
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002205/*
Robert Olsson19baf832005-06-21 12:43:18 -07002206 * This outputs /proc/net/fib_triestats
Robert Olsson19baf832005-06-21 12:43:18 -07002207 */
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002208static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat)
Robert Olsson19baf832005-06-21 12:43:18 -07002209{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002210 unsigned i, max, pointers, bytes, avdepth;
Robert Olsson19baf832005-06-21 12:43:18 -07002211
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002212 if (stat->leaves)
2213 avdepth = stat->totdepth*100 / stat->leaves;
2214 else
2215 avdepth = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07002216
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08002217 seq_printf(seq, "\tAver depth: %u.%02d\n",
2218 avdepth / 100, avdepth % 100);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002219 seq_printf(seq, "\tMax depth: %u\n", stat->maxdepth);
Robert Olsson19baf832005-06-21 12:43:18 -07002220
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002221 seq_printf(seq, "\tLeaves: %u\n", stat->leaves);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002222 bytes = sizeof(struct leaf) * stat->leaves;
Stephen Hemminger93672292008-01-22 21:54:05 -08002223
2224 seq_printf(seq, "\tPrefixes: %u\n", stat->prefixes);
2225 bytes += sizeof(struct leaf_info) * stat->prefixes;
2226
Stephen Hemminger187b5182008-01-12 20:55:55 -08002227 seq_printf(seq, "\tInternal nodes: %u\n\t", stat->tnodes);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002228 bytes += sizeof(struct tnode) * stat->tnodes;
Robert Olsson19baf832005-06-21 12:43:18 -07002229
Robert Olsson06ef9212006-03-20 21:35:01 -08002230 max = MAX_STAT_DEPTH;
2231 while (max > 0 && stat->nodesizes[max-1] == 0)
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002232 max--;
Robert Olsson19baf832005-06-21 12:43:18 -07002233
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002234 pointers = 0;
2235 for (i = 1; i <= max; i++)
2236 if (stat->nodesizes[i] != 0) {
Stephen Hemminger187b5182008-01-12 20:55:55 -08002237 seq_printf(seq, " %u: %u", i, stat->nodesizes[i]);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002238 pointers += (1<<i) * stat->nodesizes[i];
2239 }
2240 seq_putc(seq, '\n');
Stephen Hemminger187b5182008-01-12 20:55:55 -08002241 seq_printf(seq, "\tPointers: %u\n", pointers);
Robert Olsson19baf832005-06-21 12:43:18 -07002242
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002243 bytes += sizeof(struct node *) * pointers;
Stephen Hemminger187b5182008-01-12 20:55:55 -08002244 seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers);
2245 seq_printf(seq, "Total size: %u kB\n", (bytes + 1023) / 1024);
Stephen Hemminger66a2f7f2008-01-12 21:23:17 -08002246}
Robert Olsson19baf832005-06-21 12:43:18 -07002247
2248#ifdef CONFIG_IP_FIB_TRIE_STATS
Stephen Hemminger66a2f7f2008-01-12 21:23:17 -08002249static void trie_show_usage(struct seq_file *seq,
2250 const struct trie_use_stats *stats)
2251{
2252 seq_printf(seq, "\nCounters:\n---------\n");
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08002253 seq_printf(seq, "gets = %u\n", stats->gets);
2254 seq_printf(seq, "backtracks = %u\n", stats->backtrack);
2255 seq_printf(seq, "semantic match passed = %u\n",
2256 stats->semantic_match_passed);
2257 seq_printf(seq, "semantic match miss = %u\n",
2258 stats->semantic_match_miss);
2259 seq_printf(seq, "null node hit= %u\n", stats->null_node_hit);
2260 seq_printf(seq, "skipped node resize = %u\n\n",
2261 stats->resize_node_skipped);
Robert Olsson19baf832005-06-21 12:43:18 -07002262}
Stephen Hemminger66a2f7f2008-01-12 21:23:17 -08002263#endif /* CONFIG_IP_FIB_TRIE_STATS */
2264
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002265static void fib_table_print(struct seq_file *seq, struct fib_table *tb)
Stephen Hemmingerd717a9a2008-01-14 23:11:54 -08002266{
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002267 if (tb->tb_id == RT_TABLE_LOCAL)
2268 seq_puts(seq, "Local:\n");
2269 else if (tb->tb_id == RT_TABLE_MAIN)
2270 seq_puts(seq, "Main:\n");
2271 else
2272 seq_printf(seq, "Id %d:\n", tb->tb_id);
Stephen Hemmingerd717a9a2008-01-14 23:11:54 -08002273}
Robert Olsson19baf832005-06-21 12:43:18 -07002274
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002275
Robert Olsson19baf832005-06-21 12:43:18 -07002276static int fib_triestat_seq_show(struct seq_file *seq, void *v)
2277{
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002278 struct net *net = (struct net *)seq->private;
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002279 unsigned int h;
Eric W. Biederman877a9bf2007-12-07 00:47:47 -08002280
Stephen Hemmingerd717a9a2008-01-14 23:11:54 -08002281 seq_printf(seq,
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08002282 "Basic info: size of leaf:"
2283 " %Zd bytes, size of tnode: %Zd bytes.\n",
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002284 sizeof(struct leaf), sizeof(struct tnode));
Olof Johansson91b9a272005-08-09 20:24:39 -07002285
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002286 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
2287 struct hlist_head *head = &net->ipv4.fib_table_hash[h];
2288 struct hlist_node *node;
2289 struct fib_table *tb;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002290
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002291 hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
2292 struct trie *t = (struct trie *) tb->tb_data;
2293 struct trie_stat stat;
2294
2295 if (!t)
2296 continue;
2297
2298 fib_table_print(seq, tb);
2299
2300 trie_collect_stats(t, &stat);
2301 trie_show_stats(seq, &stat);
2302#ifdef CONFIG_IP_FIB_TRIE_STATS
2303 trie_show_usage(seq, &t->stats);
2304#endif
2305 }
2306 }
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002307
Robert Olsson19baf832005-06-21 12:43:18 -07002308 return 0;
2309}
2310
Robert Olsson19baf832005-06-21 12:43:18 -07002311static int fib_triestat_seq_open(struct inode *inode, struct file *file)
2312{
Pavel Emelyanovde05c552008-07-18 04:07:21 -07002313 return single_open_net(inode, file, fib_triestat_seq_show);
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002314}
2315
Arjan van de Ven9a321442007-02-12 00:55:35 -08002316static const struct file_operations fib_triestat_fops = {
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002317 .owner = THIS_MODULE,
2318 .open = fib_triestat_seq_open,
2319 .read = seq_read,
2320 .llseek = seq_lseek,
Pavel Emelyanovb6fcbdb2008-07-18 04:07:44 -07002321 .release = single_release_net,
Robert Olsson19baf832005-06-21 12:43:18 -07002322};
2323
YOSHIFUJI Hideaki12188542008-03-26 02:36:06 +09002324static struct node *fib_trie_get_idx(struct seq_file *seq, loff_t pos)
Robert Olsson19baf832005-06-21 12:43:18 -07002325{
YOSHIFUJI Hideaki12188542008-03-26 02:36:06 +09002326 struct fib_trie_iter *iter = seq->private;
2327 struct net *net = seq_file_net(seq);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002328 loff_t idx = 0;
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002329 unsigned int h;
Robert Olsson19baf832005-06-21 12:43:18 -07002330
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002331 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
2332 struct hlist_head *head = &net->ipv4.fib_table_hash[h];
2333 struct hlist_node *node;
2334 struct fib_table *tb;
2335
2336 hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
2337 struct node *n;
2338
2339 for (n = fib_trie_get_first(iter,
2340 (struct trie *) tb->tb_data);
2341 n; n = fib_trie_get_next(iter))
2342 if (pos == idx++) {
2343 iter->tb = tb;
2344 return n;
2345 }
2346 }
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002347 }
Robert Olsson19baf832005-06-21 12:43:18 -07002348
Robert Olsson19baf832005-06-21 12:43:18 -07002349 return NULL;
2350}
2351
2352static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos)
Stephen Hemmingerc95aaf92008-01-12 21:25:02 -08002353 __acquires(RCU)
Robert Olsson19baf832005-06-21 12:43:18 -07002354{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002355 rcu_read_lock();
YOSHIFUJI Hideaki12188542008-03-26 02:36:06 +09002356 return fib_trie_get_idx(seq, *pos);
Robert Olsson19baf832005-06-21 12:43:18 -07002357}
2358
2359static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2360{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002361 struct fib_trie_iter *iter = seq->private;
YOSHIFUJI Hideaki12188542008-03-26 02:36:06 +09002362 struct net *net = seq_file_net(seq);
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002363 struct fib_table *tb = iter->tb;
2364 struct hlist_node *tb_node;
2365 unsigned int h;
2366 struct node *n;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002367
Robert Olsson19baf832005-06-21 12:43:18 -07002368 ++*pos;
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002369 /* next node in same table */
2370 n = fib_trie_get_next(iter);
2371 if (n)
2372 return n;
Olof Johansson91b9a272005-08-09 20:24:39 -07002373
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002374 /* walk rest of this hash chain */
2375 h = tb->tb_id & (FIB_TABLE_HASHSZ - 1);
2376 while ( (tb_node = rcu_dereference(tb->tb_hlist.next)) ) {
2377 tb = hlist_entry(tb_node, struct fib_table, tb_hlist);
2378 n = fib_trie_get_first(iter, (struct trie *) tb->tb_data);
2379 if (n)
2380 goto found;
2381 }
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002382
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002383 /* new hash chain */
2384 while (++h < FIB_TABLE_HASHSZ) {
2385 struct hlist_head *head = &net->ipv4.fib_table_hash[h];
2386 hlist_for_each_entry_rcu(tb, tb_node, head, tb_hlist) {
2387 n = fib_trie_get_first(iter, (struct trie *) tb->tb_data);
2388 if (n)
2389 goto found;
2390 }
2391 }
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002392 return NULL;
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002393
2394found:
2395 iter->tb = tb;
2396 return n;
Robert Olsson19baf832005-06-21 12:43:18 -07002397}
2398
2399static void fib_trie_seq_stop(struct seq_file *seq, void *v)
Stephen Hemmingerc95aaf92008-01-12 21:25:02 -08002400 __releases(RCU)
Robert Olsson19baf832005-06-21 12:43:18 -07002401{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002402 rcu_read_unlock();
Robert Olsson19baf832005-06-21 12:43:18 -07002403}
2404
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002405static void seq_indent(struct seq_file *seq, int n)
2406{
2407 while (n-- > 0) seq_puts(seq, " ");
2408}
Robert Olsson19baf832005-06-21 12:43:18 -07002409
Eric Dumazet28d36e32008-01-14 23:09:56 -08002410static inline const char *rtn_scope(char *buf, size_t len, enum rt_scope_t s)
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002411{
Stephen Hemminger132adf52007-03-08 20:44:43 -08002412 switch (s) {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002413 case RT_SCOPE_UNIVERSE: return "universe";
2414 case RT_SCOPE_SITE: return "site";
2415 case RT_SCOPE_LINK: return "link";
2416 case RT_SCOPE_HOST: return "host";
2417 case RT_SCOPE_NOWHERE: return "nowhere";
2418 default:
Eric Dumazet28d36e32008-01-14 23:09:56 -08002419 snprintf(buf, len, "scope=%d", s);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002420 return buf;
2421 }
2422}
2423
2424static const char *rtn_type_names[__RTN_MAX] = {
2425 [RTN_UNSPEC] = "UNSPEC",
2426 [RTN_UNICAST] = "UNICAST",
2427 [RTN_LOCAL] = "LOCAL",
2428 [RTN_BROADCAST] = "BROADCAST",
2429 [RTN_ANYCAST] = "ANYCAST",
2430 [RTN_MULTICAST] = "MULTICAST",
2431 [RTN_BLACKHOLE] = "BLACKHOLE",
2432 [RTN_UNREACHABLE] = "UNREACHABLE",
2433 [RTN_PROHIBIT] = "PROHIBIT",
2434 [RTN_THROW] = "THROW",
2435 [RTN_NAT] = "NAT",
2436 [RTN_XRESOLVE] = "XRESOLVE",
2437};
2438
Eric Dumazet28d36e32008-01-14 23:09:56 -08002439static inline const char *rtn_type(char *buf, size_t len, unsigned t)
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002440{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002441 if (t < __RTN_MAX && rtn_type_names[t])
2442 return rtn_type_names[t];
Eric Dumazet28d36e32008-01-14 23:09:56 -08002443 snprintf(buf, len, "type %u", t);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002444 return buf;
2445}
2446
2447/* Pretty print the trie */
Robert Olsson19baf832005-06-21 12:43:18 -07002448static int fib_trie_seq_show(struct seq_file *seq, void *v)
2449{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002450 const struct fib_trie_iter *iter = seq->private;
2451 struct node *n = v;
Robert Olsson19baf832005-06-21 12:43:18 -07002452
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002453 if (!node_parent_rcu(n))
2454 fib_table_print(seq, iter->tb);
Robert Olsson095b8502007-01-26 19:06:01 -08002455
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002456 if (IS_TNODE(n)) {
2457 struct tnode *tn = (struct tnode *) n;
Stephen Hemmingerab66b4a2007-08-10 15:22:58 -07002458 __be32 prf = htonl(mask_pfx(tn->key, tn->pos));
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002459
Robert Olsson1d25cd62005-09-19 15:29:52 -07002460 seq_indent(seq, iter->depth-1);
Harvey Harrison673d57e2008-10-31 00:53:57 -07002461 seq_printf(seq, " +-- %pI4/%d %d %d %d\n",
2462 &prf, tn->pos, tn->bits, tn->full_children,
Robert Olsson1d25cd62005-09-19 15:29:52 -07002463 tn->empty_children);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002464
Olof Johansson91b9a272005-08-09 20:24:39 -07002465 } else {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002466 struct leaf *l = (struct leaf *) n;
Stephen Hemminger13280422008-01-22 21:54:37 -08002467 struct leaf_info *li;
2468 struct hlist_node *node;
Al Viro32ab5f82006-09-26 22:21:45 -07002469 __be32 val = htonl(l->key);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002470
2471 seq_indent(seq, iter->depth);
Harvey Harrison673d57e2008-10-31 00:53:57 -07002472 seq_printf(seq, " |-- %pI4\n", &val);
Eric Dumazet28d36e32008-01-14 23:09:56 -08002473
Stephen Hemminger13280422008-01-22 21:54:37 -08002474 hlist_for_each_entry_rcu(li, node, &l->list, hlist) {
2475 struct fib_alias *fa;
Eric Dumazet28d36e32008-01-14 23:09:56 -08002476
Stephen Hemminger13280422008-01-22 21:54:37 -08002477 list_for_each_entry_rcu(fa, &li->falh, fa_list) {
2478 char buf1[32], buf2[32];
Eric Dumazet28d36e32008-01-14 23:09:56 -08002479
Stephen Hemminger13280422008-01-22 21:54:37 -08002480 seq_indent(seq, iter->depth+1);
2481 seq_printf(seq, " /%d %s %s", li->plen,
2482 rtn_scope(buf1, sizeof(buf1),
2483 fa->fa_scope),
2484 rtn_type(buf2, sizeof(buf2),
2485 fa->fa_type));
2486 if (fa->fa_tos)
Denis V. Lunevb9c4d822008-02-05 02:58:45 -08002487 seq_printf(seq, " tos=%d", fa->fa_tos);
Stephen Hemminger13280422008-01-22 21:54:37 -08002488 seq_putc(seq, '\n');
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002489 }
2490 }
Robert Olsson19baf832005-06-21 12:43:18 -07002491 }
2492
2493 return 0;
2494}
2495
Stephen Hemmingerf6908082007-03-12 14:34:29 -07002496static const struct seq_operations fib_trie_seq_ops = {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002497 .start = fib_trie_seq_start,
2498 .next = fib_trie_seq_next,
2499 .stop = fib_trie_seq_stop,
2500 .show = fib_trie_seq_show,
Robert Olsson19baf832005-06-21 12:43:18 -07002501};
2502
2503static int fib_trie_seq_open(struct inode *inode, struct file *file)
2504{
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002505 return seq_open_net(inode, file, &fib_trie_seq_ops,
2506 sizeof(struct fib_trie_iter));
Robert Olsson19baf832005-06-21 12:43:18 -07002507}
2508
Arjan van de Ven9a321442007-02-12 00:55:35 -08002509static const struct file_operations fib_trie_fops = {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002510 .owner = THIS_MODULE,
2511 .open = fib_trie_seq_open,
2512 .read = seq_read,
2513 .llseek = seq_lseek,
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002514 .release = seq_release_net,
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002515};
2516
Stephen Hemminger8315f5d2008-02-11 21:14:39 -08002517struct fib_route_iter {
2518 struct seq_net_private p;
2519 struct trie *main_trie;
2520 loff_t pos;
2521 t_key key;
2522};
2523
2524static struct leaf *fib_route_get_idx(struct fib_route_iter *iter, loff_t pos)
2525{
2526 struct leaf *l = NULL;
2527 struct trie *t = iter->main_trie;
2528
2529 /* use cache location of last found key */
2530 if (iter->pos > 0 && pos >= iter->pos && (l = fib_find_node(t, iter->key)))
2531 pos -= iter->pos;
2532 else {
2533 iter->pos = 0;
2534 l = trie_firstleaf(t);
2535 }
2536
2537 while (l && pos-- > 0) {
2538 iter->pos++;
2539 l = trie_nextleaf(l);
2540 }
2541
2542 if (l)
2543 iter->key = pos; /* remember it */
2544 else
2545 iter->pos = 0; /* forget it */
2546
2547 return l;
2548}
2549
2550static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos)
2551 __acquires(RCU)
2552{
2553 struct fib_route_iter *iter = seq->private;
2554 struct fib_table *tb;
2555
2556 rcu_read_lock();
YOSHIFUJI Hideaki12188542008-03-26 02:36:06 +09002557 tb = fib_get_table(seq_file_net(seq), RT_TABLE_MAIN);
Stephen Hemminger8315f5d2008-02-11 21:14:39 -08002558 if (!tb)
2559 return NULL;
2560
2561 iter->main_trie = (struct trie *) tb->tb_data;
2562 if (*pos == 0)
2563 return SEQ_START_TOKEN;
2564 else
2565 return fib_route_get_idx(iter, *pos - 1);
2566}
2567
2568static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2569{
2570 struct fib_route_iter *iter = seq->private;
2571 struct leaf *l = v;
2572
2573 ++*pos;
2574 if (v == SEQ_START_TOKEN) {
2575 iter->pos = 0;
2576 l = trie_firstleaf(iter->main_trie);
2577 } else {
2578 iter->pos++;
2579 l = trie_nextleaf(l);
2580 }
2581
2582 if (l)
2583 iter->key = l->key;
2584 else
2585 iter->pos = 0;
2586 return l;
2587}
2588
2589static void fib_route_seq_stop(struct seq_file *seq, void *v)
2590 __releases(RCU)
2591{
2592 rcu_read_unlock();
2593}
2594
Al Viro32ab5f82006-09-26 22:21:45 -07002595static unsigned fib_flag_trans(int type, __be32 mask, const struct fib_info *fi)
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002596{
2597 static unsigned type2flags[RTN_MAX + 1] = {
2598 [7] = RTF_REJECT, [8] = RTF_REJECT,
2599 };
2600 unsigned flags = type2flags[type];
2601
2602 if (fi && fi->fib_nh->nh_gw)
2603 flags |= RTF_GATEWAY;
Al Viro32ab5f82006-09-26 22:21:45 -07002604 if (mask == htonl(0xFFFFFFFF))
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002605 flags |= RTF_HOST;
2606 flags |= RTF_UP;
2607 return flags;
2608}
2609
2610/*
2611 * This outputs /proc/net/route.
2612 * The format of the file is not supposed to be changed
2613 * and needs to be same as fib_hash output to avoid breaking
2614 * legacy utilities
2615 */
2616static int fib_route_seq_show(struct seq_file *seq, void *v)
2617{
2618 struct leaf *l = v;
Stephen Hemminger13280422008-01-22 21:54:37 -08002619 struct leaf_info *li;
2620 struct hlist_node *node;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002621
2622 if (v == SEQ_START_TOKEN) {
2623 seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway "
2624 "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU"
2625 "\tWindow\tIRTT");
2626 return 0;
2627 }
2628
Stephen Hemminger13280422008-01-22 21:54:37 -08002629 hlist_for_each_entry_rcu(li, node, &l->list, hlist) {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002630 struct fib_alias *fa;
Al Viro32ab5f82006-09-26 22:21:45 -07002631 __be32 mask, prefix;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002632
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002633 mask = inet_make_mask(li->plen);
2634 prefix = htonl(l->key);
2635
2636 list_for_each_entry_rcu(fa, &li->falh, fa_list) {
Herbert Xu1371e372005-10-15 09:42:39 +10002637 const struct fib_info *fi = fa->fa_info;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002638 unsigned flags = fib_flag_trans(fa->fa_type, mask, fi);
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002639 int len;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002640
2641 if (fa->fa_type == RTN_BROADCAST
2642 || fa->fa_type == RTN_MULTICAST)
2643 continue;
2644
2645 if (fi)
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002646 seq_printf(seq,
2647 "%s\t%08X\t%08X\t%04X\t%d\t%u\t"
2648 "%d\t%08X\t%d\t%u\t%u%n",
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002649 fi->fib_dev ? fi->fib_dev->name : "*",
2650 prefix,
2651 fi->fib_nh->nh_gw, flags, 0, 0,
2652 fi->fib_priority,
2653 mask,
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08002654 (fi->fib_advmss ?
2655 fi->fib_advmss + 40 : 0),
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002656 fi->fib_window,
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002657 fi->fib_rtt >> 3, &len);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002658 else
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002659 seq_printf(seq,
2660 "*\t%08X\t%08X\t%04X\t%d\t%u\t"
2661 "%d\t%08X\t%d\t%u\t%u%n",
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002662 prefix, 0, flags, 0, 0, 0,
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002663 mask, 0, 0, 0, &len);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002664
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002665 seq_printf(seq, "%*s\n", 127 - len, "");
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002666 }
2667 }
2668
2669 return 0;
2670}
2671
Stephen Hemmingerf6908082007-03-12 14:34:29 -07002672static const struct seq_operations fib_route_seq_ops = {
Stephen Hemminger8315f5d2008-02-11 21:14:39 -08002673 .start = fib_route_seq_start,
2674 .next = fib_route_seq_next,
2675 .stop = fib_route_seq_stop,
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002676 .show = fib_route_seq_show,
2677};
2678
2679static int fib_route_seq_open(struct inode *inode, struct file *file)
2680{
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002681 return seq_open_net(inode, file, &fib_route_seq_ops,
Stephen Hemminger8315f5d2008-02-11 21:14:39 -08002682 sizeof(struct fib_route_iter));
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002683}
2684
Arjan van de Ven9a321442007-02-12 00:55:35 -08002685static const struct file_operations fib_route_fops = {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002686 .owner = THIS_MODULE,
2687 .open = fib_route_seq_open,
2688 .read = seq_read,
2689 .llseek = seq_lseek,
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002690 .release = seq_release_net,
Robert Olsson19baf832005-06-21 12:43:18 -07002691};
2692
Denis V. Lunev61a02652008-01-10 03:21:09 -08002693int __net_init fib_proc_init(struct net *net)
Robert Olsson19baf832005-06-21 12:43:18 -07002694{
Denis V. Lunev61a02652008-01-10 03:21:09 -08002695 if (!proc_net_fops_create(net, "fib_trie", S_IRUGO, &fib_trie_fops))
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002696 goto out1;
2697
Denis V. Lunev61a02652008-01-10 03:21:09 -08002698 if (!proc_net_fops_create(net, "fib_triestat", S_IRUGO,
2699 &fib_triestat_fops))
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002700 goto out2;
2701
Denis V. Lunev61a02652008-01-10 03:21:09 -08002702 if (!proc_net_fops_create(net, "route", S_IRUGO, &fib_route_fops))
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002703 goto out3;
2704
Robert Olsson19baf832005-06-21 12:43:18 -07002705 return 0;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002706
2707out3:
Denis V. Lunev61a02652008-01-10 03:21:09 -08002708 proc_net_remove(net, "fib_triestat");
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002709out2:
Denis V. Lunev61a02652008-01-10 03:21:09 -08002710 proc_net_remove(net, "fib_trie");
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002711out1:
2712 return -ENOMEM;
Robert Olsson19baf832005-06-21 12:43:18 -07002713}
2714
Denis V. Lunev61a02652008-01-10 03:21:09 -08002715void __net_exit fib_proc_exit(struct net *net)
Robert Olsson19baf832005-06-21 12:43:18 -07002716{
Denis V. Lunev61a02652008-01-10 03:21:09 -08002717 proc_net_remove(net, "fib_trie");
2718 proc_net_remove(net, "fib_triestat");
2719 proc_net_remove(net, "route");
Robert Olsson19baf832005-06-21 12:43:18 -07002720}
2721
2722#endif /* CONFIG_PROC_FS */