blob: 9291e7c41fc798fc0e9364d12a8a3ecae1ebe975 [file] [log] [blame]
Robert Olsson19baf832005-06-21 12:43:18 -07001/*
2 * This program is free software; you can redistribute it and/or
3 * modify it under the terms of the GNU General Public License
4 * as published by the Free Software Foundation; either version
5 * 2 of the License, or (at your option) any later version.
6 *
7 * Robert Olsson <robert.olsson@its.uu.se> Uppsala Universitet
8 * & Swedish University of Agricultural Sciences.
9 *
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090010 * Jens Laas <jens.laas@data.slu.se> Swedish University of
Robert Olsson19baf832005-06-21 12:43:18 -070011 * Agricultural Sciences.
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090012 *
Robert Olsson19baf832005-06-21 12:43:18 -070013 * Hans Liss <hans.liss@its.uu.se> Uppsala Universitet
14 *
15 * This work is based on the LPC-trie which is originally descibed in:
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090016 *
Robert Olsson19baf832005-06-21 12:43:18 -070017 * An experimental study of compression methods for dynamic tries
18 * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002.
19 * http://www.nada.kth.se/~snilsson/public/papers/dyntrie2/
20 *
21 *
22 * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson
23 * IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999
24 *
25 * Version: $Id: fib_trie.c,v 1.3 2005/06/08 14:20:01 robert Exp $
26 *
27 *
28 * Code from fib_hash has been reused which includes the following header:
29 *
30 *
31 * INET An implementation of the TCP/IP protocol suite for the LINUX
32 * operating system. INET is implemented using the BSD Socket
33 * interface as the means of communication with the user level.
34 *
35 * IPv4 FIB: lookup engine and maintenance routines.
36 *
37 *
38 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
39 *
40 * This program is free software; you can redistribute it and/or
41 * modify it under the terms of the GNU General Public License
42 * as published by the Free Software Foundation; either version
43 * 2 of the License, or (at your option) any later version.
Robert Olssonfd966252005-12-22 11:25:10 -080044 *
45 * Substantial contributions to this work comes from:
46 *
47 * David S. Miller, <davem@davemloft.net>
48 * Stephen Hemminger <shemminger@osdl.org>
49 * Paul E. McKenney <paulmck@us.ibm.com>
50 * Patrick McHardy <kaber@trash.net>
Robert Olsson19baf832005-06-21 12:43:18 -070051 */
52
Robert Olsson05eee482007-03-19 16:27:37 -070053#define VERSION "0.408"
Robert Olsson19baf832005-06-21 12:43:18 -070054
Robert Olsson19baf832005-06-21 12:43:18 -070055#include <asm/uaccess.h>
56#include <asm/system.h>
Jiri Slaby1977f032007-10-18 23:40:25 -070057#include <linux/bitops.h>
Robert Olsson19baf832005-06-21 12:43:18 -070058#include <linux/types.h>
59#include <linux/kernel.h>
Robert Olsson19baf832005-06-21 12:43:18 -070060#include <linux/mm.h>
61#include <linux/string.h>
62#include <linux/socket.h>
63#include <linux/sockios.h>
64#include <linux/errno.h>
65#include <linux/in.h>
66#include <linux/inet.h>
Stephen Hemmingercd8787a2006-01-03 14:38:34 -080067#include <linux/inetdevice.h>
Robert Olsson19baf832005-06-21 12:43:18 -070068#include <linux/netdevice.h>
69#include <linux/if_arp.h>
70#include <linux/proc_fs.h>
Robert Olsson2373ce12005-08-25 13:01:29 -070071#include <linux/rcupdate.h>
Robert Olsson19baf832005-06-21 12:43:18 -070072#include <linux/skbuff.h>
73#include <linux/netlink.h>
74#include <linux/init.h>
75#include <linux/list.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020076#include <net/net_namespace.h>
Robert Olsson19baf832005-06-21 12:43:18 -070077#include <net/ip.h>
78#include <net/protocol.h>
79#include <net/route.h>
80#include <net/tcp.h>
81#include <net/sock.h>
82#include <net/ip_fib.h>
83#include "fib_lookup.h"
84
Robert Olsson06ef9212006-03-20 21:35:01 -080085#define MAX_STAT_DEPTH 32
Robert Olsson19baf832005-06-21 12:43:18 -070086
Robert Olsson19baf832005-06-21 12:43:18 -070087#define KEYLENGTH (8*sizeof(t_key))
Robert Olsson19baf832005-06-21 12:43:18 -070088
Robert Olsson19baf832005-06-21 12:43:18 -070089typedef unsigned int t_key;
90
91#define T_TNODE 0
92#define T_LEAF 1
93#define NODE_TYPE_MASK 0x1UL
Robert Olsson2373ce12005-08-25 13:01:29 -070094#define NODE_TYPE(node) ((node)->parent & NODE_TYPE_MASK)
95
Olof Johansson91b9a272005-08-09 20:24:39 -070096#define IS_TNODE(n) (!(n->parent & T_LEAF))
97#define IS_LEAF(n) (n->parent & T_LEAF)
Robert Olsson19baf832005-06-21 12:43:18 -070098
99struct node {
Olof Johansson91b9a272005-08-09 20:24:39 -0700100 unsigned long parent;
Eric Dumazet8d9654442008-01-13 22:31:44 -0800101 t_key key;
Robert Olsson19baf832005-06-21 12:43:18 -0700102};
103
104struct leaf {
Olof Johansson91b9a272005-08-09 20:24:39 -0700105 unsigned long parent;
Eric Dumazet8d9654442008-01-13 22:31:44 -0800106 t_key key;
Robert Olsson19baf832005-06-21 12:43:18 -0700107 struct hlist_head list;
Robert Olsson2373ce12005-08-25 13:01:29 -0700108 struct rcu_head rcu;
Robert Olsson19baf832005-06-21 12:43:18 -0700109};
110
111struct leaf_info {
112 struct hlist_node hlist;
Robert Olsson2373ce12005-08-25 13:01:29 -0700113 struct rcu_head rcu;
Robert Olsson19baf832005-06-21 12:43:18 -0700114 int plen;
115 struct list_head falh;
116};
117
118struct tnode {
Olof Johansson91b9a272005-08-09 20:24:39 -0700119 unsigned long parent;
Eric Dumazet8d9654442008-01-13 22:31:44 -0800120 t_key key;
Eric Dumazet112d8cf2008-01-12 21:27:41 -0800121 unsigned char pos; /* 2log(KEYLENGTH) bits needed */
122 unsigned char bits; /* 2log(KEYLENGTH) bits needed */
Eric Dumazet8d9654442008-01-13 22:31:44 -0800123 unsigned int full_children; /* KEYLENGTH bits needed */
124 unsigned int empty_children; /* KEYLENGTH bits needed */
Robert Olsson2373ce12005-08-25 13:01:29 -0700125 struct rcu_head rcu;
Olof Johansson91b9a272005-08-09 20:24:39 -0700126 struct node *child[0];
Robert Olsson19baf832005-06-21 12:43:18 -0700127};
128
129#ifdef CONFIG_IP_FIB_TRIE_STATS
130struct trie_use_stats {
131 unsigned int gets;
132 unsigned int backtrack;
133 unsigned int semantic_match_passed;
134 unsigned int semantic_match_miss;
135 unsigned int null_node_hit;
Robert Olsson2f368952005-07-05 15:02:40 -0700136 unsigned int resize_node_skipped;
Robert Olsson19baf832005-06-21 12:43:18 -0700137};
138#endif
139
140struct trie_stat {
141 unsigned int totdepth;
142 unsigned int maxdepth;
143 unsigned int tnodes;
144 unsigned int leaves;
145 unsigned int nullpointers;
Robert Olsson06ef9212006-03-20 21:35:01 -0800146 unsigned int nodesizes[MAX_STAT_DEPTH];
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700147};
Robert Olsson19baf832005-06-21 12:43:18 -0700148
149struct trie {
Olof Johansson91b9a272005-08-09 20:24:39 -0700150 struct node *trie;
Stephen Hemmingerd717a9a2008-01-14 23:11:54 -0800151 unsigned int size;
Robert Olsson19baf832005-06-21 12:43:18 -0700152#ifdef CONFIG_IP_FIB_TRIE_STATS
153 struct trie_use_stats stats;
154#endif
Robert Olsson19baf832005-06-21 12:43:18 -0700155};
156
Robert Olsson19baf832005-06-21 12:43:18 -0700157static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n);
158static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull);
Robert Olsson19baf832005-06-21 12:43:18 -0700159static struct node *resize(struct trie *t, struct tnode *tn);
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700160static struct tnode *inflate(struct trie *t, struct tnode *tn);
161static struct tnode *halve(struct trie *t, struct tnode *tn);
Robert Olsson19baf832005-06-21 12:43:18 -0700162static void tnode_free(struct tnode *tn);
Robert Olsson19baf832005-06-21 12:43:18 -0700163
Christoph Lametere18b8902006-12-06 20:33:20 -0800164static struct kmem_cache *fn_alias_kmem __read_mostly;
Stephen Hemmingerbc3c8c12008-01-22 21:51:50 -0800165static struct kmem_cache *trie_leaf_kmem __read_mostly;
Robert Olsson19baf832005-06-21 12:43:18 -0700166
Stephen Hemminger06801912007-08-10 15:22:13 -0700167static inline struct tnode *node_parent(struct node *node)
168{
Eric Dumazetb59cfbf2008-01-18 03:31:36 -0800169 return (struct tnode *)(node->parent & ~NODE_TYPE_MASK);
170}
Stephen Hemminger06801912007-08-10 15:22:13 -0700171
Eric Dumazetb59cfbf2008-01-18 03:31:36 -0800172static inline struct tnode *node_parent_rcu(struct node *node)
173{
174 struct tnode *ret = node_parent(node);
175
Stephen Hemminger06801912007-08-10 15:22:13 -0700176 return rcu_dereference(ret);
177}
178
179static inline void node_set_parent(struct node *node, struct tnode *ptr)
180{
181 rcu_assign_pointer(node->parent,
182 (unsigned long)ptr | NODE_TYPE(node));
183}
Robert Olsson2373ce12005-08-25 13:01:29 -0700184
Eric Dumazetb59cfbf2008-01-18 03:31:36 -0800185static inline struct node *tnode_get_child(struct tnode *tn, unsigned int i)
Robert Olsson19baf832005-06-21 12:43:18 -0700186{
Eric Dumazetb59cfbf2008-01-18 03:31:36 -0800187 BUG_ON(i >= 1U << tn->bits);
Robert Olsson19baf832005-06-21 12:43:18 -0700188
Eric Dumazetb59cfbf2008-01-18 03:31:36 -0800189 return tn->child[i];
190}
191
192static inline struct node *tnode_get_child_rcu(struct tnode *tn, unsigned int i)
193{
194 struct node *ret = tnode_get_child(tn, i);
195
196 return rcu_dereference(ret);
Robert Olsson19baf832005-06-21 12:43:18 -0700197}
198
Stephen Hemmignerbb435b82005-08-09 20:25:39 -0700199static inline int tnode_child_length(const struct tnode *tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700200{
Olof Johansson91b9a272005-08-09 20:24:39 -0700201 return 1 << tn->bits;
Robert Olsson19baf832005-06-21 12:43:18 -0700202}
203
Stephen Hemmingerab66b4a2007-08-10 15:22:58 -0700204static inline t_key mask_pfx(t_key k, unsigned short l)
205{
206 return (l == 0) ? 0 : k >> (KEYLENGTH-l) << (KEYLENGTH-l);
207}
208
Robert Olsson19baf832005-06-21 12:43:18 -0700209static inline t_key tkey_extract_bits(t_key a, int offset, int bits)
210{
Olof Johansson91b9a272005-08-09 20:24:39 -0700211 if (offset < KEYLENGTH)
Robert Olsson19baf832005-06-21 12:43:18 -0700212 return ((t_key)(a << offset)) >> (KEYLENGTH - bits);
Olof Johansson91b9a272005-08-09 20:24:39 -0700213 else
Robert Olsson19baf832005-06-21 12:43:18 -0700214 return 0;
215}
216
217static inline int tkey_equals(t_key a, t_key b)
218{
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700219 return a == b;
Robert Olsson19baf832005-06-21 12:43:18 -0700220}
221
222static inline int tkey_sub_equals(t_key a, int offset, int bits, t_key b)
223{
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700224 if (bits == 0 || offset >= KEYLENGTH)
225 return 1;
Olof Johansson91b9a272005-08-09 20:24:39 -0700226 bits = bits > KEYLENGTH ? KEYLENGTH : bits;
227 return ((a ^ b) << offset) >> (KEYLENGTH - bits) == 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700228}
Robert Olsson19baf832005-06-21 12:43:18 -0700229
230static inline int tkey_mismatch(t_key a, int offset, t_key b)
231{
232 t_key diff = a ^ b;
233 int i = offset;
234
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700235 if (!diff)
236 return 0;
237 while ((diff << i) >> (KEYLENGTH-1) == 0)
Robert Olsson19baf832005-06-21 12:43:18 -0700238 i++;
239 return i;
240}
241
Robert Olsson19baf832005-06-21 12:43:18 -0700242/*
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900243 To understand this stuff, an understanding of keys and all their bits is
244 necessary. Every node in the trie has a key associated with it, but not
Robert Olsson19baf832005-06-21 12:43:18 -0700245 all of the bits in that key are significant.
246
247 Consider a node 'n' and its parent 'tp'.
248
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900249 If n is a leaf, every bit in its key is significant. Its presence is
250 necessitated by path compression, since during a tree traversal (when
251 searching for a leaf - unless we are doing an insertion) we will completely
252 ignore all skipped bits we encounter. Thus we need to verify, at the end of
253 a potentially successful search, that we have indeed been walking the
Robert Olsson19baf832005-06-21 12:43:18 -0700254 correct key path.
255
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900256 Note that we can never "miss" the correct key in the tree if present by
257 following the wrong path. Path compression ensures that segments of the key
258 that are the same for all keys with a given prefix are skipped, but the
259 skipped part *is* identical for each node in the subtrie below the skipped
260 bit! trie_insert() in this implementation takes care of that - note the
Robert Olsson19baf832005-06-21 12:43:18 -0700261 call to tkey_sub_equals() in trie_insert().
262
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900263 if n is an internal node - a 'tnode' here, the various parts of its key
Robert Olsson19baf832005-06-21 12:43:18 -0700264 have many different meanings.
265
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900266 Example:
Robert Olsson19baf832005-06-21 12:43:18 -0700267 _________________________________________________________________
268 | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C |
269 -----------------------------------------------------------------
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900270 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
Robert Olsson19baf832005-06-21 12:43:18 -0700271
272 _________________________________________________________________
273 | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u |
274 -----------------------------------------------------------------
275 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
276
277 tp->pos = 7
278 tp->bits = 3
279 n->pos = 15
Olof Johansson91b9a272005-08-09 20:24:39 -0700280 n->bits = 4
Robert Olsson19baf832005-06-21 12:43:18 -0700281
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900282 First, let's just ignore the bits that come before the parent tp, that is
283 the bits from 0 to (tp->pos-1). They are *known* but at this point we do
Robert Olsson19baf832005-06-21 12:43:18 -0700284 not use them for anything.
285
286 The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900287 index into the parent's child array. That is, they will be used to find
Robert Olsson19baf832005-06-21 12:43:18 -0700288 'n' among tp's children.
289
290 The bits from (tp->pos + tp->bits) to (n->pos - 1) - "S" - are skipped bits
291 for the node n.
292
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900293 All the bits we have seen so far are significant to the node n. The rest
Robert Olsson19baf832005-06-21 12:43:18 -0700294 of the bits are really not needed or indeed known in n->key.
295
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900296 The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into
Robert Olsson19baf832005-06-21 12:43:18 -0700297 n's child array, and will of course be different for each child.
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900298
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700299
Robert Olsson19baf832005-06-21 12:43:18 -0700300 The rest of the bits, from (n->pos + n->bits) onward, are completely unknown
301 at this point.
302
303*/
304
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700305static inline void check_tnode(const struct tnode *tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700306{
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700307 WARN_ON(tn && tn->pos+tn->bits > 32);
Robert Olsson19baf832005-06-21 12:43:18 -0700308}
309
Denis V. Lunevf5026fa2007-12-13 09:47:57 -0800310static const int halve_threshold = 25;
311static const int inflate_threshold = 50;
312static const int halve_threshold_root = 8;
313static const int inflate_threshold_root = 15;
Robert Olsson19baf832005-06-21 12:43:18 -0700314
Robert Olsson2373ce12005-08-25 13:01:29 -0700315
316static void __alias_free_mem(struct rcu_head *head)
317{
318 struct fib_alias *fa = container_of(head, struct fib_alias, rcu);
319 kmem_cache_free(fn_alias_kmem, fa);
320}
321
322static inline void alias_free_mem_rcu(struct fib_alias *fa)
323{
324 call_rcu(&fa->rcu, __alias_free_mem);
325}
326
327static void __leaf_free_rcu(struct rcu_head *head)
328{
Stephen Hemmingerbc3c8c12008-01-22 21:51:50 -0800329 struct leaf *l = container_of(head, struct leaf, rcu);
330 kmem_cache_free(trie_leaf_kmem, l);
Robert Olsson2373ce12005-08-25 13:01:29 -0700331}
332
Robert Olsson2373ce12005-08-25 13:01:29 -0700333static void __leaf_info_free_rcu(struct rcu_head *head)
334{
335 kfree(container_of(head, struct leaf_info, rcu));
336}
337
338static inline void free_leaf_info(struct leaf_info *leaf)
339{
340 call_rcu(&leaf->rcu, __leaf_info_free_rcu);
341}
342
Eric Dumazet8d9654442008-01-13 22:31:44 -0800343static struct tnode *tnode_alloc(size_t size)
Robert Olsson2373ce12005-08-25 13:01:29 -0700344{
345 struct page *pages;
346
347 if (size <= PAGE_SIZE)
Eric Dumazet8d9654442008-01-13 22:31:44 -0800348 return kzalloc(size, GFP_KERNEL);
Robert Olsson2373ce12005-08-25 13:01:29 -0700349
350 pages = alloc_pages(GFP_KERNEL|__GFP_ZERO, get_order(size));
351 if (!pages)
352 return NULL;
353
354 return page_address(pages);
355}
356
357static void __tnode_free_rcu(struct rcu_head *head)
358{
359 struct tnode *tn = container_of(head, struct tnode, rcu);
Eric Dumazet8d9654442008-01-13 22:31:44 -0800360 size_t size = sizeof(struct tnode) +
361 (sizeof(struct node *) << tn->bits);
Robert Olsson2373ce12005-08-25 13:01:29 -0700362
363 if (size <= PAGE_SIZE)
364 kfree(tn);
365 else
366 free_pages((unsigned long)tn, get_order(size));
367}
368
369static inline void tnode_free(struct tnode *tn)
370{
Stephen Hemminger132adf52007-03-08 20:44:43 -0800371 if (IS_LEAF(tn)) {
Robert Olsson550e29b2006-04-04 12:53:35 -0700372 struct leaf *l = (struct leaf *) tn;
373 call_rcu_bh(&l->rcu, __leaf_free_rcu);
Stephen Hemminger132adf52007-03-08 20:44:43 -0800374 } else
Robert Olsson550e29b2006-04-04 12:53:35 -0700375 call_rcu(&tn->rcu, __tnode_free_rcu);
Robert Olsson2373ce12005-08-25 13:01:29 -0700376}
377
Robert Olsson19baf832005-06-21 12:43:18 -0700378static struct leaf *leaf_new(void)
379{
Stephen Hemmingerbc3c8c12008-01-22 21:51:50 -0800380 struct leaf *l = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700381 if (l) {
Robert Olsson2373ce12005-08-25 13:01:29 -0700382 l->parent = T_LEAF;
Robert Olsson19baf832005-06-21 12:43:18 -0700383 INIT_HLIST_HEAD(&l->list);
384 }
385 return l;
386}
387
388static struct leaf_info *leaf_info_new(int plen)
389{
390 struct leaf_info *li = kmalloc(sizeof(struct leaf_info), GFP_KERNEL);
Robert Olsson2373ce12005-08-25 13:01:29 -0700391 if (li) {
392 li->plen = plen;
393 INIT_LIST_HEAD(&li->falh);
Patrick McHardyf0e36f82005-07-05 14:44:55 -0700394 }
Robert Olsson2373ce12005-08-25 13:01:29 -0700395 return li;
Patrick McHardyf0e36f82005-07-05 14:44:55 -0700396}
397
Robert Olsson19baf832005-06-21 12:43:18 -0700398static struct tnode* tnode_new(t_key key, int pos, int bits)
399{
Eric Dumazet8d9654442008-01-13 22:31:44 -0800400 size_t sz = sizeof(struct tnode) + (sizeof(struct node *) << bits);
Patrick McHardyf0e36f82005-07-05 14:44:55 -0700401 struct tnode *tn = tnode_alloc(sz);
Robert Olsson19baf832005-06-21 12:43:18 -0700402
Olof Johansson91b9a272005-08-09 20:24:39 -0700403 if (tn) {
Robert Olsson2373ce12005-08-25 13:01:29 -0700404 tn->parent = T_TNODE;
Robert Olsson19baf832005-06-21 12:43:18 -0700405 tn->pos = pos;
406 tn->bits = bits;
407 tn->key = key;
408 tn->full_children = 0;
409 tn->empty_children = 1<<bits;
410 }
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700411
Eric Dumazet8d9654442008-01-13 22:31:44 -0800412 pr_debug("AT %p s=%u %lu\n", tn, (unsigned int) sizeof(struct tnode),
413 (unsigned long) (sizeof(struct node) << bits));
Robert Olsson19baf832005-06-21 12:43:18 -0700414 return tn;
415}
416
Robert Olsson19baf832005-06-21 12:43:18 -0700417/*
418 * Check whether a tnode 'n' is "full", i.e. it is an internal node
419 * and no bits are skipped. See discussion in dyntree paper p. 6
420 */
421
Stephen Hemmignerbb435b82005-08-09 20:25:39 -0700422static inline int tnode_full(const struct tnode *tn, const struct node *n)
Robert Olsson19baf832005-06-21 12:43:18 -0700423{
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700424 if (n == NULL || IS_LEAF(n))
Robert Olsson19baf832005-06-21 12:43:18 -0700425 return 0;
426
427 return ((struct tnode *) n)->pos == tn->pos + tn->bits;
428}
429
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700430static inline void put_child(struct trie *t, struct tnode *tn, int i, struct node *n)
Robert Olsson19baf832005-06-21 12:43:18 -0700431{
432 tnode_put_child_reorg(tn, i, n, -1);
433}
434
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700435 /*
Robert Olsson19baf832005-06-21 12:43:18 -0700436 * Add a child at position i overwriting the old value.
437 * Update the value of full_children and empty_children.
438 */
439
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700440static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull)
Robert Olsson19baf832005-06-21 12:43:18 -0700441{
Robert Olsson2373ce12005-08-25 13:01:29 -0700442 struct node *chi = tn->child[i];
Robert Olsson19baf832005-06-21 12:43:18 -0700443 int isfull;
444
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700445 BUG_ON(i >= 1<<tn->bits);
446
Robert Olsson19baf832005-06-21 12:43:18 -0700447
448 /* update emptyChildren */
449 if (n == NULL && chi != NULL)
450 tn->empty_children++;
451 else if (n != NULL && chi == NULL)
452 tn->empty_children--;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700453
Robert Olsson19baf832005-06-21 12:43:18 -0700454 /* update fullChildren */
Olof Johansson91b9a272005-08-09 20:24:39 -0700455 if (wasfull == -1)
Robert Olsson19baf832005-06-21 12:43:18 -0700456 wasfull = tnode_full(tn, chi);
457
458 isfull = tnode_full(tn, n);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700459 if (wasfull && !isfull)
Robert Olsson19baf832005-06-21 12:43:18 -0700460 tn->full_children--;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700461 else if (!wasfull && isfull)
Robert Olsson19baf832005-06-21 12:43:18 -0700462 tn->full_children++;
Olof Johansson91b9a272005-08-09 20:24:39 -0700463
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700464 if (n)
Stephen Hemminger06801912007-08-10 15:22:13 -0700465 node_set_parent(n, tn);
Robert Olsson19baf832005-06-21 12:43:18 -0700466
Robert Olsson2373ce12005-08-25 13:01:29 -0700467 rcu_assign_pointer(tn->child[i], n);
Robert Olsson19baf832005-06-21 12:43:18 -0700468}
469
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700470static struct node *resize(struct trie *t, struct tnode *tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700471{
472 int i;
Robert Olsson2f368952005-07-05 15:02:40 -0700473 int err = 0;
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700474 struct tnode *old_tn;
Robert Olssone6308be2005-10-04 13:01:58 -0700475 int inflate_threshold_use;
476 int halve_threshold_use;
Robert Olsson05eee482007-03-19 16:27:37 -0700477 int max_resize;
Robert Olsson19baf832005-06-21 12:43:18 -0700478
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900479 if (!tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700480 return NULL;
481
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700482 pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n",
483 tn, inflate_threshold, halve_threshold);
Robert Olsson19baf832005-06-21 12:43:18 -0700484
485 /* No children */
486 if (tn->empty_children == tnode_child_length(tn)) {
487 tnode_free(tn);
488 return NULL;
489 }
490 /* One child */
491 if (tn->empty_children == tnode_child_length(tn) - 1)
492 for (i = 0; i < tnode_child_length(tn); i++) {
Olof Johansson91b9a272005-08-09 20:24:39 -0700493 struct node *n;
Robert Olsson19baf832005-06-21 12:43:18 -0700494
Olof Johansson91b9a272005-08-09 20:24:39 -0700495 n = tn->child[i];
Robert Olsson2373ce12005-08-25 13:01:29 -0700496 if (!n)
Olof Johansson91b9a272005-08-09 20:24:39 -0700497 continue;
Olof Johansson91b9a272005-08-09 20:24:39 -0700498
499 /* compress one level */
Stephen Hemminger06801912007-08-10 15:22:13 -0700500 node_set_parent(n, NULL);
Olof Johansson91b9a272005-08-09 20:24:39 -0700501 tnode_free(tn);
502 return n;
Robert Olsson19baf832005-06-21 12:43:18 -0700503 }
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700504 /*
Robert Olsson19baf832005-06-21 12:43:18 -0700505 * Double as long as the resulting node has a number of
506 * nonempty nodes that are above the threshold.
507 */
508
509 /*
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700510 * From "Implementing a dynamic compressed trie" by Stefan Nilsson of
511 * the Helsinki University of Technology and Matti Tikkanen of Nokia
Robert Olsson19baf832005-06-21 12:43:18 -0700512 * Telecommunications, page 6:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700513 * "A node is doubled if the ratio of non-empty children to all
Robert Olsson19baf832005-06-21 12:43:18 -0700514 * children in the *doubled* node is at least 'high'."
515 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700516 * 'high' in this instance is the variable 'inflate_threshold'. It
517 * is expressed as a percentage, so we multiply it with
518 * tnode_child_length() and instead of multiplying by 2 (since the
519 * child array will be doubled by inflate()) and multiplying
520 * the left-hand side by 100 (to handle the percentage thing) we
Robert Olsson19baf832005-06-21 12:43:18 -0700521 * multiply the left-hand side by 50.
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700522 *
523 * The left-hand side may look a bit weird: tnode_child_length(tn)
524 * - tn->empty_children is of course the number of non-null children
525 * in the current node. tn->full_children is the number of "full"
Robert Olsson19baf832005-06-21 12:43:18 -0700526 * children, that is non-null tnodes with a skip value of 0.
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700527 * All of those will be doubled in the resulting inflated tnode, so
Robert Olsson19baf832005-06-21 12:43:18 -0700528 * we just count them one extra time here.
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700529 *
Robert Olsson19baf832005-06-21 12:43:18 -0700530 * A clearer way to write this would be:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700531 *
Robert Olsson19baf832005-06-21 12:43:18 -0700532 * to_be_doubled = tn->full_children;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700533 * not_to_be_doubled = tnode_child_length(tn) - tn->empty_children -
Robert Olsson19baf832005-06-21 12:43:18 -0700534 * tn->full_children;
535 *
536 * new_child_length = tnode_child_length(tn) * 2;
537 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700538 * new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) /
Robert Olsson19baf832005-06-21 12:43:18 -0700539 * new_child_length;
540 * if (new_fill_factor >= inflate_threshold)
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700541 *
542 * ...and so on, tho it would mess up the while () loop.
543 *
Robert Olsson19baf832005-06-21 12:43:18 -0700544 * anyway,
545 * 100 * (not_to_be_doubled + 2*to_be_doubled) / new_child_length >=
546 * inflate_threshold
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700547 *
Robert Olsson19baf832005-06-21 12:43:18 -0700548 * avoid a division:
549 * 100 * (not_to_be_doubled + 2*to_be_doubled) >=
550 * inflate_threshold * new_child_length
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700551 *
Robert Olsson19baf832005-06-21 12:43:18 -0700552 * expand not_to_be_doubled and to_be_doubled, and shorten:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700553 * 100 * (tnode_child_length(tn) - tn->empty_children +
Olof Johansson91b9a272005-08-09 20:24:39 -0700554 * tn->full_children) >= inflate_threshold * new_child_length
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700555 *
Robert Olsson19baf832005-06-21 12:43:18 -0700556 * expand new_child_length:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700557 * 100 * (tnode_child_length(tn) - tn->empty_children +
Olof Johansson91b9a272005-08-09 20:24:39 -0700558 * tn->full_children) >=
Robert Olsson19baf832005-06-21 12:43:18 -0700559 * inflate_threshold * tnode_child_length(tn) * 2
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700560 *
Robert Olsson19baf832005-06-21 12:43:18 -0700561 * shorten again:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700562 * 50 * (tn->full_children + tnode_child_length(tn) -
Olof Johansson91b9a272005-08-09 20:24:39 -0700563 * tn->empty_children) >= inflate_threshold *
Robert Olsson19baf832005-06-21 12:43:18 -0700564 * tnode_child_length(tn)
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700565 *
Robert Olsson19baf832005-06-21 12:43:18 -0700566 */
567
568 check_tnode(tn);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700569
Robert Olssone6308be2005-10-04 13:01:58 -0700570 /* Keep root node larger */
571
Stephen Hemminger132adf52007-03-08 20:44:43 -0800572 if (!tn->parent)
Robert Olssone6308be2005-10-04 13:01:58 -0700573 inflate_threshold_use = inflate_threshold_root;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900574 else
Robert Olssone6308be2005-10-04 13:01:58 -0700575 inflate_threshold_use = inflate_threshold;
576
Robert Olsson2f368952005-07-05 15:02:40 -0700577 err = 0;
Robert Olsson05eee482007-03-19 16:27:37 -0700578 max_resize = 10;
579 while ((tn->full_children > 0 && max_resize-- &&
Robert Olsson19baf832005-06-21 12:43:18 -0700580 50 * (tn->full_children + tnode_child_length(tn) - tn->empty_children) >=
Robert Olssone6308be2005-10-04 13:01:58 -0700581 inflate_threshold_use * tnode_child_length(tn))) {
Robert Olsson19baf832005-06-21 12:43:18 -0700582
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700583 old_tn = tn;
584 tn = inflate(t, tn);
585 if (IS_ERR(tn)) {
586 tn = old_tn;
Robert Olsson2f368952005-07-05 15:02:40 -0700587#ifdef CONFIG_IP_FIB_TRIE_STATS
588 t->stats.resize_node_skipped++;
589#endif
590 break;
591 }
Robert Olsson19baf832005-06-21 12:43:18 -0700592 }
593
Robert Olsson05eee482007-03-19 16:27:37 -0700594 if (max_resize < 0) {
595 if (!tn->parent)
596 printk(KERN_WARNING "Fix inflate_threshold_root. Now=%d size=%d bits\n",
597 inflate_threshold_root, tn->bits);
598 else
599 printk(KERN_WARNING "Fix inflate_threshold. Now=%d size=%d bits\n",
600 inflate_threshold, tn->bits);
601 }
602
Robert Olsson19baf832005-06-21 12:43:18 -0700603 check_tnode(tn);
604
605 /*
606 * Halve as long as the number of empty children in this
607 * node is above threshold.
608 */
Robert Olsson2f368952005-07-05 15:02:40 -0700609
Robert Olssone6308be2005-10-04 13:01:58 -0700610
611 /* Keep root node larger */
612
Stephen Hemminger132adf52007-03-08 20:44:43 -0800613 if (!tn->parent)
Robert Olssone6308be2005-10-04 13:01:58 -0700614 halve_threshold_use = halve_threshold_root;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900615 else
Robert Olssone6308be2005-10-04 13:01:58 -0700616 halve_threshold_use = halve_threshold;
617
Robert Olsson2f368952005-07-05 15:02:40 -0700618 err = 0;
Robert Olsson05eee482007-03-19 16:27:37 -0700619 max_resize = 10;
620 while (tn->bits > 1 && max_resize-- &&
Robert Olsson19baf832005-06-21 12:43:18 -0700621 100 * (tnode_child_length(tn) - tn->empty_children) <
Robert Olssone6308be2005-10-04 13:01:58 -0700622 halve_threshold_use * tnode_child_length(tn)) {
Robert Olsson19baf832005-06-21 12:43:18 -0700623
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700624 old_tn = tn;
625 tn = halve(t, tn);
626 if (IS_ERR(tn)) {
627 tn = old_tn;
Robert Olsson2f368952005-07-05 15:02:40 -0700628#ifdef CONFIG_IP_FIB_TRIE_STATS
629 t->stats.resize_node_skipped++;
630#endif
631 break;
632 }
633 }
634
Robert Olsson05eee482007-03-19 16:27:37 -0700635 if (max_resize < 0) {
636 if (!tn->parent)
637 printk(KERN_WARNING "Fix halve_threshold_root. Now=%d size=%d bits\n",
638 halve_threshold_root, tn->bits);
639 else
640 printk(KERN_WARNING "Fix halve_threshold. Now=%d size=%d bits\n",
641 halve_threshold, tn->bits);
642 }
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700643
Robert Olsson19baf832005-06-21 12:43:18 -0700644 /* Only one child remains */
Robert Olsson19baf832005-06-21 12:43:18 -0700645 if (tn->empty_children == tnode_child_length(tn) - 1)
646 for (i = 0; i < tnode_child_length(tn); i++) {
Olof Johansson91b9a272005-08-09 20:24:39 -0700647 struct node *n;
648
Olof Johansson91b9a272005-08-09 20:24:39 -0700649 n = tn->child[i];
Robert Olsson2373ce12005-08-25 13:01:29 -0700650 if (!n)
Olof Johansson91b9a272005-08-09 20:24:39 -0700651 continue;
Olof Johansson91b9a272005-08-09 20:24:39 -0700652
653 /* compress one level */
654
Stephen Hemminger06801912007-08-10 15:22:13 -0700655 node_set_parent(n, NULL);
Olof Johansson91b9a272005-08-09 20:24:39 -0700656 tnode_free(tn);
657 return n;
Robert Olsson19baf832005-06-21 12:43:18 -0700658 }
659
660 return (struct node *) tn;
661}
662
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700663static struct tnode *inflate(struct trie *t, struct tnode *tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700664{
Robert Olsson19baf832005-06-21 12:43:18 -0700665 struct tnode *oldtnode = tn;
666 int olen = tnode_child_length(tn);
667 int i;
668
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700669 pr_debug("In inflate\n");
Robert Olsson19baf832005-06-21 12:43:18 -0700670
671 tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1);
672
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700673 if (!tn)
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700674 return ERR_PTR(-ENOMEM);
Robert Olsson2f368952005-07-05 15:02:40 -0700675
676 /*
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700677 * Preallocate and store tnodes before the actual work so we
678 * don't get into an inconsistent state if memory allocation
679 * fails. In case of failure we return the oldnode and inflate
Robert Olsson2f368952005-07-05 15:02:40 -0700680 * of tnode is ignored.
681 */
Olof Johansson91b9a272005-08-09 20:24:39 -0700682
683 for (i = 0; i < olen; i++) {
Robert Olsson2f368952005-07-05 15:02:40 -0700684 struct tnode *inode = (struct tnode *) tnode_get_child(oldtnode, i);
685
686 if (inode &&
687 IS_TNODE(inode) &&
688 inode->pos == oldtnode->pos + oldtnode->bits &&
689 inode->bits > 1) {
690 struct tnode *left, *right;
Stephen Hemmingerab66b4a2007-08-10 15:22:58 -0700691 t_key m = ~0U << (KEYLENGTH - 1) >> inode->pos;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700692
Robert Olsson2f368952005-07-05 15:02:40 -0700693 left = tnode_new(inode->key&(~m), inode->pos + 1,
694 inode->bits - 1);
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700695 if (!left)
696 goto nomem;
Olof Johansson91b9a272005-08-09 20:24:39 -0700697
Robert Olsson2f368952005-07-05 15:02:40 -0700698 right = tnode_new(inode->key|m, inode->pos + 1,
699 inode->bits - 1);
700
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900701 if (!right) {
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700702 tnode_free(left);
703 goto nomem;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900704 }
Robert Olsson2f368952005-07-05 15:02:40 -0700705
706 put_child(t, tn, 2*i, (struct node *) left);
707 put_child(t, tn, 2*i+1, (struct node *) right);
708 }
709 }
710
Olof Johansson91b9a272005-08-09 20:24:39 -0700711 for (i = 0; i < olen; i++) {
Stephen Hemmingerc95aaf92008-01-12 21:25:02 -0800712 struct tnode *inode;
Robert Olsson19baf832005-06-21 12:43:18 -0700713 struct node *node = tnode_get_child(oldtnode, i);
Olof Johansson91b9a272005-08-09 20:24:39 -0700714 struct tnode *left, *right;
715 int size, j;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700716
Robert Olsson19baf832005-06-21 12:43:18 -0700717 /* An empty child */
718 if (node == NULL)
719 continue;
720
721 /* A leaf or an internal node with skipped bits */
722
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700723 if (IS_LEAF(node) || ((struct tnode *) node)->pos >
Robert Olsson19baf832005-06-21 12:43:18 -0700724 tn->pos + tn->bits - 1) {
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700725 if (tkey_extract_bits(node->key, oldtnode->pos + oldtnode->bits,
Robert Olsson19baf832005-06-21 12:43:18 -0700726 1) == 0)
727 put_child(t, tn, 2*i, node);
728 else
729 put_child(t, tn, 2*i+1, node);
730 continue;
731 }
732
733 /* An internal node with two children */
734 inode = (struct tnode *) node;
735
736 if (inode->bits == 1) {
737 put_child(t, tn, 2*i, inode->child[0]);
738 put_child(t, tn, 2*i+1, inode->child[1]);
739
740 tnode_free(inode);
Olof Johansson91b9a272005-08-09 20:24:39 -0700741 continue;
Robert Olsson19baf832005-06-21 12:43:18 -0700742 }
743
Olof Johansson91b9a272005-08-09 20:24:39 -0700744 /* An internal node with more than two children */
Robert Olsson19baf832005-06-21 12:43:18 -0700745
Olof Johansson91b9a272005-08-09 20:24:39 -0700746 /* We will replace this node 'inode' with two new
747 * ones, 'left' and 'right', each with half of the
748 * original children. The two new nodes will have
749 * a position one bit further down the key and this
750 * means that the "significant" part of their keys
751 * (see the discussion near the top of this file)
752 * will differ by one bit, which will be "0" in
753 * left's key and "1" in right's key. Since we are
754 * moving the key position by one step, the bit that
755 * we are moving away from - the bit at position
756 * (inode->pos) - is the one that will differ between
757 * left and right. So... we synthesize that bit in the
758 * two new keys.
759 * The mask 'm' below will be a single "one" bit at
760 * the position (inode->pos)
761 */
Robert Olsson19baf832005-06-21 12:43:18 -0700762
Olof Johansson91b9a272005-08-09 20:24:39 -0700763 /* Use the old key, but set the new significant
764 * bit to zero.
765 */
Robert Olsson19baf832005-06-21 12:43:18 -0700766
Olof Johansson91b9a272005-08-09 20:24:39 -0700767 left = (struct tnode *) tnode_get_child(tn, 2*i);
768 put_child(t, tn, 2*i, NULL);
Robert Olsson19baf832005-06-21 12:43:18 -0700769
Olof Johansson91b9a272005-08-09 20:24:39 -0700770 BUG_ON(!left);
Robert Olsson2f368952005-07-05 15:02:40 -0700771
Olof Johansson91b9a272005-08-09 20:24:39 -0700772 right = (struct tnode *) tnode_get_child(tn, 2*i+1);
773 put_child(t, tn, 2*i+1, NULL);
Robert Olsson2f368952005-07-05 15:02:40 -0700774
Olof Johansson91b9a272005-08-09 20:24:39 -0700775 BUG_ON(!right);
Robert Olsson2f368952005-07-05 15:02:40 -0700776
Olof Johansson91b9a272005-08-09 20:24:39 -0700777 size = tnode_child_length(left);
778 for (j = 0; j < size; j++) {
779 put_child(t, left, j, inode->child[j]);
780 put_child(t, right, j, inode->child[j + size]);
Robert Olsson19baf832005-06-21 12:43:18 -0700781 }
Olof Johansson91b9a272005-08-09 20:24:39 -0700782 put_child(t, tn, 2*i, resize(t, left));
783 put_child(t, tn, 2*i+1, resize(t, right));
784
785 tnode_free(inode);
Robert Olsson19baf832005-06-21 12:43:18 -0700786 }
787 tnode_free(oldtnode);
788 return tn;
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700789nomem:
790 {
791 int size = tnode_child_length(tn);
792 int j;
793
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700794 for (j = 0; j < size; j++)
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700795 if (tn->child[j])
796 tnode_free((struct tnode *)tn->child[j]);
797
798 tnode_free(tn);
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700799
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700800 return ERR_PTR(-ENOMEM);
801 }
Robert Olsson19baf832005-06-21 12:43:18 -0700802}
803
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700804static struct tnode *halve(struct trie *t, struct tnode *tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700805{
806 struct tnode *oldtnode = tn;
807 struct node *left, *right;
808 int i;
809 int olen = tnode_child_length(tn);
810
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700811 pr_debug("In halve\n");
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700812
813 tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1);
Robert Olsson19baf832005-06-21 12:43:18 -0700814
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700815 if (!tn)
816 return ERR_PTR(-ENOMEM);
Robert Olsson2f368952005-07-05 15:02:40 -0700817
818 /*
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700819 * Preallocate and store tnodes before the actual work so we
820 * don't get into an inconsistent state if memory allocation
821 * fails. In case of failure we return the oldnode and halve
Robert Olsson2f368952005-07-05 15:02:40 -0700822 * of tnode is ignored.
823 */
824
Olof Johansson91b9a272005-08-09 20:24:39 -0700825 for (i = 0; i < olen; i += 2) {
Robert Olsson2f368952005-07-05 15:02:40 -0700826 left = tnode_get_child(oldtnode, i);
827 right = tnode_get_child(oldtnode, i+1);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700828
Robert Olsson2f368952005-07-05 15:02:40 -0700829 /* Two nonempty children */
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700830 if (left && right) {
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700831 struct tnode *newn;
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700832
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700833 newn = tnode_new(left->key, tn->pos + tn->bits, 1);
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700834
835 if (!newn)
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700836 goto nomem;
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700837
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700838 put_child(t, tn, i/2, (struct node *)newn);
Robert Olsson2f368952005-07-05 15:02:40 -0700839 }
Robert Olsson2f368952005-07-05 15:02:40 -0700840
Robert Olsson2f368952005-07-05 15:02:40 -0700841 }
Robert Olsson19baf832005-06-21 12:43:18 -0700842
Olof Johansson91b9a272005-08-09 20:24:39 -0700843 for (i = 0; i < olen; i += 2) {
844 struct tnode *newBinNode;
845
Robert Olsson19baf832005-06-21 12:43:18 -0700846 left = tnode_get_child(oldtnode, i);
847 right = tnode_get_child(oldtnode, i+1);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700848
Robert Olsson19baf832005-06-21 12:43:18 -0700849 /* At least one of the children is empty */
850 if (left == NULL) {
851 if (right == NULL) /* Both are empty */
852 continue;
853 put_child(t, tn, i/2, right);
Olof Johansson91b9a272005-08-09 20:24:39 -0700854 continue;
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700855 }
Olof Johansson91b9a272005-08-09 20:24:39 -0700856
857 if (right == NULL) {
Robert Olsson19baf832005-06-21 12:43:18 -0700858 put_child(t, tn, i/2, left);
Olof Johansson91b9a272005-08-09 20:24:39 -0700859 continue;
860 }
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700861
Robert Olsson19baf832005-06-21 12:43:18 -0700862 /* Two nonempty children */
Olof Johansson91b9a272005-08-09 20:24:39 -0700863 newBinNode = (struct tnode *) tnode_get_child(tn, i/2);
864 put_child(t, tn, i/2, NULL);
Olof Johansson91b9a272005-08-09 20:24:39 -0700865 put_child(t, newBinNode, 0, left);
866 put_child(t, newBinNode, 1, right);
867 put_child(t, tn, i/2, resize(t, newBinNode));
Robert Olsson19baf832005-06-21 12:43:18 -0700868 }
869 tnode_free(oldtnode);
870 return tn;
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700871nomem:
872 {
873 int size = tnode_child_length(tn);
874 int j;
875
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700876 for (j = 0; j < size; j++)
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700877 if (tn->child[j])
878 tnode_free((struct tnode *)tn->child[j]);
879
880 tnode_free(tn);
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700881
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700882 return ERR_PTR(-ENOMEM);
883 }
Robert Olsson19baf832005-06-21 12:43:18 -0700884}
885
Robert Olsson772cb712005-09-19 15:31:18 -0700886/* readside must use rcu_read_lock currently dump routines
Robert Olsson2373ce12005-08-25 13:01:29 -0700887 via get_fa_head and dump */
888
Robert Olsson772cb712005-09-19 15:31:18 -0700889static struct leaf_info *find_leaf_info(struct leaf *l, int plen)
Robert Olsson19baf832005-06-21 12:43:18 -0700890{
Robert Olsson772cb712005-09-19 15:31:18 -0700891 struct hlist_head *head = &l->list;
Robert Olsson19baf832005-06-21 12:43:18 -0700892 struct hlist_node *node;
893 struct leaf_info *li;
894
Robert Olsson2373ce12005-08-25 13:01:29 -0700895 hlist_for_each_entry_rcu(li, node, head, hlist)
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700896 if (li->plen == plen)
Robert Olsson19baf832005-06-21 12:43:18 -0700897 return li;
Olof Johansson91b9a272005-08-09 20:24:39 -0700898
Robert Olsson19baf832005-06-21 12:43:18 -0700899 return NULL;
900}
901
902static inline struct list_head * get_fa_head(struct leaf *l, int plen)
903{
Robert Olsson772cb712005-09-19 15:31:18 -0700904 struct leaf_info *li = find_leaf_info(l, plen);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700905
Olof Johansson91b9a272005-08-09 20:24:39 -0700906 if (!li)
907 return NULL;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700908
Olof Johansson91b9a272005-08-09 20:24:39 -0700909 return &li->falh;
Robert Olsson19baf832005-06-21 12:43:18 -0700910}
911
912static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new)
913{
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900914 struct leaf_info *li = NULL, *last = NULL;
915 struct hlist_node *node;
Robert Olsson19baf832005-06-21 12:43:18 -0700916
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900917 if (hlist_empty(head)) {
918 hlist_add_head_rcu(&new->hlist, head);
919 } else {
920 hlist_for_each_entry(li, node, head, hlist) {
921 if (new->plen > li->plen)
922 break;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700923
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900924 last = li;
925 }
926 if (last)
927 hlist_add_after_rcu(&last->hlist, &new->hlist);
928 else
929 hlist_add_before_rcu(&new->hlist, &li->hlist);
930 }
Robert Olsson19baf832005-06-21 12:43:18 -0700931}
932
Robert Olsson2373ce12005-08-25 13:01:29 -0700933/* rcu_read_lock needs to be hold by caller from readside */
934
Robert Olsson19baf832005-06-21 12:43:18 -0700935static struct leaf *
936fib_find_node(struct trie *t, u32 key)
937{
938 int pos;
939 struct tnode *tn;
940 struct node *n;
941
942 pos = 0;
Robert Olsson2373ce12005-08-25 13:01:29 -0700943 n = rcu_dereference(t->trie);
Robert Olsson19baf832005-06-21 12:43:18 -0700944
945 while (n != NULL && NODE_TYPE(n) == T_TNODE) {
946 tn = (struct tnode *) n;
Olof Johansson91b9a272005-08-09 20:24:39 -0700947
Robert Olsson19baf832005-06-21 12:43:18 -0700948 check_tnode(tn);
Olof Johansson91b9a272005-08-09 20:24:39 -0700949
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700950 if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
Olof Johansson91b9a272005-08-09 20:24:39 -0700951 pos = tn->pos + tn->bits;
Eric Dumazetb59cfbf2008-01-18 03:31:36 -0800952 n = tnode_get_child_rcu(tn, tkey_extract_bits(key, tn->pos, tn->bits));
Olof Johansson91b9a272005-08-09 20:24:39 -0700953 } else
Robert Olsson19baf832005-06-21 12:43:18 -0700954 break;
955 }
956 /* Case we have found a leaf. Compare prefixes */
957
Olof Johansson91b9a272005-08-09 20:24:39 -0700958 if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key))
959 return (struct leaf *)n;
960
Robert Olsson19baf832005-06-21 12:43:18 -0700961 return NULL;
962}
963
964static struct node *trie_rebalance(struct trie *t, struct tnode *tn)
965{
Robert Olsson19baf832005-06-21 12:43:18 -0700966 int wasfull;
Stephen Hemminger06801912007-08-10 15:22:13 -0700967 t_key cindex, key = tn->key;
968 struct tnode *tp;
Robert Olsson19baf832005-06-21 12:43:18 -0700969
Stephen Hemminger06801912007-08-10 15:22:13 -0700970 while (tn != NULL && (tp = node_parent((struct node *)tn)) != NULL) {
Robert Olsson19baf832005-06-21 12:43:18 -0700971 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
972 wasfull = tnode_full(tp, tnode_get_child(tp, cindex));
973 tn = (struct tnode *) resize (t, (struct tnode *)tn);
974 tnode_put_child_reorg((struct tnode *)tp, cindex,(struct node*)tn, wasfull);
Olof Johansson91b9a272005-08-09 20:24:39 -0700975
Stephen Hemminger06801912007-08-10 15:22:13 -0700976 tp = node_parent((struct node *) tn);
977 if (!tp)
Robert Olsson19baf832005-06-21 12:43:18 -0700978 break;
Stephen Hemminger06801912007-08-10 15:22:13 -0700979 tn = tp;
Robert Olsson19baf832005-06-21 12:43:18 -0700980 }
Stephen Hemminger06801912007-08-10 15:22:13 -0700981
Robert Olsson19baf832005-06-21 12:43:18 -0700982 /* Handle last (top) tnode */
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700983 if (IS_TNODE(tn))
Robert Olsson19baf832005-06-21 12:43:18 -0700984 tn = (struct tnode*) resize(t, (struct tnode *)tn);
985
986 return (struct node*) tn;
987}
988
Robert Olsson2373ce12005-08-25 13:01:29 -0700989/* only used from updater-side */
990
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -0800991static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
Robert Olsson19baf832005-06-21 12:43:18 -0700992{
993 int pos, newpos;
994 struct tnode *tp = NULL, *tn = NULL;
995 struct node *n;
996 struct leaf *l;
997 int missbit;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700998 struct list_head *fa_head = NULL;
Robert Olsson19baf832005-06-21 12:43:18 -0700999 struct leaf_info *li;
1000 t_key cindex;
1001
1002 pos = 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001003 n = t->trie;
Robert Olsson19baf832005-06-21 12:43:18 -07001004
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001005 /* If we point to NULL, stop. Either the tree is empty and we should
1006 * just put a new leaf in if, or we have reached an empty child slot,
Robert Olsson19baf832005-06-21 12:43:18 -07001007 * and we should just put our new leaf in that.
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001008 * If we point to a T_TNODE, check if it matches our key. Note that
1009 * a T_TNODE might be skipping any number of bits - its 'pos' need
Robert Olsson19baf832005-06-21 12:43:18 -07001010 * not be the parent's 'pos'+'bits'!
1011 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001012 * If it does match the current key, get pos/bits from it, extract
Robert Olsson19baf832005-06-21 12:43:18 -07001013 * the index from our key, push the T_TNODE and walk the tree.
1014 *
1015 * If it doesn't, we have to replace it with a new T_TNODE.
1016 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001017 * If we point to a T_LEAF, it might or might not have the same key
1018 * as we do. If it does, just change the value, update the T_LEAF's
1019 * value, and return it.
Robert Olsson19baf832005-06-21 12:43:18 -07001020 * If it doesn't, we need to replace it with a T_TNODE.
1021 */
1022
1023 while (n != NULL && NODE_TYPE(n) == T_TNODE) {
1024 tn = (struct tnode *) n;
Olof Johansson91b9a272005-08-09 20:24:39 -07001025
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001026 check_tnode(tn);
Olof Johansson91b9a272005-08-09 20:24:39 -07001027
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001028 if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
Robert Olsson19baf832005-06-21 12:43:18 -07001029 tp = tn;
Olof Johansson91b9a272005-08-09 20:24:39 -07001030 pos = tn->pos + tn->bits;
Robert Olsson19baf832005-06-21 12:43:18 -07001031 n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits));
1032
Stephen Hemminger06801912007-08-10 15:22:13 -07001033 BUG_ON(n && node_parent(n) != tn);
Olof Johansson91b9a272005-08-09 20:24:39 -07001034 } else
Robert Olsson19baf832005-06-21 12:43:18 -07001035 break;
1036 }
1037
1038 /*
1039 * n ----> NULL, LEAF or TNODE
1040 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001041 * tp is n's (parent) ----> NULL or TNODE
Robert Olsson19baf832005-06-21 12:43:18 -07001042 */
1043
Olof Johansson91b9a272005-08-09 20:24:39 -07001044 BUG_ON(tp && IS_LEAF(tp));
Robert Olsson19baf832005-06-21 12:43:18 -07001045
1046 /* Case 1: n is a leaf. Compare prefixes */
1047
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001048 if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) {
Stephen Hemmingerc95aaf92008-01-12 21:25:02 -08001049 l = (struct leaf *) n;
Robert Olsson19baf832005-06-21 12:43:18 -07001050 li = leaf_info_new(plen);
Olof Johansson91b9a272005-08-09 20:24:39 -07001051
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001052 if (!li)
1053 return NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07001054
1055 fa_head = &li->falh;
1056 insert_leaf_info(&l->list, li);
1057 goto done;
1058 }
Robert Olsson19baf832005-06-21 12:43:18 -07001059 l = leaf_new();
1060
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001061 if (!l)
1062 return NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07001063
1064 l->key = key;
1065 li = leaf_info_new(plen);
1066
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001067 if (!li) {
Robert Olssonf835e472005-06-28 15:00:39 -07001068 tnode_free((struct tnode *) l);
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001069 return NULL;
Robert Olssonf835e472005-06-28 15:00:39 -07001070 }
Robert Olsson19baf832005-06-21 12:43:18 -07001071
1072 fa_head = &li->falh;
1073 insert_leaf_info(&l->list, li);
1074
Robert Olsson19baf832005-06-21 12:43:18 -07001075 if (t->trie && n == NULL) {
Olof Johansson91b9a272005-08-09 20:24:39 -07001076 /* Case 2: n is NULL, and will just insert a new leaf */
Robert Olsson19baf832005-06-21 12:43:18 -07001077
Stephen Hemminger06801912007-08-10 15:22:13 -07001078 node_set_parent((struct node *)l, tp);
Robert Olsson19baf832005-06-21 12:43:18 -07001079
Olof Johansson91b9a272005-08-09 20:24:39 -07001080 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1081 put_child(t, (struct tnode *)tp, cindex, (struct node *)l);
1082 } else {
1083 /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001084 /*
1085 * Add a new tnode here
Robert Olsson19baf832005-06-21 12:43:18 -07001086 * first tnode need some special handling
1087 */
1088
1089 if (tp)
Olof Johansson91b9a272005-08-09 20:24:39 -07001090 pos = tp->pos+tp->bits;
Robert Olsson19baf832005-06-21 12:43:18 -07001091 else
Olof Johansson91b9a272005-08-09 20:24:39 -07001092 pos = 0;
1093
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001094 if (n) {
Robert Olsson19baf832005-06-21 12:43:18 -07001095 newpos = tkey_mismatch(key, pos, n->key);
1096 tn = tnode_new(n->key, newpos, 1);
Olof Johansson91b9a272005-08-09 20:24:39 -07001097 } else {
Robert Olsson19baf832005-06-21 12:43:18 -07001098 newpos = 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001099 tn = tnode_new(key, newpos, 1); /* First tnode */
Robert Olsson19baf832005-06-21 12:43:18 -07001100 }
Robert Olsson19baf832005-06-21 12:43:18 -07001101
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001102 if (!tn) {
Robert Olssonf835e472005-06-28 15:00:39 -07001103 free_leaf_info(li);
1104 tnode_free((struct tnode *) l);
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001105 return NULL;
Olof Johansson91b9a272005-08-09 20:24:39 -07001106 }
1107
Stephen Hemminger06801912007-08-10 15:22:13 -07001108 node_set_parent((struct node *)tn, tp);
Robert Olsson19baf832005-06-21 12:43:18 -07001109
Olof Johansson91b9a272005-08-09 20:24:39 -07001110 missbit = tkey_extract_bits(key, newpos, 1);
Robert Olsson19baf832005-06-21 12:43:18 -07001111 put_child(t, tn, missbit, (struct node *)l);
1112 put_child(t, tn, 1-missbit, n);
1113
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001114 if (tp) {
Robert Olsson19baf832005-06-21 12:43:18 -07001115 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1116 put_child(t, (struct tnode *)tp, cindex, (struct node *)tn);
Olof Johansson91b9a272005-08-09 20:24:39 -07001117 } else {
Robert Olsson2373ce12005-08-25 13:01:29 -07001118 rcu_assign_pointer(t->trie, (struct node *)tn); /* First tnode */
Robert Olsson19baf832005-06-21 12:43:18 -07001119 tp = tn;
1120 }
1121 }
Olof Johansson91b9a272005-08-09 20:24:39 -07001122
1123 if (tp && tp->pos + tp->bits > 32)
Stephen Hemminger78c66712005-09-21 00:15:39 -07001124 printk(KERN_WARNING "fib_trie tp=%p pos=%d, bits=%d, key=%0x plen=%d\n",
Robert Olsson19baf832005-06-21 12:43:18 -07001125 tp, tp->pos, tp->bits, key, plen);
Olof Johansson91b9a272005-08-09 20:24:39 -07001126
Robert Olsson19baf832005-06-21 12:43:18 -07001127 /* Rebalance the trie */
Robert Olsson2373ce12005-08-25 13:01:29 -07001128
1129 rcu_assign_pointer(t->trie, trie_rebalance(t, tp));
Robert Olssonf835e472005-06-28 15:00:39 -07001130done:
Robert Olsson19baf832005-06-21 12:43:18 -07001131 return fa_head;
1132}
1133
Robert Olssond562f1f2007-03-26 14:22:22 -07001134/*
1135 * Caller must hold RTNL.
1136 */
Thomas Graf4e902c52006-08-17 18:14:52 -07001137static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg)
Robert Olsson19baf832005-06-21 12:43:18 -07001138{
1139 struct trie *t = (struct trie *) tb->tb_data;
1140 struct fib_alias *fa, *new_fa;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001141 struct list_head *fa_head = NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07001142 struct fib_info *fi;
Thomas Graf4e902c52006-08-17 18:14:52 -07001143 int plen = cfg->fc_dst_len;
1144 u8 tos = cfg->fc_tos;
Robert Olsson19baf832005-06-21 12:43:18 -07001145 u32 key, mask;
1146 int err;
1147 struct leaf *l;
1148
1149 if (plen > 32)
1150 return -EINVAL;
1151
Thomas Graf4e902c52006-08-17 18:14:52 -07001152 key = ntohl(cfg->fc_dst);
Robert Olsson19baf832005-06-21 12:43:18 -07001153
Patrick McHardy2dfe55b2006-08-10 23:08:33 -07001154 pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen);
Robert Olsson19baf832005-06-21 12:43:18 -07001155
Olof Johansson91b9a272005-08-09 20:24:39 -07001156 mask = ntohl(inet_make_mask(plen));
Robert Olsson19baf832005-06-21 12:43:18 -07001157
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001158 if (key & ~mask)
Robert Olsson19baf832005-06-21 12:43:18 -07001159 return -EINVAL;
1160
1161 key = key & mask;
1162
Thomas Graf4e902c52006-08-17 18:14:52 -07001163 fi = fib_create_info(cfg);
1164 if (IS_ERR(fi)) {
1165 err = PTR_ERR(fi);
Robert Olsson19baf832005-06-21 12:43:18 -07001166 goto err;
Thomas Graf4e902c52006-08-17 18:14:52 -07001167 }
Robert Olsson19baf832005-06-21 12:43:18 -07001168
1169 l = fib_find_node(t, key);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001170 fa = NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07001171
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001172 if (l) {
Robert Olsson19baf832005-06-21 12:43:18 -07001173 fa_head = get_fa_head(l, plen);
1174 fa = fib_find_alias(fa_head, tos, fi->fib_priority);
1175 }
1176
1177 /* Now fa, if non-NULL, points to the first fib alias
1178 * with the same keys [prefix,tos,priority], if such key already
1179 * exists or to the node before which we will insert new one.
1180 *
1181 * If fa is NULL, we will need to allocate a new one and
1182 * insert to the head of f.
1183 *
1184 * If f is NULL, no fib node matched the destination key
1185 * and we need to allocate a new one of those as well.
1186 */
1187
Olof Johansson91b9a272005-08-09 20:24:39 -07001188 if (fa && fa->fa_info->fib_priority == fi->fib_priority) {
Robert Olsson19baf832005-06-21 12:43:18 -07001189 struct fib_alias *fa_orig;
1190
1191 err = -EEXIST;
Thomas Graf4e902c52006-08-17 18:14:52 -07001192 if (cfg->fc_nlflags & NLM_F_EXCL)
Robert Olsson19baf832005-06-21 12:43:18 -07001193 goto out;
1194
Thomas Graf4e902c52006-08-17 18:14:52 -07001195 if (cfg->fc_nlflags & NLM_F_REPLACE) {
Robert Olsson19baf832005-06-21 12:43:18 -07001196 struct fib_info *fi_drop;
1197 u8 state;
1198
Joonwoo Park67250332008-01-18 03:45:18 -08001199 if (fi->fib_treeref > 1)
1200 goto out;
1201
Robert Olsson2373ce12005-08-25 13:01:29 -07001202 err = -ENOBUFS;
Christoph Lametere94b1762006-12-06 20:33:17 -08001203 new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
Robert Olsson2373ce12005-08-25 13:01:29 -07001204 if (new_fa == NULL)
1205 goto out;
Robert Olsson19baf832005-06-21 12:43:18 -07001206
1207 fi_drop = fa->fa_info;
Robert Olsson2373ce12005-08-25 13:01:29 -07001208 new_fa->fa_tos = fa->fa_tos;
1209 new_fa->fa_info = fi;
Thomas Graf4e902c52006-08-17 18:14:52 -07001210 new_fa->fa_type = cfg->fc_type;
1211 new_fa->fa_scope = cfg->fc_scope;
Robert Olsson19baf832005-06-21 12:43:18 -07001212 state = fa->fa_state;
Robert Olsson2373ce12005-08-25 13:01:29 -07001213 new_fa->fa_state &= ~FA_S_ACCESSED;
Robert Olsson19baf832005-06-21 12:43:18 -07001214
Robert Olsson2373ce12005-08-25 13:01:29 -07001215 list_replace_rcu(&fa->fa_list, &new_fa->fa_list);
1216 alias_free_mem_rcu(fa);
Robert Olsson19baf832005-06-21 12:43:18 -07001217
1218 fib_release_info(fi_drop);
1219 if (state & FA_S_ACCESSED)
Olof Johansson91b9a272005-08-09 20:24:39 -07001220 rt_cache_flush(-1);
Milan Kocianb8f55832007-05-23 14:55:06 -07001221 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen,
1222 tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE);
Robert Olsson19baf832005-06-21 12:43:18 -07001223
Olof Johansson91b9a272005-08-09 20:24:39 -07001224 goto succeeded;
Robert Olsson19baf832005-06-21 12:43:18 -07001225 }
1226 /* Error if we find a perfect match which
1227 * uses the same scope, type, and nexthop
1228 * information.
1229 */
1230 fa_orig = fa;
1231 list_for_each_entry(fa, fa_orig->fa_list.prev, fa_list) {
1232 if (fa->fa_tos != tos)
1233 break;
1234 if (fa->fa_info->fib_priority != fi->fib_priority)
1235 break;
Thomas Graf4e902c52006-08-17 18:14:52 -07001236 if (fa->fa_type == cfg->fc_type &&
1237 fa->fa_scope == cfg->fc_scope &&
Robert Olsson19baf832005-06-21 12:43:18 -07001238 fa->fa_info == fi) {
1239 goto out;
1240 }
1241 }
Thomas Graf4e902c52006-08-17 18:14:52 -07001242 if (!(cfg->fc_nlflags & NLM_F_APPEND))
Robert Olsson19baf832005-06-21 12:43:18 -07001243 fa = fa_orig;
1244 }
1245 err = -ENOENT;
Thomas Graf4e902c52006-08-17 18:14:52 -07001246 if (!(cfg->fc_nlflags & NLM_F_CREATE))
Robert Olsson19baf832005-06-21 12:43:18 -07001247 goto out;
1248
1249 err = -ENOBUFS;
Christoph Lametere94b1762006-12-06 20:33:17 -08001250 new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
Robert Olsson19baf832005-06-21 12:43:18 -07001251 if (new_fa == NULL)
1252 goto out;
1253
1254 new_fa->fa_info = fi;
1255 new_fa->fa_tos = tos;
Thomas Graf4e902c52006-08-17 18:14:52 -07001256 new_fa->fa_type = cfg->fc_type;
1257 new_fa->fa_scope = cfg->fc_scope;
Robert Olsson19baf832005-06-21 12:43:18 -07001258 new_fa->fa_state = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001259 /*
1260 * Insert new entry to the list.
1261 */
1262
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001263 if (!fa_head) {
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001264 fa_head = fib_insert_node(t, key, plen);
1265 if (unlikely(!fa_head)) {
1266 err = -ENOMEM;
Robert Olssonf835e472005-06-28 15:00:39 -07001267 goto out_free_new_fa;
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001268 }
Robert Olssonf835e472005-06-28 15:00:39 -07001269 }
Robert Olsson19baf832005-06-21 12:43:18 -07001270
Robert Olsson2373ce12005-08-25 13:01:29 -07001271 list_add_tail_rcu(&new_fa->fa_list,
1272 (fa ? &fa->fa_list : fa_head));
Robert Olsson19baf832005-06-21 12:43:18 -07001273
Stephen Hemmingerd717a9a2008-01-14 23:11:54 -08001274 t->size++;
1275
Robert Olsson19baf832005-06-21 12:43:18 -07001276 rt_cache_flush(-1);
Thomas Graf4e902c52006-08-17 18:14:52 -07001277 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id,
Milan Kocianb8f55832007-05-23 14:55:06 -07001278 &cfg->fc_nlinfo, 0);
Robert Olsson19baf832005-06-21 12:43:18 -07001279succeeded:
1280 return 0;
Robert Olssonf835e472005-06-28 15:00:39 -07001281
1282out_free_new_fa:
1283 kmem_cache_free(fn_alias_kmem, new_fa);
Robert Olsson19baf832005-06-21 12:43:18 -07001284out:
1285 fib_release_info(fi);
Olof Johansson91b9a272005-08-09 20:24:39 -07001286err:
Robert Olsson19baf832005-06-21 12:43:18 -07001287 return err;
1288}
1289
Robert Olsson2373ce12005-08-25 13:01:29 -07001290
Robert Olsson772cb712005-09-19 15:31:18 -07001291/* should be called with rcu_read_lock */
Stephen Hemminger0c7770c2005-08-23 21:59:41 -07001292static inline int check_leaf(struct trie *t, struct leaf *l,
1293 t_key key, int *plen, const struct flowi *flp,
Patrick McHardy06c74272005-08-23 22:06:09 -07001294 struct fib_result *res)
Robert Olsson19baf832005-06-21 12:43:18 -07001295{
Patrick McHardy06c74272005-08-23 22:06:09 -07001296 int err, i;
Al Viro888454c2006-09-19 13:42:46 -07001297 __be32 mask;
Robert Olsson19baf832005-06-21 12:43:18 -07001298 struct leaf_info *li;
1299 struct hlist_head *hhead = &l->list;
1300 struct hlist_node *node;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001301
Robert Olsson2373ce12005-08-25 13:01:29 -07001302 hlist_for_each_entry_rcu(li, node, hhead, hlist) {
Robert Olsson19baf832005-06-21 12:43:18 -07001303 i = li->plen;
Al Viro888454c2006-09-19 13:42:46 -07001304 mask = inet_make_mask(i);
1305 if (l->key != (key & ntohl(mask)))
Robert Olsson19baf832005-06-21 12:43:18 -07001306 continue;
1307
Al Viro888454c2006-09-19 13:42:46 -07001308 if ((err = fib_semantic_match(&li->falh, flp, res, htonl(l->key), mask, i)) <= 0) {
Robert Olsson19baf832005-06-21 12:43:18 -07001309 *plen = i;
1310#ifdef CONFIG_IP_FIB_TRIE_STATS
1311 t->stats.semantic_match_passed++;
1312#endif
Patrick McHardy06c74272005-08-23 22:06:09 -07001313 return err;
Robert Olsson19baf832005-06-21 12:43:18 -07001314 }
1315#ifdef CONFIG_IP_FIB_TRIE_STATS
1316 t->stats.semantic_match_miss++;
1317#endif
1318 }
Patrick McHardy06c74272005-08-23 22:06:09 -07001319 return 1;
Robert Olsson19baf832005-06-21 12:43:18 -07001320}
1321
1322static int
1323fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
1324{
1325 struct trie *t = (struct trie *) tb->tb_data;
1326 int plen, ret = 0;
1327 struct node *n;
1328 struct tnode *pn;
1329 int pos, bits;
Olof Johansson91b9a272005-08-09 20:24:39 -07001330 t_key key = ntohl(flp->fl4_dst);
Robert Olsson19baf832005-06-21 12:43:18 -07001331 int chopped_off;
1332 t_key cindex = 0;
1333 int current_prefix_length = KEYLENGTH;
Olof Johansson91b9a272005-08-09 20:24:39 -07001334 struct tnode *cn;
1335 t_key node_prefix, key_prefix, pref_mismatch;
1336 int mp;
1337
Robert Olsson2373ce12005-08-25 13:01:29 -07001338 rcu_read_lock();
Robert Olsson19baf832005-06-21 12:43:18 -07001339
Robert Olsson2373ce12005-08-25 13:01:29 -07001340 n = rcu_dereference(t->trie);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001341 if (!n)
Robert Olsson19baf832005-06-21 12:43:18 -07001342 goto failed;
1343
1344#ifdef CONFIG_IP_FIB_TRIE_STATS
1345 t->stats.gets++;
1346#endif
1347
1348 /* Just a leaf? */
1349 if (IS_LEAF(n)) {
Patrick McHardy06c74272005-08-23 22:06:09 -07001350 if ((ret = check_leaf(t, (struct leaf *)n, key, &plen, flp, res)) <= 0)
Robert Olsson19baf832005-06-21 12:43:18 -07001351 goto found;
1352 goto failed;
1353 }
1354 pn = (struct tnode *) n;
1355 chopped_off = 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001356
Olof Johansson91b9a272005-08-09 20:24:39 -07001357 while (pn) {
Robert Olsson19baf832005-06-21 12:43:18 -07001358 pos = pn->pos;
1359 bits = pn->bits;
1360
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001361 if (!chopped_off)
Stephen Hemmingerab66b4a2007-08-10 15:22:58 -07001362 cindex = tkey_extract_bits(mask_pfx(key, current_prefix_length),
1363 pos, bits);
Robert Olsson19baf832005-06-21 12:43:18 -07001364
1365 n = tnode_get_child(pn, cindex);
1366
1367 if (n == NULL) {
1368#ifdef CONFIG_IP_FIB_TRIE_STATS
1369 t->stats.null_node_hit++;
1370#endif
1371 goto backtrace;
1372 }
1373
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001374 if (IS_LEAF(n)) {
Patrick McHardy06c74272005-08-23 22:06:09 -07001375 if ((ret = check_leaf(t, (struct leaf *)n, key, &plen, flp, res)) <= 0)
Robert Olsson19baf832005-06-21 12:43:18 -07001376 goto found;
Olof Johansson91b9a272005-08-09 20:24:39 -07001377 else
1378 goto backtrace;
1379 }
1380
1381#define HL_OPTIMIZE
1382#ifdef HL_OPTIMIZE
1383 cn = (struct tnode *)n;
1384
1385 /*
1386 * It's a tnode, and we can do some extra checks here if we
1387 * like, to avoid descending into a dead-end branch.
1388 * This tnode is in the parent's child array at index
1389 * key[p_pos..p_pos+p_bits] but potentially with some bits
1390 * chopped off, so in reality the index may be just a
1391 * subprefix, padded with zero at the end.
1392 * We can also take a look at any skipped bits in this
1393 * tnode - everything up to p_pos is supposed to be ok,
1394 * and the non-chopped bits of the index (se previous
1395 * paragraph) are also guaranteed ok, but the rest is
1396 * considered unknown.
1397 *
1398 * The skipped bits are key[pos+bits..cn->pos].
1399 */
1400
1401 /* If current_prefix_length < pos+bits, we are already doing
1402 * actual prefix matching, which means everything from
1403 * pos+(bits-chopped_off) onward must be zero along some
1404 * branch of this subtree - otherwise there is *no* valid
1405 * prefix present. Here we can only check the skipped
1406 * bits. Remember, since we have already indexed into the
1407 * parent's child array, we know that the bits we chopped of
1408 * *are* zero.
1409 */
1410
1411 /* NOTA BENE: CHECKING ONLY SKIPPED BITS FOR THE NEW NODE HERE */
1412
1413 if (current_prefix_length < pos+bits) {
1414 if (tkey_extract_bits(cn->key, current_prefix_length,
1415 cn->pos - current_prefix_length) != 0 ||
1416 !(cn->child[0]))
1417 goto backtrace;
1418 }
1419
1420 /*
1421 * If chopped_off=0, the index is fully validated and we
1422 * only need to look at the skipped bits for this, the new,
1423 * tnode. What we actually want to do is to find out if
1424 * these skipped bits match our key perfectly, or if we will
1425 * have to count on finding a matching prefix further down,
1426 * because if we do, we would like to have some way of
1427 * verifying the existence of such a prefix at this point.
1428 */
1429
1430 /* The only thing we can do at this point is to verify that
1431 * any such matching prefix can indeed be a prefix to our
1432 * key, and if the bits in the node we are inspecting that
1433 * do not match our key are not ZERO, this cannot be true.
1434 * Thus, find out where there is a mismatch (before cn->pos)
1435 * and verify that all the mismatching bits are zero in the
1436 * new tnode's key.
1437 */
1438
1439 /* Note: We aren't very concerned about the piece of the key
1440 * that precede pn->pos+pn->bits, since these have already been
1441 * checked. The bits after cn->pos aren't checked since these are
1442 * by definition "unknown" at this point. Thus, what we want to
1443 * see is if we are about to enter the "prefix matching" state,
1444 * and in that case verify that the skipped bits that will prevail
1445 * throughout this subtree are zero, as they have to be if we are
1446 * to find a matching prefix.
1447 */
1448
Stephen Hemmingerab66b4a2007-08-10 15:22:58 -07001449 node_prefix = mask_pfx(cn->key, cn->pos);
1450 key_prefix = mask_pfx(key, cn->pos);
Olof Johansson91b9a272005-08-09 20:24:39 -07001451 pref_mismatch = key_prefix^node_prefix;
1452 mp = 0;
1453
1454 /* In short: If skipped bits in this node do not match the search
1455 * key, enter the "prefix matching" state.directly.
1456 */
1457 if (pref_mismatch) {
1458 while (!(pref_mismatch & (1<<(KEYLENGTH-1)))) {
1459 mp++;
1460 pref_mismatch = pref_mismatch <<1;
1461 }
1462 key_prefix = tkey_extract_bits(cn->key, mp, cn->pos-mp);
1463
1464 if (key_prefix != 0)
1465 goto backtrace;
1466
1467 if (current_prefix_length >= cn->pos)
1468 current_prefix_length = mp;
1469 }
1470#endif
1471 pn = (struct tnode *)n; /* Descend */
1472 chopped_off = 0;
1473 continue;
1474
Robert Olsson19baf832005-06-21 12:43:18 -07001475backtrace:
1476 chopped_off++;
1477
1478 /* As zero don't change the child key (cindex) */
Olof Johansson91b9a272005-08-09 20:24:39 -07001479 while ((chopped_off <= pn->bits) && !(cindex & (1<<(chopped_off-1))))
Robert Olsson19baf832005-06-21 12:43:18 -07001480 chopped_off++;
Robert Olsson19baf832005-06-21 12:43:18 -07001481
1482 /* Decrease current_... with bits chopped off */
1483 if (current_prefix_length > pn->pos + pn->bits - chopped_off)
1484 current_prefix_length = pn->pos + pn->bits - chopped_off;
Olof Johansson91b9a272005-08-09 20:24:39 -07001485
Robert Olsson19baf832005-06-21 12:43:18 -07001486 /*
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001487 * Either we do the actual chop off according or if we have
Robert Olsson19baf832005-06-21 12:43:18 -07001488 * chopped off all bits in this tnode walk up to our parent.
1489 */
1490
Olof Johansson91b9a272005-08-09 20:24:39 -07001491 if (chopped_off <= pn->bits) {
Robert Olsson19baf832005-06-21 12:43:18 -07001492 cindex &= ~(1 << (chopped_off-1));
Olof Johansson91b9a272005-08-09 20:24:39 -07001493 } else {
Stephen Hemminger06801912007-08-10 15:22:13 -07001494 struct tnode *parent = node_parent((struct node *) pn);
1495 if (!parent)
Robert Olsson19baf832005-06-21 12:43:18 -07001496 goto failed;
Olof Johansson91b9a272005-08-09 20:24:39 -07001497
Robert Olsson19baf832005-06-21 12:43:18 -07001498 /* Get Child's index */
Stephen Hemminger06801912007-08-10 15:22:13 -07001499 cindex = tkey_extract_bits(pn->key, parent->pos, parent->bits);
1500 pn = parent;
Robert Olsson19baf832005-06-21 12:43:18 -07001501 chopped_off = 0;
1502
1503#ifdef CONFIG_IP_FIB_TRIE_STATS
1504 t->stats.backtrack++;
1505#endif
1506 goto backtrace;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001507 }
Robert Olsson19baf832005-06-21 12:43:18 -07001508 }
1509failed:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001510 ret = 1;
Robert Olsson19baf832005-06-21 12:43:18 -07001511found:
Robert Olsson2373ce12005-08-25 13:01:29 -07001512 rcu_read_unlock();
Robert Olsson19baf832005-06-21 12:43:18 -07001513 return ret;
1514}
1515
Robert Olsson2373ce12005-08-25 13:01:29 -07001516/* only called from updater side */
Robert Olsson19baf832005-06-21 12:43:18 -07001517static int trie_leaf_remove(struct trie *t, t_key key)
1518{
1519 t_key cindex;
1520 struct tnode *tp = NULL;
1521 struct node *n = t->trie;
1522 struct leaf *l;
1523
Stephen Hemminger0c7770c2005-08-23 21:59:41 -07001524 pr_debug("entering trie_leaf_remove(%p)\n", n);
Robert Olsson19baf832005-06-21 12:43:18 -07001525
1526 /* Note that in the case skipped bits, those bits are *not* checked!
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001527 * When we finish this, we will have NULL or a T_LEAF, and the
Robert Olsson19baf832005-06-21 12:43:18 -07001528 * T_LEAF may or may not match our key.
1529 */
1530
Olof Johansson91b9a272005-08-09 20:24:39 -07001531 while (n != NULL && IS_TNODE(n)) {
Robert Olsson19baf832005-06-21 12:43:18 -07001532 struct tnode *tn = (struct tnode *) n;
1533 check_tnode(tn);
1534 n = tnode_get_child(tn ,tkey_extract_bits(key, tn->pos, tn->bits));
1535
Stephen Hemminger06801912007-08-10 15:22:13 -07001536 BUG_ON(n && node_parent(n) != tn);
Olof Johansson91b9a272005-08-09 20:24:39 -07001537 }
Robert Olsson19baf832005-06-21 12:43:18 -07001538 l = (struct leaf *) n;
1539
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001540 if (!n || !tkey_equals(l->key, key))
Robert Olsson19baf832005-06-21 12:43:18 -07001541 return 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001542
1543 /*
1544 * Key found.
1545 * Remove the leaf and rebalance the tree
Robert Olsson19baf832005-06-21 12:43:18 -07001546 */
1547
Robert Olsson19baf832005-06-21 12:43:18 -07001548 t->size--;
1549
Stephen Hemminger06801912007-08-10 15:22:13 -07001550 tp = node_parent(n);
Robert Olsson19baf832005-06-21 12:43:18 -07001551 tnode_free((struct tnode *) n);
1552
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001553 if (tp) {
Robert Olsson19baf832005-06-21 12:43:18 -07001554 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1555 put_child(t, (struct tnode *)tp, cindex, NULL);
Robert Olsson2373ce12005-08-25 13:01:29 -07001556 rcu_assign_pointer(t->trie, trie_rebalance(t, tp));
Olof Johansson91b9a272005-08-09 20:24:39 -07001557 } else
Robert Olsson2373ce12005-08-25 13:01:29 -07001558 rcu_assign_pointer(t->trie, NULL);
Robert Olsson19baf832005-06-21 12:43:18 -07001559
1560 return 1;
1561}
1562
Robert Olssond562f1f2007-03-26 14:22:22 -07001563/*
1564 * Caller must hold RTNL.
1565 */
Thomas Graf4e902c52006-08-17 18:14:52 -07001566static int fn_trie_delete(struct fib_table *tb, struct fib_config *cfg)
Robert Olsson19baf832005-06-21 12:43:18 -07001567{
1568 struct trie *t = (struct trie *) tb->tb_data;
1569 u32 key, mask;
Thomas Graf4e902c52006-08-17 18:14:52 -07001570 int plen = cfg->fc_dst_len;
1571 u8 tos = cfg->fc_tos;
Robert Olsson19baf832005-06-21 12:43:18 -07001572 struct fib_alias *fa, *fa_to_delete;
1573 struct list_head *fa_head;
1574 struct leaf *l;
Olof Johansson91b9a272005-08-09 20:24:39 -07001575 struct leaf_info *li;
1576
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001577 if (plen > 32)
Robert Olsson19baf832005-06-21 12:43:18 -07001578 return -EINVAL;
1579
Thomas Graf4e902c52006-08-17 18:14:52 -07001580 key = ntohl(cfg->fc_dst);
Olof Johansson91b9a272005-08-09 20:24:39 -07001581 mask = ntohl(inet_make_mask(plen));
Robert Olsson19baf832005-06-21 12:43:18 -07001582
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001583 if (key & ~mask)
Robert Olsson19baf832005-06-21 12:43:18 -07001584 return -EINVAL;
1585
1586 key = key & mask;
1587 l = fib_find_node(t, key);
1588
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001589 if (!l)
Robert Olsson19baf832005-06-21 12:43:18 -07001590 return -ESRCH;
1591
1592 fa_head = get_fa_head(l, plen);
1593 fa = fib_find_alias(fa_head, tos, 0);
1594
1595 if (!fa)
1596 return -ESRCH;
1597
Stephen Hemminger0c7770c2005-08-23 21:59:41 -07001598 pr_debug("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t);
Robert Olsson19baf832005-06-21 12:43:18 -07001599
1600 fa_to_delete = NULL;
1601 fa_head = fa->fa_list.prev;
Robert Olsson2373ce12005-08-25 13:01:29 -07001602
Robert Olsson19baf832005-06-21 12:43:18 -07001603 list_for_each_entry(fa, fa_head, fa_list) {
1604 struct fib_info *fi = fa->fa_info;
1605
1606 if (fa->fa_tos != tos)
1607 break;
1608
Thomas Graf4e902c52006-08-17 18:14:52 -07001609 if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) &&
1610 (cfg->fc_scope == RT_SCOPE_NOWHERE ||
1611 fa->fa_scope == cfg->fc_scope) &&
1612 (!cfg->fc_protocol ||
1613 fi->fib_protocol == cfg->fc_protocol) &&
1614 fib_nh_match(cfg, fi) == 0) {
Robert Olsson19baf832005-06-21 12:43:18 -07001615 fa_to_delete = fa;
1616 break;
1617 }
1618 }
1619
Olof Johansson91b9a272005-08-09 20:24:39 -07001620 if (!fa_to_delete)
1621 return -ESRCH;
Robert Olsson19baf832005-06-21 12:43:18 -07001622
Olof Johansson91b9a272005-08-09 20:24:39 -07001623 fa = fa_to_delete;
Thomas Graf4e902c52006-08-17 18:14:52 -07001624 rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id,
Milan Kocianb8f55832007-05-23 14:55:06 -07001625 &cfg->fc_nlinfo, 0);
Robert Olsson19baf832005-06-21 12:43:18 -07001626
Olof Johansson91b9a272005-08-09 20:24:39 -07001627 l = fib_find_node(t, key);
Robert Olsson772cb712005-09-19 15:31:18 -07001628 li = find_leaf_info(l, plen);
Robert Olsson19baf832005-06-21 12:43:18 -07001629
Robert Olsson2373ce12005-08-25 13:01:29 -07001630 list_del_rcu(&fa->fa_list);
Robert Olsson19baf832005-06-21 12:43:18 -07001631
Olof Johansson91b9a272005-08-09 20:24:39 -07001632 if (list_empty(fa_head)) {
Robert Olsson2373ce12005-08-25 13:01:29 -07001633 hlist_del_rcu(&li->hlist);
Olof Johansson91b9a272005-08-09 20:24:39 -07001634 free_leaf_info(li);
Robert Olsson2373ce12005-08-25 13:01:29 -07001635 }
Olof Johansson91b9a272005-08-09 20:24:39 -07001636
1637 if (hlist_empty(&l->list))
1638 trie_leaf_remove(t, key);
1639
1640 if (fa->fa_state & FA_S_ACCESSED)
1641 rt_cache_flush(-1);
1642
Robert Olsson2373ce12005-08-25 13:01:29 -07001643 fib_release_info(fa->fa_info);
1644 alias_free_mem_rcu(fa);
Olof Johansson91b9a272005-08-09 20:24:39 -07001645 return 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001646}
1647
1648static int trie_flush_list(struct trie *t, struct list_head *head)
1649{
1650 struct fib_alias *fa, *fa_node;
1651 int found = 0;
1652
1653 list_for_each_entry_safe(fa, fa_node, head, fa_list) {
1654 struct fib_info *fi = fa->fa_info;
Robert Olsson19baf832005-06-21 12:43:18 -07001655
Robert Olsson2373ce12005-08-25 13:01:29 -07001656 if (fi && (fi->fib_flags & RTNH_F_DEAD)) {
1657 list_del_rcu(&fa->fa_list);
1658 fib_release_info(fa->fa_info);
1659 alias_free_mem_rcu(fa);
Robert Olsson19baf832005-06-21 12:43:18 -07001660 found++;
1661 }
1662 }
1663 return found;
1664}
1665
1666static int trie_flush_leaf(struct trie *t, struct leaf *l)
1667{
1668 int found = 0;
1669 struct hlist_head *lih = &l->list;
1670 struct hlist_node *node, *tmp;
1671 struct leaf_info *li = NULL;
1672
1673 hlist_for_each_entry_safe(li, node, tmp, lih, hlist) {
Robert Olsson19baf832005-06-21 12:43:18 -07001674 found += trie_flush_list(t, &li->falh);
1675
1676 if (list_empty(&li->falh)) {
Robert Olsson2373ce12005-08-25 13:01:29 -07001677 hlist_del_rcu(&li->hlist);
Robert Olsson19baf832005-06-21 12:43:18 -07001678 free_leaf_info(li);
1679 }
1680 }
1681 return found;
1682}
1683
Robert Olsson2373ce12005-08-25 13:01:29 -07001684/* rcu_read_lock needs to be hold by caller from readside */
1685
Robert Olsson19baf832005-06-21 12:43:18 -07001686static struct leaf *nextleaf(struct trie *t, struct leaf *thisleaf)
1687{
1688 struct node *c = (struct node *) thisleaf;
1689 struct tnode *p;
1690 int idx;
Robert Olsson2373ce12005-08-25 13:01:29 -07001691 struct node *trie = rcu_dereference(t->trie);
Robert Olsson19baf832005-06-21 12:43:18 -07001692
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001693 if (c == NULL) {
Robert Olsson2373ce12005-08-25 13:01:29 -07001694 if (trie == NULL)
Robert Olsson19baf832005-06-21 12:43:18 -07001695 return NULL;
1696
Robert Olsson2373ce12005-08-25 13:01:29 -07001697 if (IS_LEAF(trie)) /* trie w. just a leaf */
1698 return (struct leaf *) trie;
Robert Olsson19baf832005-06-21 12:43:18 -07001699
Robert Olsson2373ce12005-08-25 13:01:29 -07001700 p = (struct tnode*) trie; /* Start */
Olof Johansson91b9a272005-08-09 20:24:39 -07001701 } else
Eric Dumazetb59cfbf2008-01-18 03:31:36 -08001702 p = node_parent_rcu(c);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001703
Robert Olsson19baf832005-06-21 12:43:18 -07001704 while (p) {
1705 int pos, last;
1706
1707 /* Find the next child of the parent */
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001708 if (c)
1709 pos = 1 + tkey_extract_bits(c->key, p->pos, p->bits);
1710 else
Robert Olsson19baf832005-06-21 12:43:18 -07001711 pos = 0;
1712
1713 last = 1 << p->bits;
Olof Johansson91b9a272005-08-09 20:24:39 -07001714 for (idx = pos; idx < last ; idx++) {
Robert Olsson2373ce12005-08-25 13:01:29 -07001715 c = rcu_dereference(p->child[idx]);
1716
1717 if (!c)
Olof Johansson91b9a272005-08-09 20:24:39 -07001718 continue;
Robert Olsson19baf832005-06-21 12:43:18 -07001719
Olof Johansson91b9a272005-08-09 20:24:39 -07001720 /* Decend if tnode */
Robert Olsson2373ce12005-08-25 13:01:29 -07001721 while (IS_TNODE(c)) {
1722 p = (struct tnode *) c;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001723 idx = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001724
Olof Johansson91b9a272005-08-09 20:24:39 -07001725 /* Rightmost non-NULL branch */
1726 if (p && IS_TNODE(p))
Robert Olsson2373ce12005-08-25 13:01:29 -07001727 while (!(c = rcu_dereference(p->child[idx]))
1728 && idx < (1<<p->bits)) idx++;
Robert Olsson19baf832005-06-21 12:43:18 -07001729
Olof Johansson91b9a272005-08-09 20:24:39 -07001730 /* Done with this tnode? */
Robert Olsson2373ce12005-08-25 13:01:29 -07001731 if (idx >= (1 << p->bits) || !c)
Olof Johansson91b9a272005-08-09 20:24:39 -07001732 goto up;
Robert Olsson19baf832005-06-21 12:43:18 -07001733 }
Robert Olsson2373ce12005-08-25 13:01:29 -07001734 return (struct leaf *) c;
Robert Olsson19baf832005-06-21 12:43:18 -07001735 }
1736up:
1737 /* No more children go up one step */
Olof Johansson91b9a272005-08-09 20:24:39 -07001738 c = (struct node *) p;
Eric Dumazetb59cfbf2008-01-18 03:31:36 -08001739 p = node_parent_rcu(c);
Robert Olsson19baf832005-06-21 12:43:18 -07001740 }
1741 return NULL; /* Ready. Root of trie */
1742}
1743
Robert Olssond562f1f2007-03-26 14:22:22 -07001744/*
1745 * Caller must hold RTNL.
1746 */
Robert Olsson19baf832005-06-21 12:43:18 -07001747static int fn_trie_flush(struct fib_table *tb)
1748{
1749 struct trie *t = (struct trie *) tb->tb_data;
1750 struct leaf *ll = NULL, *l = NULL;
1751 int found = 0, h;
1752
Olof Johansson91b9a272005-08-09 20:24:39 -07001753 for (h = 0; (l = nextleaf(t, l)) != NULL; h++) {
Robert Olsson19baf832005-06-21 12:43:18 -07001754 found += trie_flush_leaf(t, l);
1755
1756 if (ll && hlist_empty(&ll->list))
1757 trie_leaf_remove(t, ll->key);
1758 ll = l;
1759 }
1760
1761 if (ll && hlist_empty(&ll->list))
1762 trie_leaf_remove(t, ll->key);
1763
Stephen Hemminger0c7770c2005-08-23 21:59:41 -07001764 pr_debug("trie_flush found=%d\n", found);
Robert Olsson19baf832005-06-21 12:43:18 -07001765 return found;
1766}
1767
Robert Olsson19baf832005-06-21 12:43:18 -07001768static void
1769fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
1770{
1771 struct trie *t = (struct trie *) tb->tb_data;
1772 int order, last_idx;
1773 struct fib_info *fi = NULL;
1774 struct fib_info *last_resort;
1775 struct fib_alias *fa = NULL;
1776 struct list_head *fa_head;
1777 struct leaf *l;
1778
1779 last_idx = -1;
1780 last_resort = NULL;
1781 order = -1;
1782
Robert Olsson2373ce12005-08-25 13:01:29 -07001783 rcu_read_lock();
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001784
Robert Olsson19baf832005-06-21 12:43:18 -07001785 l = fib_find_node(t, 0);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001786 if (!l)
Robert Olsson19baf832005-06-21 12:43:18 -07001787 goto out;
1788
1789 fa_head = get_fa_head(l, 0);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001790 if (!fa_head)
Robert Olsson19baf832005-06-21 12:43:18 -07001791 goto out;
1792
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001793 if (list_empty(fa_head))
Robert Olsson19baf832005-06-21 12:43:18 -07001794 goto out;
1795
Robert Olsson2373ce12005-08-25 13:01:29 -07001796 list_for_each_entry_rcu(fa, fa_head, fa_list) {
Robert Olsson19baf832005-06-21 12:43:18 -07001797 struct fib_info *next_fi = fa->fa_info;
Olof Johansson91b9a272005-08-09 20:24:39 -07001798
Robert Olsson19baf832005-06-21 12:43:18 -07001799 if (fa->fa_scope != res->scope ||
1800 fa->fa_type != RTN_UNICAST)
1801 continue;
Olof Johansson91b9a272005-08-09 20:24:39 -07001802
Robert Olsson19baf832005-06-21 12:43:18 -07001803 if (next_fi->fib_priority > res->fi->fib_priority)
1804 break;
1805 if (!next_fi->fib_nh[0].nh_gw ||
1806 next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
1807 continue;
1808 fa->fa_state |= FA_S_ACCESSED;
Olof Johansson91b9a272005-08-09 20:24:39 -07001809
Robert Olsson19baf832005-06-21 12:43:18 -07001810 if (fi == NULL) {
1811 if (next_fi != res->fi)
1812 break;
1813 } else if (!fib_detect_death(fi, order, &last_resort,
Denis V. Lunev971b8932007-12-08 00:32:23 -08001814 &last_idx, tb->tb_default)) {
Denis V. Luneva2bbe682007-12-08 00:31:44 -08001815 fib_result_assign(res, fi);
Denis V. Lunev971b8932007-12-08 00:32:23 -08001816 tb->tb_default = order;
Robert Olsson19baf832005-06-21 12:43:18 -07001817 goto out;
1818 }
1819 fi = next_fi;
1820 order++;
1821 }
1822 if (order <= 0 || fi == NULL) {
Denis V. Lunev971b8932007-12-08 00:32:23 -08001823 tb->tb_default = -1;
Robert Olsson19baf832005-06-21 12:43:18 -07001824 goto out;
1825 }
1826
Denis V. Lunev971b8932007-12-08 00:32:23 -08001827 if (!fib_detect_death(fi, order, &last_resort, &last_idx,
1828 tb->tb_default)) {
Denis V. Luneva2bbe682007-12-08 00:31:44 -08001829 fib_result_assign(res, fi);
Denis V. Lunev971b8932007-12-08 00:32:23 -08001830 tb->tb_default = order;
Robert Olsson19baf832005-06-21 12:43:18 -07001831 goto out;
1832 }
Denis V. Luneva2bbe682007-12-08 00:31:44 -08001833 if (last_idx >= 0)
1834 fib_result_assign(res, last_resort);
Denis V. Lunev971b8932007-12-08 00:32:23 -08001835 tb->tb_default = last_idx;
1836out:
Robert Olsson2373ce12005-08-25 13:01:29 -07001837 rcu_read_unlock();
Robert Olsson19baf832005-06-21 12:43:18 -07001838}
1839
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001840static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fib_table *tb,
Robert Olsson19baf832005-06-21 12:43:18 -07001841 struct sk_buff *skb, struct netlink_callback *cb)
1842{
1843 int i, s_i;
1844 struct fib_alias *fa;
1845
Al Viro32ab5f82006-09-26 22:21:45 -07001846 __be32 xkey = htonl(key);
Robert Olsson19baf832005-06-21 12:43:18 -07001847
Patrick McHardy1af5a8c2006-08-10 23:10:46 -07001848 s_i = cb->args[4];
Robert Olsson19baf832005-06-21 12:43:18 -07001849 i = 0;
1850
Robert Olsson2373ce12005-08-25 13:01:29 -07001851 /* rcu_read_lock is hold by caller */
1852
1853 list_for_each_entry_rcu(fa, fah, fa_list) {
Robert Olsson19baf832005-06-21 12:43:18 -07001854 if (i < s_i) {
1855 i++;
1856 continue;
1857 }
Stephen Hemminger78c66712005-09-21 00:15:39 -07001858 BUG_ON(!fa->fa_info);
Robert Olsson19baf832005-06-21 12:43:18 -07001859
1860 if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid,
1861 cb->nlh->nlmsg_seq,
1862 RTM_NEWROUTE,
1863 tb->tb_id,
1864 fa->fa_type,
1865 fa->fa_scope,
Thomas Grafbe403ea2006-08-17 18:15:17 -07001866 xkey,
Robert Olsson19baf832005-06-21 12:43:18 -07001867 plen,
1868 fa->fa_tos,
David S. Miller90f66912005-06-21 14:43:28 -07001869 fa->fa_info, 0) < 0) {
Patrick McHardy1af5a8c2006-08-10 23:10:46 -07001870 cb->args[4] = i;
Robert Olsson19baf832005-06-21 12:43:18 -07001871 return -1;
Olof Johansson91b9a272005-08-09 20:24:39 -07001872 }
Robert Olsson19baf832005-06-21 12:43:18 -07001873 i++;
1874 }
Patrick McHardy1af5a8c2006-08-10 23:10:46 -07001875 cb->args[4] = i;
Robert Olsson19baf832005-06-21 12:43:18 -07001876 return skb->len;
1877}
1878
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001879static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, struct sk_buff *skb,
Robert Olsson19baf832005-06-21 12:43:18 -07001880 struct netlink_callback *cb)
1881{
1882 int h, s_h;
1883 struct list_head *fa_head;
1884 struct leaf *l = NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07001885
Patrick McHardy1af5a8c2006-08-10 23:10:46 -07001886 s_h = cb->args[3];
Robert Olsson19baf832005-06-21 12:43:18 -07001887
Olof Johansson91b9a272005-08-09 20:24:39 -07001888 for (h = 0; (l = nextleaf(t, l)) != NULL; h++) {
Robert Olsson19baf832005-06-21 12:43:18 -07001889 if (h < s_h)
1890 continue;
1891 if (h > s_h)
Patrick McHardy1af5a8c2006-08-10 23:10:46 -07001892 memset(&cb->args[4], 0,
1893 sizeof(cb->args) - 4*sizeof(cb->args[0]));
Robert Olsson19baf832005-06-21 12:43:18 -07001894
1895 fa_head = get_fa_head(l, plen);
Olof Johansson91b9a272005-08-09 20:24:39 -07001896
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001897 if (!fa_head)
Robert Olsson19baf832005-06-21 12:43:18 -07001898 continue;
1899
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001900 if (list_empty(fa_head))
Robert Olsson19baf832005-06-21 12:43:18 -07001901 continue;
1902
1903 if (fn_trie_dump_fa(l->key, plen, fa_head, tb, skb, cb)<0) {
Patrick McHardy1af5a8c2006-08-10 23:10:46 -07001904 cb->args[3] = h;
Robert Olsson19baf832005-06-21 12:43:18 -07001905 return -1;
1906 }
1907 }
Patrick McHardy1af5a8c2006-08-10 23:10:46 -07001908 cb->args[3] = h;
Robert Olsson19baf832005-06-21 12:43:18 -07001909 return skb->len;
1910}
1911
1912static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb)
1913{
1914 int m, s_m;
1915 struct trie *t = (struct trie *) tb->tb_data;
1916
Patrick McHardy1af5a8c2006-08-10 23:10:46 -07001917 s_m = cb->args[2];
Robert Olsson19baf832005-06-21 12:43:18 -07001918
Robert Olsson2373ce12005-08-25 13:01:29 -07001919 rcu_read_lock();
Olof Johansson91b9a272005-08-09 20:24:39 -07001920 for (m = 0; m <= 32; m++) {
Robert Olsson19baf832005-06-21 12:43:18 -07001921 if (m < s_m)
1922 continue;
1923 if (m > s_m)
Patrick McHardy1af5a8c2006-08-10 23:10:46 -07001924 memset(&cb->args[3], 0,
1925 sizeof(cb->args) - 3*sizeof(cb->args[0]));
Robert Olsson19baf832005-06-21 12:43:18 -07001926
1927 if (fn_trie_dump_plen(t, 32-m, tb, skb, cb)<0) {
Patrick McHardy1af5a8c2006-08-10 23:10:46 -07001928 cb->args[2] = m;
Robert Olsson19baf832005-06-21 12:43:18 -07001929 goto out;
1930 }
1931 }
Robert Olsson2373ce12005-08-25 13:01:29 -07001932 rcu_read_unlock();
Patrick McHardy1af5a8c2006-08-10 23:10:46 -07001933 cb->args[2] = m;
Robert Olsson19baf832005-06-21 12:43:18 -07001934 return skb->len;
Olof Johansson91b9a272005-08-09 20:24:39 -07001935out:
Robert Olsson2373ce12005-08-25 13:01:29 -07001936 rcu_read_unlock();
Robert Olsson19baf832005-06-21 12:43:18 -07001937 return -1;
1938}
1939
Stephen Hemminger7f9b8052008-01-14 23:14:20 -08001940void __init fib_hash_init(void)
1941{
1942 fn_alias_kmem = kmem_cache_create("ip_fib_alias", sizeof(struct fib_alias),
Stephen Hemmingerbc3c8c12008-01-22 21:51:50 -08001943 0, SLAB_PANIC, NULL);
1944
1945 trie_leaf_kmem = kmem_cache_create("ip_fib_trie",
1946 max(sizeof(struct leaf),
1947 sizeof(struct leaf_info)),
1948 0, SLAB_PANIC, NULL);
Stephen Hemminger7f9b8052008-01-14 23:14:20 -08001949}
Robert Olsson19baf832005-06-21 12:43:18 -07001950
Stephen Hemminger7f9b8052008-01-14 23:14:20 -08001951
1952/* Fix more generic FIB names for init later */
1953struct fib_table *fib_hash_table(u32 id)
Robert Olsson19baf832005-06-21 12:43:18 -07001954{
1955 struct fib_table *tb;
1956 struct trie *t;
1957
Robert Olsson19baf832005-06-21 12:43:18 -07001958 tb = kmalloc(sizeof(struct fib_table) + sizeof(struct trie),
1959 GFP_KERNEL);
1960 if (tb == NULL)
1961 return NULL;
1962
1963 tb->tb_id = id;
Denis V. Lunev971b8932007-12-08 00:32:23 -08001964 tb->tb_default = -1;
Robert Olsson19baf832005-06-21 12:43:18 -07001965 tb->tb_lookup = fn_trie_lookup;
1966 tb->tb_insert = fn_trie_insert;
1967 tb->tb_delete = fn_trie_delete;
1968 tb->tb_flush = fn_trie_flush;
1969 tb->tb_select_default = fn_trie_select_default;
1970 tb->tb_dump = fn_trie_dump;
Robert Olsson19baf832005-06-21 12:43:18 -07001971
1972 t = (struct trie *) tb->tb_data;
Stephen Hemmingerc28a1cf2008-01-12 20:49:13 -08001973 memset(t, 0, sizeof(*t));
Robert Olsson19baf832005-06-21 12:43:18 -07001974
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001975 if (id == RT_TABLE_LOCAL)
Stephen Hemminger78c66712005-09-21 00:15:39 -07001976 printk(KERN_INFO "IPv4 FIB: Using LC-trie version %s\n", VERSION);
Robert Olsson19baf832005-06-21 12:43:18 -07001977
1978 return tb;
1979}
1980
Robert Olsson19baf832005-06-21 12:43:18 -07001981#ifdef CONFIG_PROC_FS
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001982/* Depth first Trie walk iterator */
1983struct fib_trie_iter {
Denis V. Lunev1c340b22008-01-10 03:27:17 -08001984 struct seq_net_private p;
Eric W. Biederman877a9bf2007-12-07 00:47:47 -08001985 struct trie *trie_local, *trie_main;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001986 struct tnode *tnode;
1987 struct trie *trie;
1988 unsigned index;
1989 unsigned depth;
1990};
Robert Olsson19baf832005-06-21 12:43:18 -07001991
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001992static struct node *fib_trie_get_next(struct fib_trie_iter *iter)
Robert Olsson19baf832005-06-21 12:43:18 -07001993{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001994 struct tnode *tn = iter->tnode;
1995 unsigned cindex = iter->index;
1996 struct tnode *p;
1997
Eric W. Biederman6640e692007-01-24 14:42:04 -08001998 /* A single entry routing table */
1999 if (!tn)
2000 return NULL;
2001
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002002 pr_debug("get_next iter={node=%p index=%d depth=%d}\n",
2003 iter->tnode, iter->index, iter->depth);
2004rescan:
2005 while (cindex < (1<<tn->bits)) {
Eric Dumazetb59cfbf2008-01-18 03:31:36 -08002006 struct node *n = tnode_get_child_rcu(tn, cindex);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002007
2008 if (n) {
2009 if (IS_LEAF(n)) {
2010 iter->tnode = tn;
2011 iter->index = cindex + 1;
2012 } else {
2013 /* push down one level */
2014 iter->tnode = (struct tnode *) n;
2015 iter->index = 0;
2016 ++iter->depth;
2017 }
2018 return n;
2019 }
2020
2021 ++cindex;
2022 }
2023
2024 /* Current node exhausted, pop back up */
Eric Dumazetb59cfbf2008-01-18 03:31:36 -08002025 p = node_parent_rcu((struct node *)tn);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002026 if (p) {
2027 cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1;
2028 tn = p;
2029 --iter->depth;
2030 goto rescan;
2031 }
2032
2033 /* got root? */
Robert Olsson19baf832005-06-21 12:43:18 -07002034 return NULL;
2035}
2036
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002037static struct node *fib_trie_get_first(struct fib_trie_iter *iter,
2038 struct trie *t)
Robert Olsson19baf832005-06-21 12:43:18 -07002039{
Robert Olsson5ddf0eb2006-03-20 21:34:12 -08002040 struct node *n ;
2041
Stephen Hemminger132adf52007-03-08 20:44:43 -08002042 if (!t)
Robert Olsson5ddf0eb2006-03-20 21:34:12 -08002043 return NULL;
2044
2045 n = rcu_dereference(t->trie);
2046
Stephen Hemminger132adf52007-03-08 20:44:43 -08002047 if (!iter)
Robert Olsson5ddf0eb2006-03-20 21:34:12 -08002048 return NULL;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002049
Eric W. Biederman6640e692007-01-24 14:42:04 -08002050 if (n) {
2051 if (IS_TNODE(n)) {
2052 iter->tnode = (struct tnode *) n;
2053 iter->trie = t;
2054 iter->index = 0;
2055 iter->depth = 1;
2056 } else {
2057 iter->tnode = NULL;
2058 iter->trie = t;
2059 iter->index = 0;
2060 iter->depth = 0;
2061 }
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002062 return n;
2063 }
Robert Olsson19baf832005-06-21 12:43:18 -07002064 return NULL;
2065}
2066
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002067static void trie_collect_stats(struct trie *t, struct trie_stat *s)
Robert Olsson19baf832005-06-21 12:43:18 -07002068{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002069 struct node *n;
2070 struct fib_trie_iter iter;
Robert Olsson19baf832005-06-21 12:43:18 -07002071
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002072 memset(s, 0, sizeof(*s));
Robert Olsson19baf832005-06-21 12:43:18 -07002073
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002074 rcu_read_lock();
2075 for (n = fib_trie_get_first(&iter, t); n;
2076 n = fib_trie_get_next(&iter)) {
2077 if (IS_LEAF(n)) {
2078 s->leaves++;
2079 s->totdepth += iter.depth;
2080 if (iter.depth > s->maxdepth)
2081 s->maxdepth = iter.depth;
2082 } else {
2083 const struct tnode *tn = (const struct tnode *) n;
2084 int i;
Robert Olsson19baf832005-06-21 12:43:18 -07002085
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002086 s->tnodes++;
Stephen Hemminger132adf52007-03-08 20:44:43 -08002087 if (tn->bits < MAX_STAT_DEPTH)
Robert Olsson06ef9212006-03-20 21:35:01 -08002088 s->nodesizes[tn->bits]++;
2089
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002090 for (i = 0; i < (1<<tn->bits); i++)
2091 if (!tn->child[i])
2092 s->nullpointers++;
2093 }
2094 }
2095 rcu_read_unlock();
Robert Olsson19baf832005-06-21 12:43:18 -07002096}
2097
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002098/*
Robert Olsson19baf832005-06-21 12:43:18 -07002099 * This outputs /proc/net/fib_triestats
Robert Olsson19baf832005-06-21 12:43:18 -07002100 */
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002101static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat)
Robert Olsson19baf832005-06-21 12:43:18 -07002102{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002103 unsigned i, max, pointers, bytes, avdepth;
Robert Olsson19baf832005-06-21 12:43:18 -07002104
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002105 if (stat->leaves)
2106 avdepth = stat->totdepth*100 / stat->leaves;
2107 else
2108 avdepth = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07002109
Stephen Hemminger187b5182008-01-12 20:55:55 -08002110 seq_printf(seq, "\tAver depth: %u.%02d\n", avdepth / 100, avdepth % 100 );
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002111 seq_printf(seq, "\tMax depth: %u\n", stat->maxdepth);
Robert Olsson19baf832005-06-21 12:43:18 -07002112
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002113 seq_printf(seq, "\tLeaves: %u\n", stat->leaves);
Olof Johansson91b9a272005-08-09 20:24:39 -07002114
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002115 bytes = sizeof(struct leaf) * stat->leaves;
Stephen Hemminger187b5182008-01-12 20:55:55 -08002116 seq_printf(seq, "\tInternal nodes: %u\n\t", stat->tnodes);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002117 bytes += sizeof(struct tnode) * stat->tnodes;
Robert Olsson19baf832005-06-21 12:43:18 -07002118
Robert Olsson06ef9212006-03-20 21:35:01 -08002119 max = MAX_STAT_DEPTH;
2120 while (max > 0 && stat->nodesizes[max-1] == 0)
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002121 max--;
Robert Olsson19baf832005-06-21 12:43:18 -07002122
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002123 pointers = 0;
2124 for (i = 1; i <= max; i++)
2125 if (stat->nodesizes[i] != 0) {
Stephen Hemminger187b5182008-01-12 20:55:55 -08002126 seq_printf(seq, " %u: %u", i, stat->nodesizes[i]);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002127 pointers += (1<<i) * stat->nodesizes[i];
2128 }
2129 seq_putc(seq, '\n');
Stephen Hemminger187b5182008-01-12 20:55:55 -08002130 seq_printf(seq, "\tPointers: %u\n", pointers);
Robert Olsson19baf832005-06-21 12:43:18 -07002131
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002132 bytes += sizeof(struct node *) * pointers;
Stephen Hemminger187b5182008-01-12 20:55:55 -08002133 seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers);
2134 seq_printf(seq, "Total size: %u kB\n", (bytes + 1023) / 1024);
Stephen Hemminger66a2f7f2008-01-12 21:23:17 -08002135}
Robert Olsson19baf832005-06-21 12:43:18 -07002136
2137#ifdef CONFIG_IP_FIB_TRIE_STATS
Stephen Hemminger66a2f7f2008-01-12 21:23:17 -08002138static void trie_show_usage(struct seq_file *seq,
2139 const struct trie_use_stats *stats)
2140{
2141 seq_printf(seq, "\nCounters:\n---------\n");
2142 seq_printf(seq,"gets = %u\n", stats->gets);
2143 seq_printf(seq,"backtracks = %u\n", stats->backtrack);
2144 seq_printf(seq,"semantic match passed = %u\n", stats->semantic_match_passed);
2145 seq_printf(seq,"semantic match miss = %u\n", stats->semantic_match_miss);
2146 seq_printf(seq,"null node hit= %u\n", stats->null_node_hit);
2147 seq_printf(seq,"skipped node resize = %u\n\n", stats->resize_node_skipped);
Robert Olsson19baf832005-06-21 12:43:18 -07002148}
Stephen Hemminger66a2f7f2008-01-12 21:23:17 -08002149#endif /* CONFIG_IP_FIB_TRIE_STATS */
2150
Stephen Hemmingerd717a9a2008-01-14 23:11:54 -08002151static void fib_trie_show(struct seq_file *seq, const char *name, struct trie *trie)
2152{
2153 struct trie_stat stat;
2154
2155 seq_printf(seq, "%s: %d\n", name, trie->size);
2156 trie_collect_stats(trie, &stat);
2157 trie_show_stats(seq, &stat);
2158#ifdef CONFIG_IP_FIB_TRIE_STATS
2159 trie_show_usage(seq, &trie->stats);
2160#endif
2161}
Robert Olsson19baf832005-06-21 12:43:18 -07002162
2163static int fib_triestat_seq_show(struct seq_file *seq, void *v)
2164{
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002165 struct net *net = (struct net *)seq->private;
Eric W. Biederman877a9bf2007-12-07 00:47:47 -08002166 struct fib_table *tb;
2167
Stephen Hemmingerd717a9a2008-01-14 23:11:54 -08002168 seq_printf(seq,
2169 "Basic info: size of leaf: %Zd bytes, size of tnode: %Zd bytes.\n",
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002170 sizeof(struct leaf), sizeof(struct tnode));
Olof Johansson91b9a272005-08-09 20:24:39 -07002171
Stephen Hemmingerd717a9a2008-01-14 23:11:54 -08002172 tb = fib_get_table(net, RT_TABLE_LOCAL);
2173 if (tb)
2174 fib_trie_show(seq, "Local", (struct trie *) tb->tb_data);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002175
Stephen Hemmingerd717a9a2008-01-14 23:11:54 -08002176 tb = fib_get_table(net, RT_TABLE_MAIN);
2177 if (tb)
2178 fib_trie_show(seq, "Main", (struct trie *) tb->tb_data);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002179
Robert Olsson19baf832005-06-21 12:43:18 -07002180 return 0;
2181}
2182
Robert Olsson19baf832005-06-21 12:43:18 -07002183static int fib_triestat_seq_open(struct inode *inode, struct file *file)
2184{
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002185 int err;
2186 struct net *net;
2187
2188 net = get_proc_net(inode);
2189 if (net == NULL)
2190 return -ENXIO;
2191 err = single_open(file, fib_triestat_seq_show, net);
2192 if (err < 0) {
2193 put_net(net);
2194 return err;
2195 }
2196 return 0;
2197}
2198
2199static int fib_triestat_seq_release(struct inode *ino, struct file *f)
2200{
2201 struct seq_file *seq = f->private_data;
2202 put_net(seq->private);
2203 return single_release(ino, f);
Robert Olsson19baf832005-06-21 12:43:18 -07002204}
2205
Arjan van de Ven9a321442007-02-12 00:55:35 -08002206static const struct file_operations fib_triestat_fops = {
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002207 .owner = THIS_MODULE,
2208 .open = fib_triestat_seq_open,
2209 .read = seq_read,
2210 .llseek = seq_lseek,
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002211 .release = fib_triestat_seq_release,
Robert Olsson19baf832005-06-21 12:43:18 -07002212};
2213
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002214static struct node *fib_trie_get_idx(struct fib_trie_iter *iter,
2215 loff_t pos)
Robert Olsson19baf832005-06-21 12:43:18 -07002216{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002217 loff_t idx = 0;
2218 struct node *n;
Robert Olsson19baf832005-06-21 12:43:18 -07002219
Eric W. Biederman877a9bf2007-12-07 00:47:47 -08002220 for (n = fib_trie_get_first(iter, iter->trie_local);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002221 n; ++idx, n = fib_trie_get_next(iter)) {
2222 if (pos == idx)
2223 return n;
2224 }
Robert Olsson19baf832005-06-21 12:43:18 -07002225
Eric W. Biederman877a9bf2007-12-07 00:47:47 -08002226 for (n = fib_trie_get_first(iter, iter->trie_main);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002227 n; ++idx, n = fib_trie_get_next(iter)) {
2228 if (pos == idx)
2229 return n;
2230 }
Robert Olsson19baf832005-06-21 12:43:18 -07002231 return NULL;
2232}
2233
2234static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos)
Stephen Hemmingerc95aaf92008-01-12 21:25:02 -08002235 __acquires(RCU)
Robert Olsson19baf832005-06-21 12:43:18 -07002236{
Eric W. Biederman877a9bf2007-12-07 00:47:47 -08002237 struct fib_trie_iter *iter = seq->private;
2238 struct fib_table *tb;
2239
2240 if (!iter->trie_local) {
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002241 tb = fib_get_table(iter->p.net, RT_TABLE_LOCAL);
Eric W. Biederman877a9bf2007-12-07 00:47:47 -08002242 if (tb)
2243 iter->trie_local = (struct trie *) tb->tb_data;
2244 }
2245 if (!iter->trie_main) {
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002246 tb = fib_get_table(iter->p.net, RT_TABLE_MAIN);
Eric W. Biederman877a9bf2007-12-07 00:47:47 -08002247 if (tb)
2248 iter->trie_main = (struct trie *) tb->tb_data;
2249 }
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002250 rcu_read_lock();
2251 if (*pos == 0)
Olof Johansson91b9a272005-08-09 20:24:39 -07002252 return SEQ_START_TOKEN;
Eric W. Biederman877a9bf2007-12-07 00:47:47 -08002253 return fib_trie_get_idx(iter, *pos - 1);
Robert Olsson19baf832005-06-21 12:43:18 -07002254}
2255
2256static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2257{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002258 struct fib_trie_iter *iter = seq->private;
2259 void *l = v;
2260
Robert Olsson19baf832005-06-21 12:43:18 -07002261 ++*pos;
Olof Johansson91b9a272005-08-09 20:24:39 -07002262 if (v == SEQ_START_TOKEN)
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002263 return fib_trie_get_idx(iter, 0);
Olof Johansson91b9a272005-08-09 20:24:39 -07002264
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002265 v = fib_trie_get_next(iter);
2266 BUG_ON(v == l);
2267 if (v)
2268 return v;
2269
2270 /* continue scan in next trie */
Eric W. Biederman877a9bf2007-12-07 00:47:47 -08002271 if (iter->trie == iter->trie_local)
2272 return fib_trie_get_first(iter, iter->trie_main);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002273
2274 return NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07002275}
2276
2277static void fib_trie_seq_stop(struct seq_file *seq, void *v)
Stephen Hemmingerc95aaf92008-01-12 21:25:02 -08002278 __releases(RCU)
Robert Olsson19baf832005-06-21 12:43:18 -07002279{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002280 rcu_read_unlock();
Robert Olsson19baf832005-06-21 12:43:18 -07002281}
2282
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002283static void seq_indent(struct seq_file *seq, int n)
2284{
2285 while (n-- > 0) seq_puts(seq, " ");
2286}
Robert Olsson19baf832005-06-21 12:43:18 -07002287
Eric Dumazet28d36e32008-01-14 23:09:56 -08002288static inline const char *rtn_scope(char *buf, size_t len, enum rt_scope_t s)
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002289{
Stephen Hemminger132adf52007-03-08 20:44:43 -08002290 switch (s) {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002291 case RT_SCOPE_UNIVERSE: return "universe";
2292 case RT_SCOPE_SITE: return "site";
2293 case RT_SCOPE_LINK: return "link";
2294 case RT_SCOPE_HOST: return "host";
2295 case RT_SCOPE_NOWHERE: return "nowhere";
2296 default:
Eric Dumazet28d36e32008-01-14 23:09:56 -08002297 snprintf(buf, len, "scope=%d", s);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002298 return buf;
2299 }
2300}
2301
2302static const char *rtn_type_names[__RTN_MAX] = {
2303 [RTN_UNSPEC] = "UNSPEC",
2304 [RTN_UNICAST] = "UNICAST",
2305 [RTN_LOCAL] = "LOCAL",
2306 [RTN_BROADCAST] = "BROADCAST",
2307 [RTN_ANYCAST] = "ANYCAST",
2308 [RTN_MULTICAST] = "MULTICAST",
2309 [RTN_BLACKHOLE] = "BLACKHOLE",
2310 [RTN_UNREACHABLE] = "UNREACHABLE",
2311 [RTN_PROHIBIT] = "PROHIBIT",
2312 [RTN_THROW] = "THROW",
2313 [RTN_NAT] = "NAT",
2314 [RTN_XRESOLVE] = "XRESOLVE",
2315};
2316
Eric Dumazet28d36e32008-01-14 23:09:56 -08002317static inline const char *rtn_type(char *buf, size_t len, unsigned t)
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002318{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002319 if (t < __RTN_MAX && rtn_type_names[t])
2320 return rtn_type_names[t];
Eric Dumazet28d36e32008-01-14 23:09:56 -08002321 snprintf(buf, len, "type %u", t);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002322 return buf;
2323}
2324
2325/* Pretty print the trie */
Robert Olsson19baf832005-06-21 12:43:18 -07002326static int fib_trie_seq_show(struct seq_file *seq, void *v)
2327{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002328 const struct fib_trie_iter *iter = seq->private;
2329 struct node *n = v;
Robert Olsson19baf832005-06-21 12:43:18 -07002330
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002331 if (v == SEQ_START_TOKEN)
2332 return 0;
Robert Olsson19baf832005-06-21 12:43:18 -07002333
Eric Dumazetb59cfbf2008-01-18 03:31:36 -08002334 if (!node_parent_rcu(n)) {
Eric W. Biederman877a9bf2007-12-07 00:47:47 -08002335 if (iter->trie == iter->trie_local)
Robert Olsson095b8502007-01-26 19:06:01 -08002336 seq_puts(seq, "<local>:\n");
2337 else
2338 seq_puts(seq, "<main>:\n");
2339 }
2340
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002341 if (IS_TNODE(n)) {
2342 struct tnode *tn = (struct tnode *) n;
Stephen Hemmingerab66b4a2007-08-10 15:22:58 -07002343 __be32 prf = htonl(mask_pfx(tn->key, tn->pos));
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002344
Robert Olsson1d25cd62005-09-19 15:29:52 -07002345 seq_indent(seq, iter->depth-1);
2346 seq_printf(seq, " +-- %d.%d.%d.%d/%d %d %d %d\n",
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002347 NIPQUAD(prf), tn->pos, tn->bits, tn->full_children,
Robert Olsson1d25cd62005-09-19 15:29:52 -07002348 tn->empty_children);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002349
Olof Johansson91b9a272005-08-09 20:24:39 -07002350 } else {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002351 struct leaf *l = (struct leaf *) n;
2352 int i;
Al Viro32ab5f82006-09-26 22:21:45 -07002353 __be32 val = htonl(l->key);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002354
2355 seq_indent(seq, iter->depth);
2356 seq_printf(seq, " |-- %d.%d.%d.%d\n", NIPQUAD(val));
2357 for (i = 32; i >= 0; i--) {
Robert Olsson772cb712005-09-19 15:31:18 -07002358 struct leaf_info *li = find_leaf_info(l, i);
Eric Dumazet28d36e32008-01-14 23:09:56 -08002359
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002360 if (li) {
2361 struct fib_alias *fa;
Eric Dumazet28d36e32008-01-14 23:09:56 -08002362
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002363 list_for_each_entry_rcu(fa, &li->falh, fa_list) {
Eric Dumazet28d36e32008-01-14 23:09:56 -08002364 char buf1[32], buf2[32];
2365
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002366 seq_indent(seq, iter->depth+1);
2367 seq_printf(seq, " /%d %s %s", i,
Eric Dumazet28d36e32008-01-14 23:09:56 -08002368 rtn_scope(buf1, sizeof(buf1),
2369 fa->fa_scope),
2370 rtn_type(buf2, sizeof(buf2),
2371 fa->fa_type));
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002372 if (fa->fa_tos)
2373 seq_printf(seq, "tos =%d\n",
2374 fa->fa_tos);
2375 seq_putc(seq, '\n');
2376 }
2377 }
2378 }
Robert Olsson19baf832005-06-21 12:43:18 -07002379 }
2380
2381 return 0;
2382}
2383
Stephen Hemmingerf6908082007-03-12 14:34:29 -07002384static const struct seq_operations fib_trie_seq_ops = {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002385 .start = fib_trie_seq_start,
2386 .next = fib_trie_seq_next,
2387 .stop = fib_trie_seq_stop,
2388 .show = fib_trie_seq_show,
Robert Olsson19baf832005-06-21 12:43:18 -07002389};
2390
2391static int fib_trie_seq_open(struct inode *inode, struct file *file)
2392{
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002393 return seq_open_net(inode, file, &fib_trie_seq_ops,
2394 sizeof(struct fib_trie_iter));
Robert Olsson19baf832005-06-21 12:43:18 -07002395}
2396
Arjan van de Ven9a321442007-02-12 00:55:35 -08002397static const struct file_operations fib_trie_fops = {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002398 .owner = THIS_MODULE,
2399 .open = fib_trie_seq_open,
2400 .read = seq_read,
2401 .llseek = seq_lseek,
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002402 .release = seq_release_net,
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002403};
2404
Al Viro32ab5f82006-09-26 22:21:45 -07002405static unsigned fib_flag_trans(int type, __be32 mask, const struct fib_info *fi)
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002406{
2407 static unsigned type2flags[RTN_MAX + 1] = {
2408 [7] = RTF_REJECT, [8] = RTF_REJECT,
2409 };
2410 unsigned flags = type2flags[type];
2411
2412 if (fi && fi->fib_nh->nh_gw)
2413 flags |= RTF_GATEWAY;
Al Viro32ab5f82006-09-26 22:21:45 -07002414 if (mask == htonl(0xFFFFFFFF))
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002415 flags |= RTF_HOST;
2416 flags |= RTF_UP;
2417 return flags;
2418}
2419
2420/*
2421 * This outputs /proc/net/route.
2422 * The format of the file is not supposed to be changed
2423 * and needs to be same as fib_hash output to avoid breaking
2424 * legacy utilities
2425 */
2426static int fib_route_seq_show(struct seq_file *seq, void *v)
2427{
Patrick McHardyc9e53cb2005-11-20 21:09:00 -08002428 const struct fib_trie_iter *iter = seq->private;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002429 struct leaf *l = v;
2430 int i;
2431 char bf[128];
2432
2433 if (v == SEQ_START_TOKEN) {
2434 seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway "
2435 "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU"
2436 "\tWindow\tIRTT");
2437 return 0;
2438 }
2439
Eric W. Biederman877a9bf2007-12-07 00:47:47 -08002440 if (iter->trie == iter->trie_local)
Patrick McHardyc9e53cb2005-11-20 21:09:00 -08002441 return 0;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002442 if (IS_TNODE(l))
2443 return 0;
2444
2445 for (i=32; i>=0; i--) {
Robert Olsson772cb712005-09-19 15:31:18 -07002446 struct leaf_info *li = find_leaf_info(l, i);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002447 struct fib_alias *fa;
Al Viro32ab5f82006-09-26 22:21:45 -07002448 __be32 mask, prefix;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002449
2450 if (!li)
2451 continue;
2452
2453 mask = inet_make_mask(li->plen);
2454 prefix = htonl(l->key);
2455
2456 list_for_each_entry_rcu(fa, &li->falh, fa_list) {
Herbert Xu1371e372005-10-15 09:42:39 +10002457 const struct fib_info *fi = fa->fa_info;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002458 unsigned flags = fib_flag_trans(fa->fa_type, mask, fi);
2459
2460 if (fa->fa_type == RTN_BROADCAST
2461 || fa->fa_type == RTN_MULTICAST)
2462 continue;
2463
2464 if (fi)
2465 snprintf(bf, sizeof(bf),
2466 "%s\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u",
2467 fi->fib_dev ? fi->fib_dev->name : "*",
2468 prefix,
2469 fi->fib_nh->nh_gw, flags, 0, 0,
2470 fi->fib_priority,
2471 mask,
2472 (fi->fib_advmss ? fi->fib_advmss + 40 : 0),
2473 fi->fib_window,
2474 fi->fib_rtt >> 3);
2475 else
2476 snprintf(bf, sizeof(bf),
2477 "*\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u",
2478 prefix, 0, flags, 0, 0, 0,
2479 mask, 0, 0, 0);
2480
2481 seq_printf(seq, "%-127s\n", bf);
2482 }
2483 }
2484
2485 return 0;
2486}
2487
Stephen Hemmingerf6908082007-03-12 14:34:29 -07002488static const struct seq_operations fib_route_seq_ops = {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002489 .start = fib_trie_seq_start,
2490 .next = fib_trie_seq_next,
2491 .stop = fib_trie_seq_stop,
2492 .show = fib_route_seq_show,
2493};
2494
2495static int fib_route_seq_open(struct inode *inode, struct file *file)
2496{
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002497 return seq_open_net(inode, file, &fib_route_seq_ops,
2498 sizeof(struct fib_trie_iter));
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002499}
2500
Arjan van de Ven9a321442007-02-12 00:55:35 -08002501static const struct file_operations fib_route_fops = {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002502 .owner = THIS_MODULE,
2503 .open = fib_route_seq_open,
2504 .read = seq_read,
2505 .llseek = seq_lseek,
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002506 .release = seq_release_net,
Robert Olsson19baf832005-06-21 12:43:18 -07002507};
2508
Denis V. Lunev61a02652008-01-10 03:21:09 -08002509int __net_init fib_proc_init(struct net *net)
Robert Olsson19baf832005-06-21 12:43:18 -07002510{
Denis V. Lunev61a02652008-01-10 03:21:09 -08002511 if (!proc_net_fops_create(net, "fib_trie", S_IRUGO, &fib_trie_fops))
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002512 goto out1;
2513
Denis V. Lunev61a02652008-01-10 03:21:09 -08002514 if (!proc_net_fops_create(net, "fib_triestat", S_IRUGO,
2515 &fib_triestat_fops))
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002516 goto out2;
2517
Denis V. Lunev61a02652008-01-10 03:21:09 -08002518 if (!proc_net_fops_create(net, "route", S_IRUGO, &fib_route_fops))
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002519 goto out3;
2520
Robert Olsson19baf832005-06-21 12:43:18 -07002521 return 0;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002522
2523out3:
Denis V. Lunev61a02652008-01-10 03:21:09 -08002524 proc_net_remove(net, "fib_triestat");
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002525out2:
Denis V. Lunev61a02652008-01-10 03:21:09 -08002526 proc_net_remove(net, "fib_trie");
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002527out1:
2528 return -ENOMEM;
Robert Olsson19baf832005-06-21 12:43:18 -07002529}
2530
Denis V. Lunev61a02652008-01-10 03:21:09 -08002531void __net_exit fib_proc_exit(struct net *net)
Robert Olsson19baf832005-06-21 12:43:18 -07002532{
Denis V. Lunev61a02652008-01-10 03:21:09 -08002533 proc_net_remove(net, "fib_trie");
2534 proc_net_remove(net, "fib_triestat");
2535 proc_net_remove(net, "route");
Robert Olsson19baf832005-06-21 12:43:18 -07002536}
2537
2538#endif /* CONFIG_PROC_FS */