blob: 8a147c3baaba1a6f172b6c9a997ac4df9c68ea81 [file] [log] [blame]
Robert Olsson19baf832005-06-21 12:43:18 -07001/*
2 * This program is free software; you can redistribute it and/or
3 * modify it under the terms of the GNU General Public License
4 * as published by the Free Software Foundation; either version
5 * 2 of the License, or (at your option) any later version.
6 *
7 * Robert Olsson <robert.olsson@its.uu.se> Uppsala Universitet
8 * & Swedish University of Agricultural Sciences.
9 *
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090010 * Jens Laas <jens.laas@data.slu.se> Swedish University of
Robert Olsson19baf832005-06-21 12:43:18 -070011 * Agricultural Sciences.
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090012 *
Robert Olsson19baf832005-06-21 12:43:18 -070013 * Hans Liss <hans.liss@its.uu.se> Uppsala Universitet
14 *
Lucas De Marchi25985ed2011-03-30 22:57:33 -030015 * This work is based on the LPC-trie which is originally described in:
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090016 *
Robert Olsson19baf832005-06-21 12:43:18 -070017 * An experimental study of compression methods for dynamic tries
18 * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002.
Justin P. Mattock631dd1a2010-10-18 11:03:14 +020019 * http://www.csc.kth.se/~snilsson/software/dyntrie2/
Robert Olsson19baf832005-06-21 12:43:18 -070020 *
21 *
22 * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson
23 * IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999
24 *
Robert Olsson19baf832005-06-21 12:43:18 -070025 *
26 * Code from fib_hash has been reused which includes the following header:
27 *
28 *
29 * INET An implementation of the TCP/IP protocol suite for the LINUX
30 * operating system. INET is implemented using the BSD Socket
31 * interface as the means of communication with the user level.
32 *
33 * IPv4 FIB: lookup engine and maintenance routines.
34 *
35 *
36 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
37 *
38 * This program is free software; you can redistribute it and/or
39 * modify it under the terms of the GNU General Public License
40 * as published by the Free Software Foundation; either version
41 * 2 of the License, or (at your option) any later version.
Robert Olssonfd966252005-12-22 11:25:10 -080042 *
43 * Substantial contributions to this work comes from:
44 *
45 * David S. Miller, <davem@davemloft.net>
46 * Stephen Hemminger <shemminger@osdl.org>
47 * Paul E. McKenney <paulmck@us.ibm.com>
48 * Patrick McHardy <kaber@trash.net>
Robert Olsson19baf832005-06-21 12:43:18 -070049 */
50
Jens Låås80b71b82009-08-28 23:57:15 -070051#define VERSION "0.409"
Robert Olsson19baf832005-06-21 12:43:18 -070052
Robert Olsson19baf832005-06-21 12:43:18 -070053#include <asm/uaccess.h>
Jiri Slaby1977f032007-10-18 23:40:25 -070054#include <linux/bitops.h>
Robert Olsson19baf832005-06-21 12:43:18 -070055#include <linux/types.h>
56#include <linux/kernel.h>
Robert Olsson19baf832005-06-21 12:43:18 -070057#include <linux/mm.h>
58#include <linux/string.h>
59#include <linux/socket.h>
60#include <linux/sockios.h>
61#include <linux/errno.h>
62#include <linux/in.h>
63#include <linux/inet.h>
Stephen Hemmingercd8787a2006-01-03 14:38:34 -080064#include <linux/inetdevice.h>
Robert Olsson19baf832005-06-21 12:43:18 -070065#include <linux/netdevice.h>
66#include <linux/if_arp.h>
67#include <linux/proc_fs.h>
Robert Olsson2373ce12005-08-25 13:01:29 -070068#include <linux/rcupdate.h>
Robert Olsson19baf832005-06-21 12:43:18 -070069#include <linux/skbuff.h>
70#include <linux/netlink.h>
71#include <linux/init.h>
72#include <linux/list.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090073#include <linux/slab.h>
Paul Gortmakerbc3b2d72011-07-15 11:47:34 -040074#include <linux/export.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020075#include <net/net_namespace.h>
Robert Olsson19baf832005-06-21 12:43:18 -070076#include <net/ip.h>
77#include <net/protocol.h>
78#include <net/route.h>
79#include <net/tcp.h>
80#include <net/sock.h>
81#include <net/ip_fib.h>
82#include "fib_lookup.h"
83
Robert Olsson06ef9212006-03-20 21:35:01 -080084#define MAX_STAT_DEPTH 32
Robert Olsson19baf832005-06-21 12:43:18 -070085
Robert Olsson19baf832005-06-21 12:43:18 -070086#define KEYLENGTH (8*sizeof(t_key))
Robert Olsson19baf832005-06-21 12:43:18 -070087
Robert Olsson19baf832005-06-21 12:43:18 -070088typedef unsigned int t_key;
89
Alexander Duyck64c9b6f2014-12-31 10:55:35 -080090#define IS_TNODE(n) ((n)->bits)
91#define IS_LEAF(n) (!(n)->bits)
Robert Olsson2373ce12005-08-25 13:01:29 -070092
Alexander Duyck9f9e6362014-12-31 10:55:54 -080093#define get_shift(_kv) (KEYLENGTH - (_kv)->pos - (_kv)->bits)
94#define get_index(_key, _kv) (((_key) ^ (_kv)->key) >> get_shift(_kv))
95
Alexander Duyck64c9b6f2014-12-31 10:55:35 -080096struct tnode {
97 t_key key;
98 unsigned char bits; /* 2log(KEYLENGTH) bits needed */
99 unsigned char pos; /* 2log(KEYLENGTH) bits needed */
100 struct tnode __rcu *parent;
Alexander Duyck37fd30f2014-12-31 10:55:41 -0800101 struct rcu_head rcu;
Alexander Duyckadaf9812014-12-31 10:55:47 -0800102 union {
103 /* The fields in this struct are valid if bits > 0 (TNODE) */
104 struct {
105 unsigned int full_children; /* KEYLENGTH bits needed */
106 unsigned int empty_children; /* KEYLENGTH bits needed */
107 struct tnode __rcu *child[0];
108 };
109 /* This list pointer if valid if bits == 0 (LEAF) */
110 struct hlist_head list;
111 };
Robert Olsson19baf832005-06-21 12:43:18 -0700112};
113
114struct leaf_info {
115 struct hlist_node hlist;
116 int plen;
Eric Dumazet5c745012011-07-18 03:16:33 +0000117 u32 mask_plen; /* ntohl(inet_make_mask(plen)) */
Robert Olsson19baf832005-06-21 12:43:18 -0700118 struct list_head falh;
Eric Dumazet5c745012011-07-18 03:16:33 +0000119 struct rcu_head rcu;
Robert Olsson19baf832005-06-21 12:43:18 -0700120};
121
Robert Olsson19baf832005-06-21 12:43:18 -0700122#ifdef CONFIG_IP_FIB_TRIE_STATS
123struct trie_use_stats {
124 unsigned int gets;
125 unsigned int backtrack;
126 unsigned int semantic_match_passed;
127 unsigned int semantic_match_miss;
128 unsigned int null_node_hit;
Robert Olsson2f368952005-07-05 15:02:40 -0700129 unsigned int resize_node_skipped;
Robert Olsson19baf832005-06-21 12:43:18 -0700130};
131#endif
132
133struct trie_stat {
134 unsigned int totdepth;
135 unsigned int maxdepth;
136 unsigned int tnodes;
137 unsigned int leaves;
138 unsigned int nullpointers;
Stephen Hemminger93672292008-01-22 21:54:05 -0800139 unsigned int prefixes;
Robert Olsson06ef9212006-03-20 21:35:01 -0800140 unsigned int nodesizes[MAX_STAT_DEPTH];
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700141};
Robert Olsson19baf832005-06-21 12:43:18 -0700142
143struct trie {
Alexander Duyckadaf9812014-12-31 10:55:47 -0800144 struct tnode __rcu *trie;
Robert Olsson19baf832005-06-21 12:43:18 -0700145#ifdef CONFIG_IP_FIB_TRIE_STATS
Alexander Duyck8274a972014-12-31 10:55:29 -0800146 struct trie_use_stats __percpu *stats;
Robert Olsson19baf832005-06-21 12:43:18 -0700147#endif
Robert Olsson19baf832005-06-21 12:43:18 -0700148};
149
Alexander Duyckadaf9812014-12-31 10:55:47 -0800150static void tnode_put_child_reorg(struct tnode *tn, int i, struct tnode *n,
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800151 int wasfull);
Alexander Duyckadaf9812014-12-31 10:55:47 -0800152static struct tnode *resize(struct trie *t, struct tnode *tn);
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700153static struct tnode *inflate(struct trie *t, struct tnode *tn);
154static struct tnode *halve(struct trie *t, struct tnode *tn);
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700155/* tnodes to free after resize(); protected by RTNL */
Alexander Duyck37fd30f2014-12-31 10:55:41 -0800156static struct callback_head *tnode_free_head;
Jarek Poplawskic3059472009-07-14 08:33:08 +0000157static size_t tnode_free_size;
158
159/*
160 * synchronize_rcu after call_rcu for that many pages; it should be especially
161 * useful before resizing the root node with PREEMPT_NONE configs; the value was
162 * obtained experimentally, aiming to avoid visible slowdown.
163 */
164static const int sync_pages = 128;
Robert Olsson19baf832005-06-21 12:43:18 -0700165
Christoph Lametere18b8902006-12-06 20:33:20 -0800166static struct kmem_cache *fn_alias_kmem __read_mostly;
Stephen Hemmingerbc3c8c12008-01-22 21:51:50 -0800167static struct kmem_cache *trie_leaf_kmem __read_mostly;
Robert Olsson19baf832005-06-21 12:43:18 -0700168
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800169/* caller must hold RTNL */
170#define node_parent(n) rtnl_dereference((n)->parent)
Eric Dumazet0a5c0472011-03-31 01:51:35 -0700171
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800172/* caller must hold RCU read lock or RTNL */
173#define node_parent_rcu(n) rcu_dereference_rtnl((n)->parent)
Eric Dumazet0a5c0472011-03-31 01:51:35 -0700174
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800175/* wrapper for rcu_assign_pointer */
Alexander Duyckadaf9812014-12-31 10:55:47 -0800176static inline void node_set_parent(struct tnode *n, struct tnode *tp)
Stephen Hemminger06801912007-08-10 15:22:13 -0700177{
Alexander Duyckadaf9812014-12-31 10:55:47 -0800178 if (n)
179 rcu_assign_pointer(n->parent, tp);
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800180}
181
182#define NODE_INIT_PARENT(n, p) RCU_INIT_POINTER((n)->parent, p)
183
184/* This provides us with the number of children in this node, in the case of a
185 * leaf this will return 0 meaning none of the children are accessible.
186 */
187static inline int tnode_child_length(const struct tnode *tn)
188{
189 return (1ul << tn->bits) & ~(1ul);
Stephen Hemminger06801912007-08-10 15:22:13 -0700190}
Robert Olsson2373ce12005-08-25 13:01:29 -0700191
Eric Dumazet0a5c0472011-03-31 01:51:35 -0700192/*
193 * caller must hold RTNL
194 */
Alexander Duyckadaf9812014-12-31 10:55:47 -0800195static inline struct tnode *tnode_get_child(const struct tnode *tn, unsigned int i)
Robert Olsson19baf832005-06-21 12:43:18 -0700196{
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800197 BUG_ON(i >= tnode_child_length(tn));
Robert Olsson19baf832005-06-21 12:43:18 -0700198
Eric Dumazet0a5c0472011-03-31 01:51:35 -0700199 return rtnl_dereference(tn->child[i]);
Eric Dumazetb59cfbf2008-01-18 03:31:36 -0800200}
201
Eric Dumazet0a5c0472011-03-31 01:51:35 -0700202/*
203 * caller must hold RCU read lock or RTNL
204 */
Alexander Duyckadaf9812014-12-31 10:55:47 -0800205static inline struct tnode *tnode_get_child_rcu(const struct tnode *tn, unsigned int i)
Eric Dumazetb59cfbf2008-01-18 03:31:36 -0800206{
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800207 BUG_ON(i >= tnode_child_length(tn));
Eric Dumazetb59cfbf2008-01-18 03:31:36 -0800208
Eric Dumazet0a5c0472011-03-31 01:51:35 -0700209 return rcu_dereference_rtnl(tn->child[i]);
Robert Olsson19baf832005-06-21 12:43:18 -0700210}
211
David S. Miller3b004562011-02-16 14:56:22 -0800212static inline t_key mask_pfx(t_key k, unsigned int l)
Stephen Hemmingerab66b4a2007-08-10 15:22:58 -0700213{
214 return (l == 0) ? 0 : k >> (KEYLENGTH-l) << (KEYLENGTH-l);
215}
216
David S. Miller3b004562011-02-16 14:56:22 -0800217static inline t_key tkey_extract_bits(t_key a, unsigned int offset, unsigned int bits)
Robert Olsson19baf832005-06-21 12:43:18 -0700218{
Olof Johansson91b9a272005-08-09 20:24:39 -0700219 if (offset < KEYLENGTH)
Robert Olsson19baf832005-06-21 12:43:18 -0700220 return ((t_key)(a << offset)) >> (KEYLENGTH - bits);
Olof Johansson91b9a272005-08-09 20:24:39 -0700221 else
Robert Olsson19baf832005-06-21 12:43:18 -0700222 return 0;
223}
224
Robert Olsson19baf832005-06-21 12:43:18 -0700225/*
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900226 To understand this stuff, an understanding of keys and all their bits is
227 necessary. Every node in the trie has a key associated with it, but not
Robert Olsson19baf832005-06-21 12:43:18 -0700228 all of the bits in that key are significant.
229
230 Consider a node 'n' and its parent 'tp'.
231
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900232 If n is a leaf, every bit in its key is significant. Its presence is
233 necessitated by path compression, since during a tree traversal (when
234 searching for a leaf - unless we are doing an insertion) we will completely
235 ignore all skipped bits we encounter. Thus we need to verify, at the end of
236 a potentially successful search, that we have indeed been walking the
Robert Olsson19baf832005-06-21 12:43:18 -0700237 correct key path.
238
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900239 Note that we can never "miss" the correct key in the tree if present by
240 following the wrong path. Path compression ensures that segments of the key
241 that are the same for all keys with a given prefix are skipped, but the
242 skipped part *is* identical for each node in the subtrie below the skipped
243 bit! trie_insert() in this implementation takes care of that - note the
Robert Olsson19baf832005-06-21 12:43:18 -0700244 call to tkey_sub_equals() in trie_insert().
245
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900246 if n is an internal node - a 'tnode' here, the various parts of its key
Robert Olsson19baf832005-06-21 12:43:18 -0700247 have many different meanings.
248
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900249 Example:
Robert Olsson19baf832005-06-21 12:43:18 -0700250 _________________________________________________________________
251 | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C |
252 -----------------------------------------------------------------
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900253 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
Robert Olsson19baf832005-06-21 12:43:18 -0700254
255 _________________________________________________________________
256 | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u |
257 -----------------------------------------------------------------
258 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
259
260 tp->pos = 7
261 tp->bits = 3
262 n->pos = 15
Olof Johansson91b9a272005-08-09 20:24:39 -0700263 n->bits = 4
Robert Olsson19baf832005-06-21 12:43:18 -0700264
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900265 First, let's just ignore the bits that come before the parent tp, that is
266 the bits from 0 to (tp->pos-1). They are *known* but at this point we do
Robert Olsson19baf832005-06-21 12:43:18 -0700267 not use them for anything.
268
269 The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900270 index into the parent's child array. That is, they will be used to find
Robert Olsson19baf832005-06-21 12:43:18 -0700271 'n' among tp's children.
272
273 The bits from (tp->pos + tp->bits) to (n->pos - 1) - "S" - are skipped bits
274 for the node n.
275
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900276 All the bits we have seen so far are significant to the node n. The rest
Robert Olsson19baf832005-06-21 12:43:18 -0700277 of the bits are really not needed or indeed known in n->key.
278
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900279 The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into
Robert Olsson19baf832005-06-21 12:43:18 -0700280 n's child array, and will of course be different for each child.
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900281
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700282
Robert Olsson19baf832005-06-21 12:43:18 -0700283 The rest of the bits, from (n->pos + n->bits) onward, are completely unknown
284 at this point.
285
286*/
287
Denis V. Lunevf5026fa2007-12-13 09:47:57 -0800288static const int halve_threshold = 25;
289static const int inflate_threshold = 50;
Jarek Poplawski345aa032009-07-07 19:39:16 -0700290static const int halve_threshold_root = 15;
Jens Låås80b71b82009-08-28 23:57:15 -0700291static const int inflate_threshold_root = 30;
Robert Olsson2373ce12005-08-25 13:01:29 -0700292
293static void __alias_free_mem(struct rcu_head *head)
294{
295 struct fib_alias *fa = container_of(head, struct fib_alias, rcu);
296 kmem_cache_free(fn_alias_kmem, fa);
297}
298
299static inline void alias_free_mem_rcu(struct fib_alias *fa)
300{
301 call_rcu(&fa->rcu, __alias_free_mem);
302}
303
Alexander Duyck37fd30f2014-12-31 10:55:41 -0800304#define TNODE_KMALLOC_MAX \
Alexander Duyckadaf9812014-12-31 10:55:47 -0800305 ilog2((PAGE_SIZE - sizeof(struct tnode)) / sizeof(struct tnode *))
Alexander Duyck37fd30f2014-12-31 10:55:41 -0800306
307static void __node_free_rcu(struct rcu_head *head)
Robert Olsson2373ce12005-08-25 13:01:29 -0700308{
Alexander Duyckadaf9812014-12-31 10:55:47 -0800309 struct tnode *n = container_of(head, struct tnode, rcu);
Alexander Duyck37fd30f2014-12-31 10:55:41 -0800310
311 if (IS_LEAF(n))
312 kmem_cache_free(trie_leaf_kmem, n);
313 else if (n->bits <= TNODE_KMALLOC_MAX)
314 kfree(n);
315 else
316 vfree(n);
Robert Olsson2373ce12005-08-25 13:01:29 -0700317}
318
Alexander Duyck37fd30f2014-12-31 10:55:41 -0800319#define node_free(n) call_rcu(&n->rcu, __node_free_rcu)
Stephen Hemminger387a5482008-04-10 03:47:34 -0700320
Robert Olsson2373ce12005-08-25 13:01:29 -0700321static inline void free_leaf_info(struct leaf_info *leaf)
322{
Lai Jiangshanbceb0f42011-03-18 11:42:34 +0800323 kfree_rcu(leaf, rcu);
Robert Olsson2373ce12005-08-25 13:01:29 -0700324}
325
Eric Dumazet8d965442008-01-13 22:31:44 -0800326static struct tnode *tnode_alloc(size_t size)
Robert Olsson2373ce12005-08-25 13:01:29 -0700327{
Robert Olsson2373ce12005-08-25 13:01:29 -0700328 if (size <= PAGE_SIZE)
Eric Dumazet8d965442008-01-13 22:31:44 -0800329 return kzalloc(size, GFP_KERNEL);
Stephen Hemminger15be75c2008-04-10 02:56:38 -0700330 else
Eric Dumazet7a1c8e52010-11-20 07:46:35 +0000331 return vzalloc(size);
Stephen Hemminger15be75c2008-04-10 02:56:38 -0700332}
Robert Olsson2373ce12005-08-25 13:01:29 -0700333
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700334static void tnode_free_safe(struct tnode *tn)
335{
336 BUG_ON(IS_LEAF(tn));
Alexander Duyck37fd30f2014-12-31 10:55:41 -0800337 tn->rcu.next = tnode_free_head;
338 tnode_free_head = &tn->rcu;
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700339}
340
341static void tnode_free_flush(void)
342{
Alexander Duyck37fd30f2014-12-31 10:55:41 -0800343 struct callback_head *head;
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700344
Alexander Duyck37fd30f2014-12-31 10:55:41 -0800345 while ((head = tnode_free_head)) {
346 struct tnode *tn = container_of(head, struct tnode, rcu);
347
348 tnode_free_head = head->next;
349 tnode_free_size += offsetof(struct tnode, child[1 << tn->bits]);
350
351 node_free(tn);
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700352 }
Jarek Poplawskic3059472009-07-14 08:33:08 +0000353
354 if (tnode_free_size >= PAGE_SIZE * sync_pages) {
355 tnode_free_size = 0;
356 synchronize_rcu();
357 }
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700358}
359
Alexander Duyckadaf9812014-12-31 10:55:47 -0800360static struct tnode *leaf_new(t_key key)
Robert Olsson19baf832005-06-21 12:43:18 -0700361{
Alexander Duyckadaf9812014-12-31 10:55:47 -0800362 struct tnode *l = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700363 if (l) {
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800364 l->parent = NULL;
365 /* set key and pos to reflect full key value
366 * any trailing zeros in the key should be ignored
367 * as the nodes are searched
368 */
369 l->key = key;
370 l->pos = KEYLENGTH;
371 /* set bits to 0 indicating we are not a tnode */
372 l->bits = 0;
373
Robert Olsson19baf832005-06-21 12:43:18 -0700374 INIT_HLIST_HEAD(&l->list);
375 }
376 return l;
377}
378
379static struct leaf_info *leaf_info_new(int plen)
380{
381 struct leaf_info *li = kmalloc(sizeof(struct leaf_info), GFP_KERNEL);
Robert Olsson2373ce12005-08-25 13:01:29 -0700382 if (li) {
383 li->plen = plen;
Eric Dumazet5c745012011-07-18 03:16:33 +0000384 li->mask_plen = ntohl(inet_make_mask(plen));
Robert Olsson2373ce12005-08-25 13:01:29 -0700385 INIT_LIST_HEAD(&li->falh);
Patrick McHardyf0e36f82005-07-05 14:44:55 -0700386 }
Robert Olsson2373ce12005-08-25 13:01:29 -0700387 return li;
Patrick McHardyf0e36f82005-07-05 14:44:55 -0700388}
389
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800390static struct tnode *tnode_new(t_key key, int pos, int bits)
Robert Olsson19baf832005-06-21 12:43:18 -0700391{
Alexander Duyck37fd30f2014-12-31 10:55:41 -0800392 size_t sz = offsetof(struct tnode, child[1 << bits]);
Patrick McHardyf0e36f82005-07-05 14:44:55 -0700393 struct tnode *tn = tnode_alloc(sz);
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800394 unsigned int shift = pos + bits;
395
396 /* verify bits and pos their msb bits clear and values are valid */
397 BUG_ON(!bits || (shift > KEYLENGTH));
Robert Olsson19baf832005-06-21 12:43:18 -0700398
Olof Johansson91b9a272005-08-09 20:24:39 -0700399 if (tn) {
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800400 tn->parent = NULL;
Robert Olsson19baf832005-06-21 12:43:18 -0700401 tn->pos = pos;
402 tn->bits = bits;
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800403 tn->key = mask_pfx(key, pos);
Robert Olsson19baf832005-06-21 12:43:18 -0700404 tn->full_children = 0;
405 tn->empty_children = 1<<bits;
406 }
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700407
Eric Dumazeta034ee32010-09-09 23:32:28 +0000408 pr_debug("AT %p s=%zu %zu\n", tn, sizeof(struct tnode),
Alexander Duyckadaf9812014-12-31 10:55:47 -0800409 sizeof(struct tnode *) << bits);
Robert Olsson19baf832005-06-21 12:43:18 -0700410 return tn;
411}
412
Robert Olsson19baf832005-06-21 12:43:18 -0700413/*
414 * Check whether a tnode 'n' is "full", i.e. it is an internal node
415 * and no bits are skipped. See discussion in dyntree paper p. 6
416 */
417
Alexander Duyckadaf9812014-12-31 10:55:47 -0800418static inline int tnode_full(const struct tnode *tn, const struct tnode *n)
Robert Olsson19baf832005-06-21 12:43:18 -0700419{
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800420 return n && IS_TNODE(n) && (n->pos == (tn->pos + tn->bits));
Robert Olsson19baf832005-06-21 12:43:18 -0700421}
422
Lin Ming61648d92012-07-29 02:00:03 +0000423static inline void put_child(struct tnode *tn, int i,
Alexander Duyckadaf9812014-12-31 10:55:47 -0800424 struct tnode *n)
Robert Olsson19baf832005-06-21 12:43:18 -0700425{
426 tnode_put_child_reorg(tn, i, n, -1);
427}
428
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700429 /*
Robert Olsson19baf832005-06-21 12:43:18 -0700430 * Add a child at position i overwriting the old value.
431 * Update the value of full_children and empty_children.
432 */
433
Alexander Duyckadaf9812014-12-31 10:55:47 -0800434static void tnode_put_child_reorg(struct tnode *tn, int i, struct tnode *n,
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800435 int wasfull)
Robert Olsson19baf832005-06-21 12:43:18 -0700436{
Alexander Duyckadaf9812014-12-31 10:55:47 -0800437 struct tnode *chi = rtnl_dereference(tn->child[i]);
Robert Olsson19baf832005-06-21 12:43:18 -0700438 int isfull;
439
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700440 BUG_ON(i >= 1<<tn->bits);
441
Robert Olsson19baf832005-06-21 12:43:18 -0700442 /* update emptyChildren */
443 if (n == NULL && chi != NULL)
444 tn->empty_children++;
445 else if (n != NULL && chi == NULL)
446 tn->empty_children--;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700447
Robert Olsson19baf832005-06-21 12:43:18 -0700448 /* update fullChildren */
Olof Johansson91b9a272005-08-09 20:24:39 -0700449 if (wasfull == -1)
Robert Olsson19baf832005-06-21 12:43:18 -0700450 wasfull = tnode_full(tn, chi);
451
452 isfull = tnode_full(tn, n);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700453 if (wasfull && !isfull)
Robert Olsson19baf832005-06-21 12:43:18 -0700454 tn->full_children--;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700455 else if (!wasfull && isfull)
Robert Olsson19baf832005-06-21 12:43:18 -0700456 tn->full_children++;
Olof Johansson91b9a272005-08-09 20:24:39 -0700457
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800458 node_set_parent(n, tn);
Robert Olsson19baf832005-06-21 12:43:18 -0700459
Eric Dumazetcf778b02012-01-12 04:41:32 +0000460 rcu_assign_pointer(tn->child[i], n);
Robert Olsson19baf832005-06-21 12:43:18 -0700461}
462
Alexander Duyck836a0122014-12-31 10:56:06 -0800463static void put_child_root(struct tnode *tp, struct trie *t,
464 t_key key, struct tnode *n)
465{
466 if (tp)
467 put_child(tp, get_index(key, tp), n);
468 else
469 rcu_assign_pointer(t->trie, n);
470}
471
Jens Låås80b71b82009-08-28 23:57:15 -0700472#define MAX_WORK 10
Alexander Duyckadaf9812014-12-31 10:55:47 -0800473static struct tnode *resize(struct trie *t, struct tnode *tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700474{
Alexander Duyckadaf9812014-12-31 10:55:47 -0800475 struct tnode *old_tn, *n = NULL;
Robert Olssone6308be2005-10-04 13:01:58 -0700476 int inflate_threshold_use;
477 int halve_threshold_use;
Jens Låås80b71b82009-08-28 23:57:15 -0700478 int max_work;
Robert Olsson19baf832005-06-21 12:43:18 -0700479
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900480 if (!tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700481 return NULL;
482
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700483 pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n",
484 tn, inflate_threshold, halve_threshold);
Robert Olsson19baf832005-06-21 12:43:18 -0700485
486 /* No children */
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800487 if (tn->empty_children > (tnode_child_length(tn) - 1))
488 goto no_children;
489
Robert Olsson19baf832005-06-21 12:43:18 -0700490 /* One child */
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800491 if (tn->empty_children == (tnode_child_length(tn) - 1))
Jens Låås80b71b82009-08-28 23:57:15 -0700492 goto one_child;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700493 /*
Robert Olsson19baf832005-06-21 12:43:18 -0700494 * Double as long as the resulting node has a number of
495 * nonempty nodes that are above the threshold.
496 */
497
498 /*
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700499 * From "Implementing a dynamic compressed trie" by Stefan Nilsson of
500 * the Helsinki University of Technology and Matti Tikkanen of Nokia
Robert Olsson19baf832005-06-21 12:43:18 -0700501 * Telecommunications, page 6:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700502 * "A node is doubled if the ratio of non-empty children to all
Robert Olsson19baf832005-06-21 12:43:18 -0700503 * children in the *doubled* node is at least 'high'."
504 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700505 * 'high' in this instance is the variable 'inflate_threshold'. It
506 * is expressed as a percentage, so we multiply it with
507 * tnode_child_length() and instead of multiplying by 2 (since the
508 * child array will be doubled by inflate()) and multiplying
509 * the left-hand side by 100 (to handle the percentage thing) we
Robert Olsson19baf832005-06-21 12:43:18 -0700510 * multiply the left-hand side by 50.
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700511 *
512 * The left-hand side may look a bit weird: tnode_child_length(tn)
513 * - tn->empty_children is of course the number of non-null children
514 * in the current node. tn->full_children is the number of "full"
Robert Olsson19baf832005-06-21 12:43:18 -0700515 * children, that is non-null tnodes with a skip value of 0.
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700516 * All of those will be doubled in the resulting inflated tnode, so
Robert Olsson19baf832005-06-21 12:43:18 -0700517 * we just count them one extra time here.
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700518 *
Robert Olsson19baf832005-06-21 12:43:18 -0700519 * A clearer way to write this would be:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700520 *
Robert Olsson19baf832005-06-21 12:43:18 -0700521 * to_be_doubled = tn->full_children;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700522 * not_to_be_doubled = tnode_child_length(tn) - tn->empty_children -
Robert Olsson19baf832005-06-21 12:43:18 -0700523 * tn->full_children;
524 *
525 * new_child_length = tnode_child_length(tn) * 2;
526 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700527 * new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) /
Robert Olsson19baf832005-06-21 12:43:18 -0700528 * new_child_length;
529 * if (new_fill_factor >= inflate_threshold)
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700530 *
531 * ...and so on, tho it would mess up the while () loop.
532 *
Robert Olsson19baf832005-06-21 12:43:18 -0700533 * anyway,
534 * 100 * (not_to_be_doubled + 2*to_be_doubled) / new_child_length >=
535 * inflate_threshold
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700536 *
Robert Olsson19baf832005-06-21 12:43:18 -0700537 * avoid a division:
538 * 100 * (not_to_be_doubled + 2*to_be_doubled) >=
539 * inflate_threshold * new_child_length
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700540 *
Robert Olsson19baf832005-06-21 12:43:18 -0700541 * expand not_to_be_doubled and to_be_doubled, and shorten:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700542 * 100 * (tnode_child_length(tn) - tn->empty_children +
Olof Johansson91b9a272005-08-09 20:24:39 -0700543 * tn->full_children) >= inflate_threshold * new_child_length
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700544 *
Robert Olsson19baf832005-06-21 12:43:18 -0700545 * expand new_child_length:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700546 * 100 * (tnode_child_length(tn) - tn->empty_children +
Olof Johansson91b9a272005-08-09 20:24:39 -0700547 * tn->full_children) >=
Robert Olsson19baf832005-06-21 12:43:18 -0700548 * inflate_threshold * tnode_child_length(tn) * 2
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700549 *
Robert Olsson19baf832005-06-21 12:43:18 -0700550 * shorten again:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700551 * 50 * (tn->full_children + tnode_child_length(tn) -
Olof Johansson91b9a272005-08-09 20:24:39 -0700552 * tn->empty_children) >= inflate_threshold *
Robert Olsson19baf832005-06-21 12:43:18 -0700553 * tnode_child_length(tn)
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700554 *
Robert Olsson19baf832005-06-21 12:43:18 -0700555 */
556
Robert Olssone6308be2005-10-04 13:01:58 -0700557 /* Keep root node larger */
558
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800559 if (!node_parent(tn)) {
Jens Låås80b71b82009-08-28 23:57:15 -0700560 inflate_threshold_use = inflate_threshold_root;
561 halve_threshold_use = halve_threshold_root;
Eric Dumazeta034ee32010-09-09 23:32:28 +0000562 } else {
Robert Olssone6308be2005-10-04 13:01:58 -0700563 inflate_threshold_use = inflate_threshold;
Jens Låås80b71b82009-08-28 23:57:15 -0700564 halve_threshold_use = halve_threshold;
565 }
Robert Olssone6308be2005-10-04 13:01:58 -0700566
Jens Låås80b71b82009-08-28 23:57:15 -0700567 max_work = MAX_WORK;
568 while ((tn->full_children > 0 && max_work-- &&
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800569 50 * (tn->full_children + tnode_child_length(tn)
570 - tn->empty_children)
571 >= inflate_threshold_use * tnode_child_length(tn))) {
Robert Olsson19baf832005-06-21 12:43:18 -0700572
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700573 old_tn = tn;
574 tn = inflate(t, tn);
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800575
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700576 if (IS_ERR(tn)) {
577 tn = old_tn;
Robert Olsson2f368952005-07-05 15:02:40 -0700578#ifdef CONFIG_IP_FIB_TRIE_STATS
Alexander Duyck8274a972014-12-31 10:55:29 -0800579 this_cpu_inc(t->stats->resize_node_skipped);
Robert Olsson2f368952005-07-05 15:02:40 -0700580#endif
581 break;
582 }
Robert Olsson19baf832005-06-21 12:43:18 -0700583 }
584
Jens Låås80b71b82009-08-28 23:57:15 -0700585 /* Return if at least one inflate is run */
Eric Dumazeta034ee32010-09-09 23:32:28 +0000586 if (max_work != MAX_WORK)
Alexander Duyckadaf9812014-12-31 10:55:47 -0800587 return tn;
Jens Låås80b71b82009-08-28 23:57:15 -0700588
Robert Olsson19baf832005-06-21 12:43:18 -0700589 /*
590 * Halve as long as the number of empty children in this
591 * node is above threshold.
592 */
Robert Olsson2f368952005-07-05 15:02:40 -0700593
Jens Låås80b71b82009-08-28 23:57:15 -0700594 max_work = MAX_WORK;
595 while (tn->bits > 1 && max_work-- &&
Robert Olsson19baf832005-06-21 12:43:18 -0700596 100 * (tnode_child_length(tn) - tn->empty_children) <
Robert Olssone6308be2005-10-04 13:01:58 -0700597 halve_threshold_use * tnode_child_length(tn)) {
Robert Olsson19baf832005-06-21 12:43:18 -0700598
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700599 old_tn = tn;
600 tn = halve(t, tn);
601 if (IS_ERR(tn)) {
602 tn = old_tn;
Robert Olsson2f368952005-07-05 15:02:40 -0700603#ifdef CONFIG_IP_FIB_TRIE_STATS
Alexander Duyck8274a972014-12-31 10:55:29 -0800604 this_cpu_inc(t->stats->resize_node_skipped);
Robert Olsson2f368952005-07-05 15:02:40 -0700605#endif
606 break;
607 }
608 }
609
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700610
Robert Olsson19baf832005-06-21 12:43:18 -0700611 /* Only one child remains */
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800612 if (tn->empty_children == (tnode_child_length(tn) - 1)) {
613 unsigned long i;
Jens Låås80b71b82009-08-28 23:57:15 -0700614one_child:
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800615 for (i = tnode_child_length(tn); !n && i;)
616 n = tnode_get_child(tn, --i);
617no_children:
618 /* compress one level */
619 node_set_parent(n, NULL);
620 tnode_free_safe(tn);
621 return n;
Jens Låås80b71b82009-08-28 23:57:15 -0700622 }
Alexander Duyckadaf9812014-12-31 10:55:47 -0800623 return tn;
Robert Olsson19baf832005-06-21 12:43:18 -0700624}
625
Eric Dumazet0a5c0472011-03-31 01:51:35 -0700626
627static void tnode_clean_free(struct tnode *tn)
628{
Alexander Duyckadaf9812014-12-31 10:55:47 -0800629 struct tnode *tofree;
Eric Dumazet0a5c0472011-03-31 01:51:35 -0700630 int i;
Eric Dumazet0a5c0472011-03-31 01:51:35 -0700631
632 for (i = 0; i < tnode_child_length(tn); i++) {
Alexander Duyck37fd30f2014-12-31 10:55:41 -0800633 tofree = rtnl_dereference(tn->child[i]);
Eric Dumazet0a5c0472011-03-31 01:51:35 -0700634 if (tofree)
Alexander Duyck37fd30f2014-12-31 10:55:41 -0800635 node_free(tofree);
Eric Dumazet0a5c0472011-03-31 01:51:35 -0700636 }
Alexander Duyck37fd30f2014-12-31 10:55:41 -0800637 node_free(tn);
Eric Dumazet0a5c0472011-03-31 01:51:35 -0700638}
639
Alexander Duyckadaf9812014-12-31 10:55:47 -0800640static struct tnode *inflate(struct trie *t, struct tnode *oldtnode)
Robert Olsson19baf832005-06-21 12:43:18 -0700641{
Alexander Duyckadaf9812014-12-31 10:55:47 -0800642 int olen = tnode_child_length(oldtnode);
643 struct tnode *tn;
Robert Olsson19baf832005-06-21 12:43:18 -0700644 int i;
645
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700646 pr_debug("In inflate\n");
Robert Olsson19baf832005-06-21 12:43:18 -0700647
648 tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1);
649
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700650 if (!tn)
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700651 return ERR_PTR(-ENOMEM);
Robert Olsson2f368952005-07-05 15:02:40 -0700652
653 /*
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700654 * Preallocate and store tnodes before the actual work so we
655 * don't get into an inconsistent state if memory allocation
656 * fails. In case of failure we return the oldnode and inflate
Robert Olsson2f368952005-07-05 15:02:40 -0700657 * of tnode is ignored.
658 */
Olof Johansson91b9a272005-08-09 20:24:39 -0700659
660 for (i = 0; i < olen; i++) {
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800661 struct tnode *inode;
Robert Olsson2f368952005-07-05 15:02:40 -0700662
Alexander Duyckadaf9812014-12-31 10:55:47 -0800663 inode = tnode_get_child(oldtnode, i);
664 if (tnode_full(oldtnode, inode) && inode->bits > 1) {
Robert Olsson2f368952005-07-05 15:02:40 -0700665 struct tnode *left, *right;
Stephen Hemmingerab66b4a2007-08-10 15:22:58 -0700666 t_key m = ~0U << (KEYLENGTH - 1) >> inode->pos;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700667
Robert Olsson2f368952005-07-05 15:02:40 -0700668 left = tnode_new(inode->key&(~m), inode->pos + 1,
669 inode->bits - 1);
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700670 if (!left)
671 goto nomem;
Olof Johansson91b9a272005-08-09 20:24:39 -0700672
Robert Olsson2f368952005-07-05 15:02:40 -0700673 right = tnode_new(inode->key|m, inode->pos + 1,
674 inode->bits - 1);
675
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900676 if (!right) {
Alexander Duyck37fd30f2014-12-31 10:55:41 -0800677 node_free(left);
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700678 goto nomem;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900679 }
Robert Olsson2f368952005-07-05 15:02:40 -0700680
Alexander Duyckadaf9812014-12-31 10:55:47 -0800681 put_child(tn, 2*i, left);
682 put_child(tn, 2*i+1, right);
Robert Olsson2f368952005-07-05 15:02:40 -0700683 }
684 }
685
Olof Johansson91b9a272005-08-09 20:24:39 -0700686 for (i = 0; i < olen; i++) {
Alexander Duyckadaf9812014-12-31 10:55:47 -0800687 struct tnode *inode = tnode_get_child(oldtnode, i);
Olof Johansson91b9a272005-08-09 20:24:39 -0700688 struct tnode *left, *right;
689 int size, j;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700690
Robert Olsson19baf832005-06-21 12:43:18 -0700691 /* An empty child */
Alexander Duyckadaf9812014-12-31 10:55:47 -0800692 if (inode == NULL)
Robert Olsson19baf832005-06-21 12:43:18 -0700693 continue;
694
695 /* A leaf or an internal node with skipped bits */
Alexander Duyckadaf9812014-12-31 10:55:47 -0800696 if (!tnode_full(oldtnode, inode)) {
baker.zhangbbe34cf2013-10-01 07:45:09 +0800697 put_child(tn,
Alexander Duyckadaf9812014-12-31 10:55:47 -0800698 tkey_extract_bits(inode->key, tn->pos, tn->bits),
699 inode);
Robert Olsson19baf832005-06-21 12:43:18 -0700700 continue;
701 }
702
703 /* An internal node with two children */
Robert Olsson19baf832005-06-21 12:43:18 -0700704 if (inode->bits == 1) {
Lin Ming61648d92012-07-29 02:00:03 +0000705 put_child(tn, 2*i, rtnl_dereference(inode->child[0]));
706 put_child(tn, 2*i+1, rtnl_dereference(inode->child[1]));
Robert Olsson19baf832005-06-21 12:43:18 -0700707
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700708 tnode_free_safe(inode);
Olof Johansson91b9a272005-08-09 20:24:39 -0700709 continue;
Robert Olsson19baf832005-06-21 12:43:18 -0700710 }
711
Olof Johansson91b9a272005-08-09 20:24:39 -0700712 /* An internal node with more than two children */
Robert Olsson19baf832005-06-21 12:43:18 -0700713
Olof Johansson91b9a272005-08-09 20:24:39 -0700714 /* We will replace this node 'inode' with two new
715 * ones, 'left' and 'right', each with half of the
716 * original children. The two new nodes will have
717 * a position one bit further down the key and this
718 * means that the "significant" part of their keys
719 * (see the discussion near the top of this file)
720 * will differ by one bit, which will be "0" in
721 * left's key and "1" in right's key. Since we are
722 * moving the key position by one step, the bit that
723 * we are moving away from - the bit at position
724 * (inode->pos) - is the one that will differ between
725 * left and right. So... we synthesize that bit in the
726 * two new keys.
727 * The mask 'm' below will be a single "one" bit at
728 * the position (inode->pos)
729 */
Robert Olsson19baf832005-06-21 12:43:18 -0700730
Olof Johansson91b9a272005-08-09 20:24:39 -0700731 /* Use the old key, but set the new significant
732 * bit to zero.
733 */
Robert Olsson19baf832005-06-21 12:43:18 -0700734
Alexander Duyckadaf9812014-12-31 10:55:47 -0800735 left = tnode_get_child(tn, 2*i);
Lin Ming61648d92012-07-29 02:00:03 +0000736 put_child(tn, 2*i, NULL);
Robert Olsson19baf832005-06-21 12:43:18 -0700737
Olof Johansson91b9a272005-08-09 20:24:39 -0700738 BUG_ON(!left);
Robert Olsson2f368952005-07-05 15:02:40 -0700739
Alexander Duyckadaf9812014-12-31 10:55:47 -0800740 right = tnode_get_child(tn, 2*i+1);
Lin Ming61648d92012-07-29 02:00:03 +0000741 put_child(tn, 2*i+1, NULL);
Robert Olsson2f368952005-07-05 15:02:40 -0700742
Olof Johansson91b9a272005-08-09 20:24:39 -0700743 BUG_ON(!right);
Robert Olsson2f368952005-07-05 15:02:40 -0700744
Olof Johansson91b9a272005-08-09 20:24:39 -0700745 size = tnode_child_length(left);
746 for (j = 0; j < size; j++) {
Lin Ming61648d92012-07-29 02:00:03 +0000747 put_child(left, j, rtnl_dereference(inode->child[j]));
748 put_child(right, j, rtnl_dereference(inode->child[j + size]));
Robert Olsson19baf832005-06-21 12:43:18 -0700749 }
Lin Ming61648d92012-07-29 02:00:03 +0000750 put_child(tn, 2*i, resize(t, left));
751 put_child(tn, 2*i+1, resize(t, right));
Olof Johansson91b9a272005-08-09 20:24:39 -0700752
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700753 tnode_free_safe(inode);
Robert Olsson19baf832005-06-21 12:43:18 -0700754 }
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700755 tnode_free_safe(oldtnode);
Robert Olsson19baf832005-06-21 12:43:18 -0700756 return tn;
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700757nomem:
Eric Dumazet0a5c0472011-03-31 01:51:35 -0700758 tnode_clean_free(tn);
759 return ERR_PTR(-ENOMEM);
Robert Olsson19baf832005-06-21 12:43:18 -0700760}
761
Alexander Duyckadaf9812014-12-31 10:55:47 -0800762static struct tnode *halve(struct trie *t, struct tnode *oldtnode)
Robert Olsson19baf832005-06-21 12:43:18 -0700763{
Alexander Duyckadaf9812014-12-31 10:55:47 -0800764 int olen = tnode_child_length(oldtnode);
765 struct tnode *tn, *left, *right;
Robert Olsson19baf832005-06-21 12:43:18 -0700766 int i;
Robert Olsson19baf832005-06-21 12:43:18 -0700767
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700768 pr_debug("In halve\n");
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700769
770 tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1);
Robert Olsson19baf832005-06-21 12:43:18 -0700771
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700772 if (!tn)
773 return ERR_PTR(-ENOMEM);
Robert Olsson2f368952005-07-05 15:02:40 -0700774
775 /*
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700776 * Preallocate and store tnodes before the actual work so we
777 * don't get into an inconsistent state if memory allocation
778 * fails. In case of failure we return the oldnode and halve
Robert Olsson2f368952005-07-05 15:02:40 -0700779 * of tnode is ignored.
780 */
781
Olof Johansson91b9a272005-08-09 20:24:39 -0700782 for (i = 0; i < olen; i += 2) {
Robert Olsson2f368952005-07-05 15:02:40 -0700783 left = tnode_get_child(oldtnode, i);
784 right = tnode_get_child(oldtnode, i+1);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700785
Robert Olsson2f368952005-07-05 15:02:40 -0700786 /* Two nonempty children */
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700787 if (left && right) {
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700788 struct tnode *newn;
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700789
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700790 newn = tnode_new(left->key, tn->pos + tn->bits, 1);
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700791
792 if (!newn)
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700793 goto nomem;
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700794
Alexander Duyckadaf9812014-12-31 10:55:47 -0800795 put_child(tn, i/2, newn);
Robert Olsson2f368952005-07-05 15:02:40 -0700796 }
Robert Olsson2f368952005-07-05 15:02:40 -0700797
Robert Olsson2f368952005-07-05 15:02:40 -0700798 }
Robert Olsson19baf832005-06-21 12:43:18 -0700799
Olof Johansson91b9a272005-08-09 20:24:39 -0700800 for (i = 0; i < olen; i += 2) {
801 struct tnode *newBinNode;
802
Robert Olsson19baf832005-06-21 12:43:18 -0700803 left = tnode_get_child(oldtnode, i);
804 right = tnode_get_child(oldtnode, i+1);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700805
Robert Olsson19baf832005-06-21 12:43:18 -0700806 /* At least one of the children is empty */
807 if (left == NULL) {
808 if (right == NULL) /* Both are empty */
809 continue;
Lin Ming61648d92012-07-29 02:00:03 +0000810 put_child(tn, i/2, right);
Olof Johansson91b9a272005-08-09 20:24:39 -0700811 continue;
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700812 }
Olof Johansson91b9a272005-08-09 20:24:39 -0700813
814 if (right == NULL) {
Lin Ming61648d92012-07-29 02:00:03 +0000815 put_child(tn, i/2, left);
Olof Johansson91b9a272005-08-09 20:24:39 -0700816 continue;
817 }
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700818
Robert Olsson19baf832005-06-21 12:43:18 -0700819 /* Two nonempty children */
Alexander Duyckadaf9812014-12-31 10:55:47 -0800820 newBinNode = tnode_get_child(tn, i/2);
Lin Ming61648d92012-07-29 02:00:03 +0000821 put_child(tn, i/2, NULL);
822 put_child(newBinNode, 0, left);
823 put_child(newBinNode, 1, right);
824 put_child(tn, i/2, resize(t, newBinNode));
Robert Olsson19baf832005-06-21 12:43:18 -0700825 }
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700826 tnode_free_safe(oldtnode);
Robert Olsson19baf832005-06-21 12:43:18 -0700827 return tn;
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700828nomem:
Eric Dumazet0a5c0472011-03-31 01:51:35 -0700829 tnode_clean_free(tn);
830 return ERR_PTR(-ENOMEM);
Robert Olsson19baf832005-06-21 12:43:18 -0700831}
832
Robert Olsson772cb712005-09-19 15:31:18 -0700833/* readside must use rcu_read_lock currently dump routines
Robert Olsson2373ce12005-08-25 13:01:29 -0700834 via get_fa_head and dump */
835
Alexander Duyckadaf9812014-12-31 10:55:47 -0800836static struct leaf_info *find_leaf_info(struct tnode *l, int plen)
Robert Olsson19baf832005-06-21 12:43:18 -0700837{
Robert Olsson772cb712005-09-19 15:31:18 -0700838 struct hlist_head *head = &l->list;
Robert Olsson19baf832005-06-21 12:43:18 -0700839 struct leaf_info *li;
840
Sasha Levinb67bfe02013-02-27 17:06:00 -0800841 hlist_for_each_entry_rcu(li, head, hlist)
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700842 if (li->plen == plen)
Robert Olsson19baf832005-06-21 12:43:18 -0700843 return li;
Olof Johansson91b9a272005-08-09 20:24:39 -0700844
Robert Olsson19baf832005-06-21 12:43:18 -0700845 return NULL;
846}
847
Alexander Duyckadaf9812014-12-31 10:55:47 -0800848static inline struct list_head *get_fa_head(struct tnode *l, int plen)
Robert Olsson19baf832005-06-21 12:43:18 -0700849{
Robert Olsson772cb712005-09-19 15:31:18 -0700850 struct leaf_info *li = find_leaf_info(l, plen);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700851
Olof Johansson91b9a272005-08-09 20:24:39 -0700852 if (!li)
853 return NULL;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700854
Olof Johansson91b9a272005-08-09 20:24:39 -0700855 return &li->falh;
Robert Olsson19baf832005-06-21 12:43:18 -0700856}
857
858static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new)
859{
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900860 struct leaf_info *li = NULL, *last = NULL;
Robert Olsson19baf832005-06-21 12:43:18 -0700861
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900862 if (hlist_empty(head)) {
863 hlist_add_head_rcu(&new->hlist, head);
864 } else {
Sasha Levinb67bfe02013-02-27 17:06:00 -0800865 hlist_for_each_entry(li, head, hlist) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900866 if (new->plen > li->plen)
867 break;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700868
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900869 last = li;
870 }
871 if (last)
Ken Helias1d023282014-08-06 16:09:16 -0700872 hlist_add_behind_rcu(&new->hlist, &last->hlist);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900873 else
874 hlist_add_before_rcu(&new->hlist, &li->hlist);
875 }
Robert Olsson19baf832005-06-21 12:43:18 -0700876}
877
Robert Olsson2373ce12005-08-25 13:01:29 -0700878/* rcu_read_lock needs to be hold by caller from readside */
Alexander Duyckadaf9812014-12-31 10:55:47 -0800879static struct tnode *fib_find_node(struct trie *t, u32 key)
Robert Olsson19baf832005-06-21 12:43:18 -0700880{
Alexander Duyckadaf9812014-12-31 10:55:47 -0800881 struct tnode *n = rcu_dereference_rtnl(t->trie);
Robert Olsson19baf832005-06-21 12:43:18 -0700882
Alexander Duyck939afb02014-12-31 10:56:00 -0800883 while (n) {
884 unsigned long index = get_index(key, n);
885
886 /* This bit of code is a bit tricky but it combines multiple
887 * checks into a single check. The prefix consists of the
888 * prefix plus zeros for the bits in the cindex. The index
889 * is the difference between the key and this value. From
890 * this we can actually derive several pieces of data.
891 * if !(index >> bits)
892 * we know the value is cindex
893 * else
894 * we have a mismatch in skip bits and failed
895 */
896 if (index >> n->bits)
897 return NULL;
898
899 /* we have found a leaf. Prefixes have already been compared */
900 if (IS_LEAF(n))
Robert Olsson19baf832005-06-21 12:43:18 -0700901 break;
Alexander Duyck939afb02014-12-31 10:56:00 -0800902
903 n = rcu_dereference_rtnl(n->child[index]);
Robert Olsson19baf832005-06-21 12:43:18 -0700904 }
Robert Olsson19baf832005-06-21 12:43:18 -0700905
Alexander Duyck939afb02014-12-31 10:56:00 -0800906 return n;
Robert Olsson19baf832005-06-21 12:43:18 -0700907}
908
Jarek Poplawski7b855762009-06-18 00:28:51 -0700909static void trie_rebalance(struct trie *t, struct tnode *tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700910{
Robert Olsson19baf832005-06-21 12:43:18 -0700911 int wasfull;
Robert Olsson3ed18d72009-05-21 15:20:59 -0700912 t_key cindex, key;
Stephen Hemminger06801912007-08-10 15:22:13 -0700913 struct tnode *tp;
Robert Olsson19baf832005-06-21 12:43:18 -0700914
Robert Olsson3ed18d72009-05-21 15:20:59 -0700915 key = tn->key;
916
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800917 while (tn != NULL && (tp = node_parent(tn)) != NULL) {
Robert Olsson19baf832005-06-21 12:43:18 -0700918 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
919 wasfull = tnode_full(tp, tnode_get_child(tp, cindex));
Alexander Duyckadaf9812014-12-31 10:55:47 -0800920 tn = resize(t, tn);
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800921
Alexander Duyckadaf9812014-12-31 10:55:47 -0800922 tnode_put_child_reorg(tp, cindex, tn, wasfull);
Olof Johansson91b9a272005-08-09 20:24:39 -0700923
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800924 tp = node_parent(tn);
Jarek Poplawski008440e2009-06-30 12:47:19 -0700925 if (!tp)
Alexander Duyckadaf9812014-12-31 10:55:47 -0800926 rcu_assign_pointer(t->trie, tn);
Jarek Poplawski008440e2009-06-30 12:47:19 -0700927
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700928 tnode_free_flush();
Stephen Hemminger06801912007-08-10 15:22:13 -0700929 if (!tp)
Robert Olsson19baf832005-06-21 12:43:18 -0700930 break;
Stephen Hemminger06801912007-08-10 15:22:13 -0700931 tn = tp;
Robert Olsson19baf832005-06-21 12:43:18 -0700932 }
Stephen Hemminger06801912007-08-10 15:22:13 -0700933
Robert Olsson19baf832005-06-21 12:43:18 -0700934 /* Handle last (top) tnode */
Jarek Poplawski7b855762009-06-18 00:28:51 -0700935 if (IS_TNODE(tn))
Alexander Duyckadaf9812014-12-31 10:55:47 -0800936 tn = resize(t, tn);
Robert Olsson19baf832005-06-21 12:43:18 -0700937
Alexander Duyckadaf9812014-12-31 10:55:47 -0800938 rcu_assign_pointer(t->trie, tn);
Jarek Poplawski7b855762009-06-18 00:28:51 -0700939 tnode_free_flush();
Robert Olsson19baf832005-06-21 12:43:18 -0700940}
941
Robert Olsson2373ce12005-08-25 13:01:29 -0700942/* only used from updater-side */
943
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -0800944static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
Robert Olsson19baf832005-06-21 12:43:18 -0700945{
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700946 struct list_head *fa_head = NULL;
Alexander Duyck836a0122014-12-31 10:56:06 -0800947 struct tnode *l, *n, *tp = NULL;
Robert Olsson19baf832005-06-21 12:43:18 -0700948 struct leaf_info *li;
Robert Olsson19baf832005-06-21 12:43:18 -0700949
Alexander Duyck836a0122014-12-31 10:56:06 -0800950 li = leaf_info_new(plen);
951 if (!li)
952 return NULL;
953 fa_head = &li->falh;
954
Eric Dumazet0a5c0472011-03-31 01:51:35 -0700955 n = rtnl_dereference(t->trie);
Robert Olsson19baf832005-06-21 12:43:18 -0700956
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700957 /* If we point to NULL, stop. Either the tree is empty and we should
958 * just put a new leaf in if, or we have reached an empty child slot,
Robert Olsson19baf832005-06-21 12:43:18 -0700959 * and we should just put our new leaf in that.
Robert Olsson19baf832005-06-21 12:43:18 -0700960 *
Alexander Duyck836a0122014-12-31 10:56:06 -0800961 * If we hit a node with a key that does't match then we should stop
962 * and create a new tnode to replace that node and insert ourselves
963 * and the other node into the new tnode.
Robert Olsson19baf832005-06-21 12:43:18 -0700964 */
Alexander Duyck836a0122014-12-31 10:56:06 -0800965 while (n) {
966 unsigned long index = get_index(key, n);
Robert Olsson19baf832005-06-21 12:43:18 -0700967
Alexander Duyck836a0122014-12-31 10:56:06 -0800968 /* This bit of code is a bit tricky but it combines multiple
969 * checks into a single check. The prefix consists of the
970 * prefix plus zeros for the "bits" in the prefix. The index
971 * is the difference between the key and this value. From
972 * this we can actually derive several pieces of data.
973 * if !(index >> bits)
974 * we know the value is child index
975 * else
976 * we have a mismatch in skip bits and failed
Robert Olsson19baf832005-06-21 12:43:18 -0700977 */
Alexander Duyck836a0122014-12-31 10:56:06 -0800978 if (index >> n->bits)
979 break;
Robert Olsson19baf832005-06-21 12:43:18 -0700980
Alexander Duyck836a0122014-12-31 10:56:06 -0800981 /* we have found a leaf. Prefixes have already been compared */
982 if (IS_LEAF(n)) {
983 /* Case 1: n is a leaf, and prefixes match*/
984 insert_leaf_info(&n->list, li);
985 return fa_head;
Robert Olsson19baf832005-06-21 12:43:18 -0700986 }
Robert Olsson19baf832005-06-21 12:43:18 -0700987
Alexander Duyck836a0122014-12-31 10:56:06 -0800988 tp = n;
989 n = rcu_dereference_rtnl(n->child[index]);
990 }
991
992 l = leaf_new(key);
993 if (!l) {
994 free_leaf_info(li);
995 return NULL;
996 }
997
998 insert_leaf_info(&l->list, li);
999
1000 /* Case 2: n is a LEAF or a TNODE and the key doesn't match.
1001 *
1002 * Add a new tnode here
1003 * first tnode need some special handling
1004 * leaves us in position for handling as case 3
1005 */
1006 if (n) {
1007 struct tnode *tn;
1008 int newpos;
1009
1010 newpos = KEYLENGTH - __fls(n->key ^ key) - 1;
1011
1012 tn = tnode_new(key, newpos, 1);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001013 if (!tn) {
Robert Olssonf835e472005-06-28 15:00:39 -07001014 free_leaf_info(li);
Alexander Duyck37fd30f2014-12-31 10:55:41 -08001015 node_free(l);
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001016 return NULL;
Olof Johansson91b9a272005-08-09 20:24:39 -07001017 }
1018
Alexander Duyck836a0122014-12-31 10:56:06 -08001019 /* initialize routes out of node */
1020 NODE_INIT_PARENT(tn, tp);
1021 put_child(tn, get_index(key, tn) ^ 1, n);
Robert Olsson19baf832005-06-21 12:43:18 -07001022
Alexander Duyck836a0122014-12-31 10:56:06 -08001023 /* start adding routes into the node */
1024 put_child_root(tp, t, key, tn);
1025 node_set_parent(n, tn);
Robert Olsson19baf832005-06-21 12:43:18 -07001026
Alexander Duyck836a0122014-12-31 10:56:06 -08001027 /* parent now has a NULL spot where the leaf can go */
Alexander Duycke962f302014-12-10 21:49:22 -08001028 tp = tn;
Robert Olsson19baf832005-06-21 12:43:18 -07001029 }
Olof Johansson91b9a272005-08-09 20:24:39 -07001030
Alexander Duyck836a0122014-12-31 10:56:06 -08001031 /* Case 3: n is NULL, and will just insert a new leaf */
1032 if (tp) {
1033 NODE_INIT_PARENT(l, tp);
1034 put_child(tp, get_index(key, tp), l);
1035 trie_rebalance(t, tp);
1036 } else {
1037 rcu_assign_pointer(t->trie, l);
1038 }
Olof Johansson91b9a272005-08-09 20:24:39 -07001039
Robert Olsson19baf832005-06-21 12:43:18 -07001040 return fa_head;
1041}
1042
Robert Olssond562f1f2007-03-26 14:22:22 -07001043/*
1044 * Caller must hold RTNL.
1045 */
Stephen Hemminger16c6cf82009-09-20 10:35:36 +00001046int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
Robert Olsson19baf832005-06-21 12:43:18 -07001047{
1048 struct trie *t = (struct trie *) tb->tb_data;
1049 struct fib_alias *fa, *new_fa;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001050 struct list_head *fa_head = NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07001051 struct fib_info *fi;
Thomas Graf4e902c52006-08-17 18:14:52 -07001052 int plen = cfg->fc_dst_len;
1053 u8 tos = cfg->fc_tos;
Robert Olsson19baf832005-06-21 12:43:18 -07001054 u32 key, mask;
1055 int err;
Alexander Duyckadaf9812014-12-31 10:55:47 -08001056 struct tnode *l;
Robert Olsson19baf832005-06-21 12:43:18 -07001057
1058 if (plen > 32)
1059 return -EINVAL;
1060
Thomas Graf4e902c52006-08-17 18:14:52 -07001061 key = ntohl(cfg->fc_dst);
Robert Olsson19baf832005-06-21 12:43:18 -07001062
Patrick McHardy2dfe55b2006-08-10 23:08:33 -07001063 pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen);
Robert Olsson19baf832005-06-21 12:43:18 -07001064
Olof Johansson91b9a272005-08-09 20:24:39 -07001065 mask = ntohl(inet_make_mask(plen));
Robert Olsson19baf832005-06-21 12:43:18 -07001066
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001067 if (key & ~mask)
Robert Olsson19baf832005-06-21 12:43:18 -07001068 return -EINVAL;
1069
1070 key = key & mask;
1071
Thomas Graf4e902c52006-08-17 18:14:52 -07001072 fi = fib_create_info(cfg);
1073 if (IS_ERR(fi)) {
1074 err = PTR_ERR(fi);
Robert Olsson19baf832005-06-21 12:43:18 -07001075 goto err;
Thomas Graf4e902c52006-08-17 18:14:52 -07001076 }
Robert Olsson19baf832005-06-21 12:43:18 -07001077
1078 l = fib_find_node(t, key);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001079 fa = NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07001080
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001081 if (l) {
Robert Olsson19baf832005-06-21 12:43:18 -07001082 fa_head = get_fa_head(l, plen);
1083 fa = fib_find_alias(fa_head, tos, fi->fib_priority);
1084 }
1085
1086 /* Now fa, if non-NULL, points to the first fib alias
1087 * with the same keys [prefix,tos,priority], if such key already
1088 * exists or to the node before which we will insert new one.
1089 *
1090 * If fa is NULL, we will need to allocate a new one and
1091 * insert to the head of f.
1092 *
1093 * If f is NULL, no fib node matched the destination key
1094 * and we need to allocate a new one of those as well.
1095 */
1096
Julian Anastasov936f6f82008-01-28 21:18:06 -08001097 if (fa && fa->fa_tos == tos &&
1098 fa->fa_info->fib_priority == fi->fib_priority) {
1099 struct fib_alias *fa_first, *fa_match;
Robert Olsson19baf832005-06-21 12:43:18 -07001100
1101 err = -EEXIST;
Thomas Graf4e902c52006-08-17 18:14:52 -07001102 if (cfg->fc_nlflags & NLM_F_EXCL)
Robert Olsson19baf832005-06-21 12:43:18 -07001103 goto out;
1104
Julian Anastasov936f6f82008-01-28 21:18:06 -08001105 /* We have 2 goals:
1106 * 1. Find exact match for type, scope, fib_info to avoid
1107 * duplicate routes
1108 * 2. Find next 'fa' (or head), NLM_F_APPEND inserts before it
1109 */
1110 fa_match = NULL;
1111 fa_first = fa;
1112 fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list);
1113 list_for_each_entry_continue(fa, fa_head, fa_list) {
1114 if (fa->fa_tos != tos)
1115 break;
1116 if (fa->fa_info->fib_priority != fi->fib_priority)
1117 break;
1118 if (fa->fa_type == cfg->fc_type &&
Julian Anastasov936f6f82008-01-28 21:18:06 -08001119 fa->fa_info == fi) {
1120 fa_match = fa;
1121 break;
1122 }
1123 }
1124
Thomas Graf4e902c52006-08-17 18:14:52 -07001125 if (cfg->fc_nlflags & NLM_F_REPLACE) {
Robert Olsson19baf832005-06-21 12:43:18 -07001126 struct fib_info *fi_drop;
1127 u8 state;
1128
Julian Anastasov936f6f82008-01-28 21:18:06 -08001129 fa = fa_first;
1130 if (fa_match) {
1131 if (fa == fa_match)
1132 err = 0;
Joonwoo Park67250332008-01-18 03:45:18 -08001133 goto out;
Julian Anastasov936f6f82008-01-28 21:18:06 -08001134 }
Robert Olsson2373ce12005-08-25 13:01:29 -07001135 err = -ENOBUFS;
Christoph Lametere94b1762006-12-06 20:33:17 -08001136 new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
Robert Olsson2373ce12005-08-25 13:01:29 -07001137 if (new_fa == NULL)
1138 goto out;
Robert Olsson19baf832005-06-21 12:43:18 -07001139
1140 fi_drop = fa->fa_info;
Robert Olsson2373ce12005-08-25 13:01:29 -07001141 new_fa->fa_tos = fa->fa_tos;
1142 new_fa->fa_info = fi;
Thomas Graf4e902c52006-08-17 18:14:52 -07001143 new_fa->fa_type = cfg->fc_type;
Robert Olsson19baf832005-06-21 12:43:18 -07001144 state = fa->fa_state;
Julian Anastasov936f6f82008-01-28 21:18:06 -08001145 new_fa->fa_state = state & ~FA_S_ACCESSED;
Robert Olsson19baf832005-06-21 12:43:18 -07001146
Robert Olsson2373ce12005-08-25 13:01:29 -07001147 list_replace_rcu(&fa->fa_list, &new_fa->fa_list);
1148 alias_free_mem_rcu(fa);
Robert Olsson19baf832005-06-21 12:43:18 -07001149
1150 fib_release_info(fi_drop);
1151 if (state & FA_S_ACCESSED)
Nicolas Dichtel4ccfe6d2012-09-07 00:45:29 +00001152 rt_cache_flush(cfg->fc_nlinfo.nl_net);
Milan Kocianb8f55832007-05-23 14:55:06 -07001153 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen,
1154 tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE);
Robert Olsson19baf832005-06-21 12:43:18 -07001155
Olof Johansson91b9a272005-08-09 20:24:39 -07001156 goto succeeded;
Robert Olsson19baf832005-06-21 12:43:18 -07001157 }
1158 /* Error if we find a perfect match which
1159 * uses the same scope, type, and nexthop
1160 * information.
1161 */
Julian Anastasov936f6f82008-01-28 21:18:06 -08001162 if (fa_match)
1163 goto out;
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001164
Thomas Graf4e902c52006-08-17 18:14:52 -07001165 if (!(cfg->fc_nlflags & NLM_F_APPEND))
Julian Anastasov936f6f82008-01-28 21:18:06 -08001166 fa = fa_first;
Robert Olsson19baf832005-06-21 12:43:18 -07001167 }
1168 err = -ENOENT;
Thomas Graf4e902c52006-08-17 18:14:52 -07001169 if (!(cfg->fc_nlflags & NLM_F_CREATE))
Robert Olsson19baf832005-06-21 12:43:18 -07001170 goto out;
1171
1172 err = -ENOBUFS;
Christoph Lametere94b1762006-12-06 20:33:17 -08001173 new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
Robert Olsson19baf832005-06-21 12:43:18 -07001174 if (new_fa == NULL)
1175 goto out;
1176
1177 new_fa->fa_info = fi;
1178 new_fa->fa_tos = tos;
Thomas Graf4e902c52006-08-17 18:14:52 -07001179 new_fa->fa_type = cfg->fc_type;
Robert Olsson19baf832005-06-21 12:43:18 -07001180 new_fa->fa_state = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001181 /*
1182 * Insert new entry to the list.
1183 */
1184
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001185 if (!fa_head) {
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001186 fa_head = fib_insert_node(t, key, plen);
1187 if (unlikely(!fa_head)) {
1188 err = -ENOMEM;
Robert Olssonf835e472005-06-28 15:00:39 -07001189 goto out_free_new_fa;
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001190 }
Robert Olssonf835e472005-06-28 15:00:39 -07001191 }
Robert Olsson19baf832005-06-21 12:43:18 -07001192
David S. Miller21d8c492011-04-14 14:49:37 -07001193 if (!plen)
1194 tb->tb_num_default++;
1195
Robert Olsson2373ce12005-08-25 13:01:29 -07001196 list_add_tail_rcu(&new_fa->fa_list,
1197 (fa ? &fa->fa_list : fa_head));
Robert Olsson19baf832005-06-21 12:43:18 -07001198
Nicolas Dichtel4ccfe6d2012-09-07 00:45:29 +00001199 rt_cache_flush(cfg->fc_nlinfo.nl_net);
Thomas Graf4e902c52006-08-17 18:14:52 -07001200 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id,
Milan Kocianb8f55832007-05-23 14:55:06 -07001201 &cfg->fc_nlinfo, 0);
Robert Olsson19baf832005-06-21 12:43:18 -07001202succeeded:
1203 return 0;
Robert Olssonf835e472005-06-28 15:00:39 -07001204
1205out_free_new_fa:
1206 kmem_cache_free(fn_alias_kmem, new_fa);
Robert Olsson19baf832005-06-21 12:43:18 -07001207out:
1208 fib_release_info(fi);
Olof Johansson91b9a272005-08-09 20:24:39 -07001209err:
Robert Olsson19baf832005-06-21 12:43:18 -07001210 return err;
1211}
1212
Robert Olsson772cb712005-09-19 15:31:18 -07001213/* should be called with rcu_read_lock */
Alexander Duyckadaf9812014-12-31 10:55:47 -08001214static int check_leaf(struct fib_table *tb, struct trie *t, struct tnode *l,
David S. Miller22bd5b92011-03-11 19:54:08 -05001215 t_key key, const struct flowi4 *flp,
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001216 struct fib_result *res, int fib_flags)
Robert Olsson19baf832005-06-21 12:43:18 -07001217{
Robert Olsson19baf832005-06-21 12:43:18 -07001218 struct leaf_info *li;
1219 struct hlist_head *hhead = &l->list;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001220
Sasha Levinb67bfe02013-02-27 17:06:00 -08001221 hlist_for_each_entry_rcu(li, hhead, hlist) {
David S. Miller3be06862011-03-07 15:01:10 -08001222 struct fib_alias *fa;
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001223
Eric Dumazet5c745012011-07-18 03:16:33 +00001224 if (l->key != (key & li->mask_plen))
Robert Olsson19baf832005-06-21 12:43:18 -07001225 continue;
1226
David S. Miller3be06862011-03-07 15:01:10 -08001227 list_for_each_entry_rcu(fa, &li->falh, fa_list) {
1228 struct fib_info *fi = fa->fa_info;
1229 int nhsel, err;
1230
David S. Miller22bd5b92011-03-11 19:54:08 -05001231 if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos)
David S. Miller3be06862011-03-07 15:01:10 -08001232 continue;
David S. Millerdccd9ecc2012-05-10 22:16:32 -04001233 if (fi->fib_dead)
1234 continue;
David S. Miller37e826c2011-03-24 18:06:47 -07001235 if (fa->fa_info->fib_scope < flp->flowi4_scope)
David S. Miller3be06862011-03-07 15:01:10 -08001236 continue;
1237 fib_alias_accessed(fa);
1238 err = fib_props[fa->fa_type].error;
Alexander Duyck9f9e6362014-12-31 10:55:54 -08001239 if (unlikely(err < 0)) {
David S. Miller3be06862011-03-07 15:01:10 -08001240#ifdef CONFIG_IP_FIB_TRIE_STATS
Alexander Duyck8274a972014-12-31 10:55:29 -08001241 this_cpu_inc(t->stats->semantic_match_passed);
David S. Miller3be06862011-03-07 15:01:10 -08001242#endif
Julian Anastasov1fbc7842011-03-25 20:33:23 -07001243 return err;
David S. Miller3be06862011-03-07 15:01:10 -08001244 }
1245 if (fi->fib_flags & RTNH_F_DEAD)
1246 continue;
1247 for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
1248 const struct fib_nh *nh = &fi->fib_nh[nhsel];
1249
1250 if (nh->nh_flags & RTNH_F_DEAD)
1251 continue;
David S. Miller22bd5b92011-03-11 19:54:08 -05001252 if (flp->flowi4_oif && flp->flowi4_oif != nh->nh_oif)
David S. Miller3be06862011-03-07 15:01:10 -08001253 continue;
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001254
Robert Olsson19baf832005-06-21 12:43:18 -07001255#ifdef CONFIG_IP_FIB_TRIE_STATS
Alexander Duyck8274a972014-12-31 10:55:29 -08001256 this_cpu_inc(t->stats->semantic_match_passed);
Robert Olsson19baf832005-06-21 12:43:18 -07001257#endif
Eric Dumazet5c745012011-07-18 03:16:33 +00001258 res->prefixlen = li->plen;
David S. Miller3be06862011-03-07 15:01:10 -08001259 res->nh_sel = nhsel;
1260 res->type = fa->fa_type;
Alexander Duyck9f9e6362014-12-31 10:55:54 -08001261 res->scope = fi->fib_scope;
David S. Miller3be06862011-03-07 15:01:10 -08001262 res->fi = fi;
1263 res->table = tb;
1264 res->fa_head = &li->falh;
1265 if (!(fib_flags & FIB_LOOKUP_NOREF))
Eric Dumazet5c745012011-07-18 03:16:33 +00001266 atomic_inc(&fi->fib_clntref);
David S. Miller3be06862011-03-07 15:01:10 -08001267 return 0;
1268 }
1269 }
1270
1271#ifdef CONFIG_IP_FIB_TRIE_STATS
Alexander Duyck8274a972014-12-31 10:55:29 -08001272 this_cpu_inc(t->stats->semantic_match_miss);
David S. Miller3be06862011-03-07 15:01:10 -08001273#endif
Robert Olsson19baf832005-06-21 12:43:18 -07001274 }
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001275
Ben Hutchings2e655572008-07-10 16:52:52 -07001276 return 1;
Robert Olsson19baf832005-06-21 12:43:18 -07001277}
1278
Alexander Duyck9f9e6362014-12-31 10:55:54 -08001279static inline t_key prefix_mismatch(t_key key, struct tnode *n)
1280{
1281 t_key prefix = n->key;
1282
1283 return (key ^ prefix) & (prefix | -prefix);
1284}
1285
David S. Miller22bd5b92011-03-11 19:54:08 -05001286int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001287 struct fib_result *res, int fib_flags)
Robert Olsson19baf832005-06-21 12:43:18 -07001288{
Alexander Duyck9f9e6362014-12-31 10:55:54 -08001289 struct trie *t = (struct trie *)tb->tb_data;
Alexander Duyck8274a972014-12-31 10:55:29 -08001290#ifdef CONFIG_IP_FIB_TRIE_STATS
1291 struct trie_use_stats __percpu *stats = t->stats;
1292#endif
Alexander Duyck9f9e6362014-12-31 10:55:54 -08001293 const t_key key = ntohl(flp->daddr);
1294 struct tnode *n, *pn;
1295 t_key cindex;
1296 int ret = 1;
Olof Johansson91b9a272005-08-09 20:24:39 -07001297
Robert Olsson2373ce12005-08-25 13:01:29 -07001298 rcu_read_lock();
Robert Olsson19baf832005-06-21 12:43:18 -07001299
Robert Olsson2373ce12005-08-25 13:01:29 -07001300 n = rcu_dereference(t->trie);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001301 if (!n)
Robert Olsson19baf832005-06-21 12:43:18 -07001302 goto failed;
1303
1304#ifdef CONFIG_IP_FIB_TRIE_STATS
Alexander Duyck8274a972014-12-31 10:55:29 -08001305 this_cpu_inc(stats->gets);
Robert Olsson19baf832005-06-21 12:43:18 -07001306#endif
1307
Alexander Duyckadaf9812014-12-31 10:55:47 -08001308 pn = n;
Alexander Duyck9f9e6362014-12-31 10:55:54 -08001309 cindex = 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001310
Alexander Duyck9f9e6362014-12-31 10:55:54 -08001311 /* Step 1: Travel to the longest prefix match in the trie */
1312 for (;;) {
1313 unsigned long index = get_index(key, n);
Robert Olsson19baf832005-06-21 12:43:18 -07001314
Alexander Duyck9f9e6362014-12-31 10:55:54 -08001315 /* This bit of code is a bit tricky but it combines multiple
1316 * checks into a single check. The prefix consists of the
1317 * prefix plus zeros for the "bits" in the prefix. The index
1318 * is the difference between the key and this value. From
1319 * this we can actually derive several pieces of data.
1320 * if !(index >> bits)
1321 * we know the value is child index
1322 * else
1323 * we have a mismatch in skip bits and failed
1324 */
1325 if (index >> n->bits)
1326 break;
Robert Olsson19baf832005-06-21 12:43:18 -07001327
Alexander Duyck9f9e6362014-12-31 10:55:54 -08001328 /* we have found a leaf. Prefixes have already been compared */
1329 if (IS_LEAF(n))
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001330 goto found;
Alexander Duyck9f9e6362014-12-31 10:55:54 -08001331
1332 /* only record pn and cindex if we are going to be chopping
1333 * bits later. Otherwise we are just wasting cycles.
1334 */
1335 if (index) {
1336 pn = n;
1337 cindex = index;
Olof Johansson91b9a272005-08-09 20:24:39 -07001338 }
1339
Alexander Duyck9f9e6362014-12-31 10:55:54 -08001340 n = rcu_dereference(n->child[index]);
1341 if (unlikely(!n))
Robert Olsson19baf832005-06-21 12:43:18 -07001342 goto backtrace;
Alexander Duyck9f9e6362014-12-31 10:55:54 -08001343 }
1344
1345 /* Step 2: Sort out leaves and begin backtracing for longest prefix */
1346 for (;;) {
1347 /* record the pointer where our next node pointer is stored */
1348 struct tnode __rcu **cptr = n->child;
1349
1350 /* This test verifies that none of the bits that differ
1351 * between the key and the prefix exist in the region of
1352 * the lsb and higher in the prefix.
1353 */
1354 if (unlikely(prefix_mismatch(key, n)))
1355 goto backtrace;
1356
1357 /* exit out and process leaf */
1358 if (unlikely(IS_LEAF(n)))
1359 break;
1360
1361 /* Don't bother recording parent info. Since we are in
1362 * prefix match mode we will have to come back to wherever
1363 * we started this traversal anyway
1364 */
1365
1366 while ((n = rcu_dereference(*cptr)) == NULL) {
1367backtrace:
1368#ifdef CONFIG_IP_FIB_TRIE_STATS
1369 if (!n)
1370 this_cpu_inc(stats->null_node_hit);
1371#endif
1372 /* If we are at cindex 0 there are no more bits for
1373 * us to strip at this level so we must ascend back
1374 * up one level to see if there are any more bits to
1375 * be stripped there.
1376 */
1377 while (!cindex) {
1378 t_key pkey = pn->key;
1379
1380 pn = node_parent_rcu(pn);
1381 if (unlikely(!pn))
1382 goto failed;
1383#ifdef CONFIG_IP_FIB_TRIE_STATS
1384 this_cpu_inc(stats->backtrack);
1385#endif
1386 /* Get Child's index */
1387 cindex = get_index(pkey, pn);
1388 }
1389
1390 /* strip the least significant bit from the cindex */
1391 cindex &= cindex - 1;
1392
1393 /* grab pointer for next child node */
1394 cptr = &pn->child[cindex];
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001395 }
Robert Olsson19baf832005-06-21 12:43:18 -07001396 }
Alexander Duyck9f9e6362014-12-31 10:55:54 -08001397
Robert Olsson19baf832005-06-21 12:43:18 -07001398found:
Alexander Duyck9f9e6362014-12-31 10:55:54 -08001399 /* Step 3: Process the leaf, if that fails fall back to backtracing */
1400 ret = check_leaf(tb, t, n, key, flp, res, fib_flags);
1401 if (unlikely(ret > 0))
1402 goto backtrace;
1403failed:
Robert Olsson2373ce12005-08-25 13:01:29 -07001404 rcu_read_unlock();
Robert Olsson19baf832005-06-21 12:43:18 -07001405 return ret;
1406}
Florian Westphal6fc01432011-08-25 13:46:12 +02001407EXPORT_SYMBOL_GPL(fib_table_lookup);
Robert Olsson19baf832005-06-21 12:43:18 -07001408
Stephen Hemminger9195bef2008-01-22 21:56:34 -08001409/*
1410 * Remove the leaf and return parent.
1411 */
Alexander Duyckadaf9812014-12-31 10:55:47 -08001412static void trie_leaf_remove(struct trie *t, struct tnode *l)
Robert Olsson19baf832005-06-21 12:43:18 -07001413{
Alexander Duyck64c9b6f2014-12-31 10:55:35 -08001414 struct tnode *tp = node_parent(l);
Robert Olsson19baf832005-06-21 12:43:18 -07001415
Stephen Hemminger9195bef2008-01-22 21:56:34 -08001416 pr_debug("entering trie_leaf_remove(%p)\n", l);
Robert Olsson19baf832005-06-21 12:43:18 -07001417
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001418 if (tp) {
Alexander Duyck836a0122014-12-31 10:56:06 -08001419 put_child(tp, get_index(l->key, tp), NULL);
Jarek Poplawski7b855762009-06-18 00:28:51 -07001420 trie_rebalance(t, tp);
Alexander Duyck836a0122014-12-31 10:56:06 -08001421 } else {
Stephen Hemmingera9b3cd72011-08-01 16:19:00 +00001422 RCU_INIT_POINTER(t->trie, NULL);
Alexander Duyck836a0122014-12-31 10:56:06 -08001423 }
Robert Olsson19baf832005-06-21 12:43:18 -07001424
Alexander Duyck37fd30f2014-12-31 10:55:41 -08001425 node_free(l);
Robert Olsson19baf832005-06-21 12:43:18 -07001426}
1427
Robert Olssond562f1f2007-03-26 14:22:22 -07001428/*
1429 * Caller must hold RTNL.
1430 */
Stephen Hemminger16c6cf82009-09-20 10:35:36 +00001431int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
Robert Olsson19baf832005-06-21 12:43:18 -07001432{
1433 struct trie *t = (struct trie *) tb->tb_data;
1434 u32 key, mask;
Thomas Graf4e902c52006-08-17 18:14:52 -07001435 int plen = cfg->fc_dst_len;
1436 u8 tos = cfg->fc_tos;
Robert Olsson19baf832005-06-21 12:43:18 -07001437 struct fib_alias *fa, *fa_to_delete;
1438 struct list_head *fa_head;
Alexander Duyckadaf9812014-12-31 10:55:47 -08001439 struct tnode *l;
Olof Johansson91b9a272005-08-09 20:24:39 -07001440 struct leaf_info *li;
1441
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001442 if (plen > 32)
Robert Olsson19baf832005-06-21 12:43:18 -07001443 return -EINVAL;
1444
Thomas Graf4e902c52006-08-17 18:14:52 -07001445 key = ntohl(cfg->fc_dst);
Olof Johansson91b9a272005-08-09 20:24:39 -07001446 mask = ntohl(inet_make_mask(plen));
Robert Olsson19baf832005-06-21 12:43:18 -07001447
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001448 if (key & ~mask)
Robert Olsson19baf832005-06-21 12:43:18 -07001449 return -EINVAL;
1450
1451 key = key & mask;
1452 l = fib_find_node(t, key);
1453
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001454 if (!l)
Robert Olsson19baf832005-06-21 12:43:18 -07001455 return -ESRCH;
1456
Igor Maravicad5b3102012-08-13 10:26:08 +02001457 li = find_leaf_info(l, plen);
1458
1459 if (!li)
1460 return -ESRCH;
1461
1462 fa_head = &li->falh;
Robert Olsson19baf832005-06-21 12:43:18 -07001463 fa = fib_find_alias(fa_head, tos, 0);
1464
1465 if (!fa)
1466 return -ESRCH;
1467
Stephen Hemminger0c7770c2005-08-23 21:59:41 -07001468 pr_debug("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t);
Robert Olsson19baf832005-06-21 12:43:18 -07001469
1470 fa_to_delete = NULL;
Julian Anastasov936f6f82008-01-28 21:18:06 -08001471 fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list);
1472 list_for_each_entry_continue(fa, fa_head, fa_list) {
Robert Olsson19baf832005-06-21 12:43:18 -07001473 struct fib_info *fi = fa->fa_info;
1474
1475 if (fa->fa_tos != tos)
1476 break;
1477
Thomas Graf4e902c52006-08-17 18:14:52 -07001478 if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) &&
1479 (cfg->fc_scope == RT_SCOPE_NOWHERE ||
David S. Miller37e826c2011-03-24 18:06:47 -07001480 fa->fa_info->fib_scope == cfg->fc_scope) &&
Julian Anastasov74cb3c12011-03-19 12:13:46 +00001481 (!cfg->fc_prefsrc ||
1482 fi->fib_prefsrc == cfg->fc_prefsrc) &&
Thomas Graf4e902c52006-08-17 18:14:52 -07001483 (!cfg->fc_protocol ||
1484 fi->fib_protocol == cfg->fc_protocol) &&
1485 fib_nh_match(cfg, fi) == 0) {
Robert Olsson19baf832005-06-21 12:43:18 -07001486 fa_to_delete = fa;
1487 break;
1488 }
1489 }
1490
Olof Johansson91b9a272005-08-09 20:24:39 -07001491 if (!fa_to_delete)
1492 return -ESRCH;
Robert Olsson19baf832005-06-21 12:43:18 -07001493
Olof Johansson91b9a272005-08-09 20:24:39 -07001494 fa = fa_to_delete;
Thomas Graf4e902c52006-08-17 18:14:52 -07001495 rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id,
Milan Kocianb8f55832007-05-23 14:55:06 -07001496 &cfg->fc_nlinfo, 0);
Robert Olsson19baf832005-06-21 12:43:18 -07001497
Robert Olsson2373ce12005-08-25 13:01:29 -07001498 list_del_rcu(&fa->fa_list);
Robert Olsson19baf832005-06-21 12:43:18 -07001499
David S. Miller21d8c492011-04-14 14:49:37 -07001500 if (!plen)
1501 tb->tb_num_default--;
1502
Olof Johansson91b9a272005-08-09 20:24:39 -07001503 if (list_empty(fa_head)) {
Robert Olsson2373ce12005-08-25 13:01:29 -07001504 hlist_del_rcu(&li->hlist);
Olof Johansson91b9a272005-08-09 20:24:39 -07001505 free_leaf_info(li);
Robert Olsson2373ce12005-08-25 13:01:29 -07001506 }
Olof Johansson91b9a272005-08-09 20:24:39 -07001507
1508 if (hlist_empty(&l->list))
Stephen Hemminger9195bef2008-01-22 21:56:34 -08001509 trie_leaf_remove(t, l);
Olof Johansson91b9a272005-08-09 20:24:39 -07001510
1511 if (fa->fa_state & FA_S_ACCESSED)
Nicolas Dichtel4ccfe6d2012-09-07 00:45:29 +00001512 rt_cache_flush(cfg->fc_nlinfo.nl_net);
Olof Johansson91b9a272005-08-09 20:24:39 -07001513
Robert Olsson2373ce12005-08-25 13:01:29 -07001514 fib_release_info(fa->fa_info);
1515 alias_free_mem_rcu(fa);
Olof Johansson91b9a272005-08-09 20:24:39 -07001516 return 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001517}
1518
Stephen Hemmingeref3660c2008-04-10 03:46:12 -07001519static int trie_flush_list(struct list_head *head)
Robert Olsson19baf832005-06-21 12:43:18 -07001520{
1521 struct fib_alias *fa, *fa_node;
1522 int found = 0;
1523
1524 list_for_each_entry_safe(fa, fa_node, head, fa_list) {
1525 struct fib_info *fi = fa->fa_info;
Robert Olsson19baf832005-06-21 12:43:18 -07001526
Robert Olsson2373ce12005-08-25 13:01:29 -07001527 if (fi && (fi->fib_flags & RTNH_F_DEAD)) {
1528 list_del_rcu(&fa->fa_list);
1529 fib_release_info(fa->fa_info);
1530 alias_free_mem_rcu(fa);
Robert Olsson19baf832005-06-21 12:43:18 -07001531 found++;
1532 }
1533 }
1534 return found;
1535}
1536
Alexander Duyckadaf9812014-12-31 10:55:47 -08001537static int trie_flush_leaf(struct tnode *l)
Robert Olsson19baf832005-06-21 12:43:18 -07001538{
1539 int found = 0;
1540 struct hlist_head *lih = &l->list;
Sasha Levinb67bfe02013-02-27 17:06:00 -08001541 struct hlist_node *tmp;
Robert Olsson19baf832005-06-21 12:43:18 -07001542 struct leaf_info *li = NULL;
1543
Sasha Levinb67bfe02013-02-27 17:06:00 -08001544 hlist_for_each_entry_safe(li, tmp, lih, hlist) {
Stephen Hemmingeref3660c2008-04-10 03:46:12 -07001545 found += trie_flush_list(&li->falh);
Robert Olsson19baf832005-06-21 12:43:18 -07001546
1547 if (list_empty(&li->falh)) {
Robert Olsson2373ce12005-08-25 13:01:29 -07001548 hlist_del_rcu(&li->hlist);
Robert Olsson19baf832005-06-21 12:43:18 -07001549 free_leaf_info(li);
1550 }
1551 }
1552 return found;
1553}
1554
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001555/*
1556 * Scan for the next right leaf starting at node p->child[idx]
1557 * Since we have back pointer, no recursion necessary.
1558 */
Alexander Duyckadaf9812014-12-31 10:55:47 -08001559static struct tnode *leaf_walk_rcu(struct tnode *p, struct tnode *c)
Robert Olsson19baf832005-06-21 12:43:18 -07001560{
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001561 do {
1562 t_key idx;
Robert Olsson19baf832005-06-21 12:43:18 -07001563
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001564 if (c)
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001565 idx = tkey_extract_bits(c->key, p->pos, p->bits) + 1;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001566 else
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001567 idx = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001568
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001569 while (idx < 1u << p->bits) {
1570 c = tnode_get_child_rcu(p, idx++);
Robert Olsson2373ce12005-08-25 13:01:29 -07001571 if (!c)
Olof Johansson91b9a272005-08-09 20:24:39 -07001572 continue;
Robert Olsson19baf832005-06-21 12:43:18 -07001573
Eric Dumazetaab515d2013-08-05 11:18:49 -07001574 if (IS_LEAF(c))
Alexander Duyckadaf9812014-12-31 10:55:47 -08001575 return c;
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001576
1577 /* Rescan start scanning in new node */
Alexander Duyckadaf9812014-12-31 10:55:47 -08001578 p = c;
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001579 idx = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001580 }
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001581
1582 /* Node empty, walk back up to parent */
Alexander Duyckadaf9812014-12-31 10:55:47 -08001583 c = p;
Eric Dumazeta034ee32010-09-09 23:32:28 +00001584 } while ((p = node_parent_rcu(c)) != NULL);
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001585
1586 return NULL; /* Root of trie */
1587}
1588
Alexander Duyckadaf9812014-12-31 10:55:47 -08001589static struct tnode *trie_firstleaf(struct trie *t)
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001590{
Alexander Duyckadaf9812014-12-31 10:55:47 -08001591 struct tnode *n = rcu_dereference_rtnl(t->trie);
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001592
1593 if (!n)
1594 return NULL;
1595
1596 if (IS_LEAF(n)) /* trie is just a leaf */
Alexander Duyckadaf9812014-12-31 10:55:47 -08001597 return n;
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001598
1599 return leaf_walk_rcu(n, NULL);
1600}
1601
Alexander Duyckadaf9812014-12-31 10:55:47 -08001602static struct tnode *trie_nextleaf(struct tnode *l)
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001603{
Alexander Duyckadaf9812014-12-31 10:55:47 -08001604 struct tnode *p = node_parent_rcu(l);
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001605
1606 if (!p)
1607 return NULL; /* trie with just one leaf */
1608
Alexander Duyckadaf9812014-12-31 10:55:47 -08001609 return leaf_walk_rcu(p, l);
Robert Olsson19baf832005-06-21 12:43:18 -07001610}
1611
Alexander Duyckadaf9812014-12-31 10:55:47 -08001612static struct tnode *trie_leafindex(struct trie *t, int index)
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001613{
Alexander Duyckadaf9812014-12-31 10:55:47 -08001614 struct tnode *l = trie_firstleaf(t);
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001615
Stephen Hemmingerec28cf72008-02-11 21:12:49 -08001616 while (l && index-- > 0)
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001617 l = trie_nextleaf(l);
Stephen Hemmingerec28cf72008-02-11 21:12:49 -08001618
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001619 return l;
1620}
1621
1622
Robert Olssond562f1f2007-03-26 14:22:22 -07001623/*
1624 * Caller must hold RTNL.
1625 */
Stephen Hemminger16c6cf82009-09-20 10:35:36 +00001626int fib_table_flush(struct fib_table *tb)
Robert Olsson19baf832005-06-21 12:43:18 -07001627{
1628 struct trie *t = (struct trie *) tb->tb_data;
Alexander Duyckadaf9812014-12-31 10:55:47 -08001629 struct tnode *l, *ll = NULL;
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001630 int found = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001631
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001632 for (l = trie_firstleaf(t); l; l = trie_nextleaf(l)) {
Stephen Hemmingeref3660c2008-04-10 03:46:12 -07001633 found += trie_flush_leaf(l);
Robert Olsson19baf832005-06-21 12:43:18 -07001634
1635 if (ll && hlist_empty(&ll->list))
Stephen Hemminger9195bef2008-01-22 21:56:34 -08001636 trie_leaf_remove(t, ll);
Robert Olsson19baf832005-06-21 12:43:18 -07001637 ll = l;
1638 }
1639
1640 if (ll && hlist_empty(&ll->list))
Stephen Hemminger9195bef2008-01-22 21:56:34 -08001641 trie_leaf_remove(t, ll);
Robert Olsson19baf832005-06-21 12:43:18 -07001642
Stephen Hemminger0c7770c2005-08-23 21:59:41 -07001643 pr_debug("trie_flush found=%d\n", found);
Robert Olsson19baf832005-06-21 12:43:18 -07001644 return found;
1645}
1646
Pavel Emelyanov4aa2c462010-10-28 02:00:43 +00001647void fib_free_table(struct fib_table *tb)
1648{
Alexander Duyck8274a972014-12-31 10:55:29 -08001649#ifdef CONFIG_IP_FIB_TRIE_STATS
1650 struct trie *t = (struct trie *)tb->tb_data;
1651
1652 free_percpu(t->stats);
1653#endif /* CONFIG_IP_FIB_TRIE_STATS */
Pavel Emelyanov4aa2c462010-10-28 02:00:43 +00001654 kfree(tb);
1655}
1656
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001657static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah,
1658 struct fib_table *tb,
Robert Olsson19baf832005-06-21 12:43:18 -07001659 struct sk_buff *skb, struct netlink_callback *cb)
1660{
1661 int i, s_i;
1662 struct fib_alias *fa;
Al Viro32ab5f82006-09-26 22:21:45 -07001663 __be32 xkey = htonl(key);
Robert Olsson19baf832005-06-21 12:43:18 -07001664
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001665 s_i = cb->args[5];
Robert Olsson19baf832005-06-21 12:43:18 -07001666 i = 0;
1667
Robert Olsson2373ce12005-08-25 13:01:29 -07001668 /* rcu_read_lock is hold by caller */
1669
1670 list_for_each_entry_rcu(fa, fah, fa_list) {
Robert Olsson19baf832005-06-21 12:43:18 -07001671 if (i < s_i) {
1672 i++;
1673 continue;
1674 }
Robert Olsson19baf832005-06-21 12:43:18 -07001675
Eric W. Biederman15e47302012-09-07 20:12:54 +00001676 if (fib_dump_info(skb, NETLINK_CB(cb->skb).portid,
Robert Olsson19baf832005-06-21 12:43:18 -07001677 cb->nlh->nlmsg_seq,
1678 RTM_NEWROUTE,
1679 tb->tb_id,
1680 fa->fa_type,
Thomas Grafbe403ea2006-08-17 18:15:17 -07001681 xkey,
Robert Olsson19baf832005-06-21 12:43:18 -07001682 plen,
1683 fa->fa_tos,
Stephen Hemminger64347f72008-01-22 21:55:01 -08001684 fa->fa_info, NLM_F_MULTI) < 0) {
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001685 cb->args[5] = i;
Robert Olsson19baf832005-06-21 12:43:18 -07001686 return -1;
Olof Johansson91b9a272005-08-09 20:24:39 -07001687 }
Robert Olsson19baf832005-06-21 12:43:18 -07001688 i++;
1689 }
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001690 cb->args[5] = i;
Robert Olsson19baf832005-06-21 12:43:18 -07001691 return skb->len;
1692}
1693
Alexander Duyckadaf9812014-12-31 10:55:47 -08001694static int fn_trie_dump_leaf(struct tnode *l, struct fib_table *tb,
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001695 struct sk_buff *skb, struct netlink_callback *cb)
Robert Olsson19baf832005-06-21 12:43:18 -07001696{
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001697 struct leaf_info *li;
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001698 int i, s_i;
Robert Olsson19baf832005-06-21 12:43:18 -07001699
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001700 s_i = cb->args[4];
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001701 i = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001702
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001703 /* rcu_read_lock is hold by caller */
Sasha Levinb67bfe02013-02-27 17:06:00 -08001704 hlist_for_each_entry_rcu(li, &l->list, hlist) {
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001705 if (i < s_i) {
1706 i++;
Robert Olsson19baf832005-06-21 12:43:18 -07001707 continue;
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001708 }
Robert Olsson19baf832005-06-21 12:43:18 -07001709
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001710 if (i > s_i)
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001711 cb->args[5] = 0;
Olof Johansson91b9a272005-08-09 20:24:39 -07001712
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001713 if (list_empty(&li->falh))
Robert Olsson19baf832005-06-21 12:43:18 -07001714 continue;
1715
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001716 if (fn_trie_dump_fa(l->key, li->plen, &li->falh, tb, skb, cb) < 0) {
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001717 cb->args[4] = i;
Robert Olsson19baf832005-06-21 12:43:18 -07001718 return -1;
1719 }
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001720 i++;
Robert Olsson19baf832005-06-21 12:43:18 -07001721 }
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001722
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001723 cb->args[4] = i;
Robert Olsson19baf832005-06-21 12:43:18 -07001724 return skb->len;
1725}
1726
Stephen Hemminger16c6cf82009-09-20 10:35:36 +00001727int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
1728 struct netlink_callback *cb)
Robert Olsson19baf832005-06-21 12:43:18 -07001729{
Alexander Duyckadaf9812014-12-31 10:55:47 -08001730 struct tnode *l;
Robert Olsson19baf832005-06-21 12:43:18 -07001731 struct trie *t = (struct trie *) tb->tb_data;
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08001732 t_key key = cb->args[2];
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001733 int count = cb->args[3];
Robert Olsson19baf832005-06-21 12:43:18 -07001734
Robert Olsson2373ce12005-08-25 13:01:29 -07001735 rcu_read_lock();
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08001736 /* Dump starting at last key.
1737 * Note: 0.0.0.0/0 (ie default) is first key.
1738 */
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001739 if (count == 0)
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08001740 l = trie_firstleaf(t);
1741 else {
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001742 /* Normally, continue from last key, but if that is missing
1743 * fallback to using slow rescan
1744 */
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08001745 l = fib_find_node(t, key);
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001746 if (!l)
1747 l = trie_leafindex(t, count);
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08001748 }
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001749
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08001750 while (l) {
1751 cb->args[2] = l->key;
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001752 if (fn_trie_dump_leaf(l, tb, skb, cb) < 0) {
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001753 cb->args[3] = count;
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001754 rcu_read_unlock();
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001755 return -1;
Robert Olsson19baf832005-06-21 12:43:18 -07001756 }
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08001757
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001758 ++count;
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08001759 l = trie_nextleaf(l);
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001760 memset(&cb->args[4], 0,
1761 sizeof(cb->args) - 4*sizeof(cb->args[0]));
Robert Olsson19baf832005-06-21 12:43:18 -07001762 }
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001763 cb->args[3] = count;
Robert Olsson2373ce12005-08-25 13:01:29 -07001764 rcu_read_unlock();
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001765
Robert Olsson19baf832005-06-21 12:43:18 -07001766 return skb->len;
Robert Olsson19baf832005-06-21 12:43:18 -07001767}
1768
David S. Miller5348ba82011-02-01 15:30:56 -08001769void __init fib_trie_init(void)
Stephen Hemminger7f9b8052008-01-14 23:14:20 -08001770{
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001771 fn_alias_kmem = kmem_cache_create("ip_fib_alias",
1772 sizeof(struct fib_alias),
Stephen Hemmingerbc3c8c12008-01-22 21:51:50 -08001773 0, SLAB_PANIC, NULL);
1774
1775 trie_leaf_kmem = kmem_cache_create("ip_fib_trie",
Alexander Duyckadaf9812014-12-31 10:55:47 -08001776 max(sizeof(struct tnode),
Stephen Hemmingerbc3c8c12008-01-22 21:51:50 -08001777 sizeof(struct leaf_info)),
1778 0, SLAB_PANIC, NULL);
Stephen Hemminger7f9b8052008-01-14 23:14:20 -08001779}
Robert Olsson19baf832005-06-21 12:43:18 -07001780
Stephen Hemminger7f9b8052008-01-14 23:14:20 -08001781
David S. Miller5348ba82011-02-01 15:30:56 -08001782struct fib_table *fib_trie_table(u32 id)
Robert Olsson19baf832005-06-21 12:43:18 -07001783{
1784 struct fib_table *tb;
1785 struct trie *t;
1786
Robert Olsson19baf832005-06-21 12:43:18 -07001787 tb = kmalloc(sizeof(struct fib_table) + sizeof(struct trie),
1788 GFP_KERNEL);
1789 if (tb == NULL)
1790 return NULL;
1791
1792 tb->tb_id = id;
Denis V. Lunev971b8932007-12-08 00:32:23 -08001793 tb->tb_default = -1;
David S. Miller21d8c492011-04-14 14:49:37 -07001794 tb->tb_num_default = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001795
1796 t = (struct trie *) tb->tb_data;
Alexander Duyck8274a972014-12-31 10:55:29 -08001797 RCU_INIT_POINTER(t->trie, NULL);
1798#ifdef CONFIG_IP_FIB_TRIE_STATS
1799 t->stats = alloc_percpu(struct trie_use_stats);
1800 if (!t->stats) {
1801 kfree(tb);
1802 tb = NULL;
1803 }
1804#endif
Robert Olsson19baf832005-06-21 12:43:18 -07001805
Robert Olsson19baf832005-06-21 12:43:18 -07001806 return tb;
1807}
1808
Robert Olsson19baf832005-06-21 12:43:18 -07001809#ifdef CONFIG_PROC_FS
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001810/* Depth first Trie walk iterator */
1811struct fib_trie_iter {
Denis V. Lunev1c340b22008-01-10 03:27:17 -08001812 struct seq_net_private p;
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07001813 struct fib_table *tb;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001814 struct tnode *tnode;
Eric Dumazeta034ee32010-09-09 23:32:28 +00001815 unsigned int index;
1816 unsigned int depth;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001817};
Robert Olsson19baf832005-06-21 12:43:18 -07001818
Alexander Duyckadaf9812014-12-31 10:55:47 -08001819static struct tnode *fib_trie_get_next(struct fib_trie_iter *iter)
Robert Olsson19baf832005-06-21 12:43:18 -07001820{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001821 struct tnode *tn = iter->tnode;
Eric Dumazeta034ee32010-09-09 23:32:28 +00001822 unsigned int cindex = iter->index;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001823 struct tnode *p;
1824
Eric W. Biederman6640e692007-01-24 14:42:04 -08001825 /* A single entry routing table */
1826 if (!tn)
1827 return NULL;
1828
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001829 pr_debug("get_next iter={node=%p index=%d depth=%d}\n",
1830 iter->tnode, iter->index, iter->depth);
1831rescan:
1832 while (cindex < (1<<tn->bits)) {
Alexander Duyckadaf9812014-12-31 10:55:47 -08001833 struct tnode *n = tnode_get_child_rcu(tn, cindex);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001834
1835 if (n) {
1836 if (IS_LEAF(n)) {
1837 iter->tnode = tn;
1838 iter->index = cindex + 1;
1839 } else {
1840 /* push down one level */
Alexander Duyckadaf9812014-12-31 10:55:47 -08001841 iter->tnode = n;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001842 iter->index = 0;
1843 ++iter->depth;
1844 }
1845 return n;
1846 }
1847
1848 ++cindex;
1849 }
1850
1851 /* Current node exhausted, pop back up */
Alexander Duyckadaf9812014-12-31 10:55:47 -08001852 p = node_parent_rcu(tn);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001853 if (p) {
1854 cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1;
1855 tn = p;
1856 --iter->depth;
1857 goto rescan;
1858 }
1859
1860 /* got root? */
Robert Olsson19baf832005-06-21 12:43:18 -07001861 return NULL;
1862}
1863
Alexander Duyckadaf9812014-12-31 10:55:47 -08001864static struct tnode *fib_trie_get_first(struct fib_trie_iter *iter,
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001865 struct trie *t)
Robert Olsson19baf832005-06-21 12:43:18 -07001866{
Alexander Duyckadaf9812014-12-31 10:55:47 -08001867 struct tnode *n;
Robert Olsson5ddf0eb2006-03-20 21:34:12 -08001868
Stephen Hemminger132adf52007-03-08 20:44:43 -08001869 if (!t)
Robert Olsson5ddf0eb2006-03-20 21:34:12 -08001870 return NULL;
1871
1872 n = rcu_dereference(t->trie);
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07001873 if (!n)
Robert Olsson5ddf0eb2006-03-20 21:34:12 -08001874 return NULL;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001875
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07001876 if (IS_TNODE(n)) {
Alexander Duyckadaf9812014-12-31 10:55:47 -08001877 iter->tnode = n;
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07001878 iter->index = 0;
1879 iter->depth = 1;
1880 } else {
1881 iter->tnode = NULL;
1882 iter->index = 0;
1883 iter->depth = 0;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001884 }
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07001885
1886 return n;
Robert Olsson19baf832005-06-21 12:43:18 -07001887}
1888
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001889static void trie_collect_stats(struct trie *t, struct trie_stat *s)
Robert Olsson19baf832005-06-21 12:43:18 -07001890{
Alexander Duyckadaf9812014-12-31 10:55:47 -08001891 struct tnode *n;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001892 struct fib_trie_iter iter;
Robert Olsson19baf832005-06-21 12:43:18 -07001893
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001894 memset(s, 0, sizeof(*s));
Robert Olsson19baf832005-06-21 12:43:18 -07001895
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001896 rcu_read_lock();
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07001897 for (n = fib_trie_get_first(&iter, t); n; n = fib_trie_get_next(&iter)) {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001898 if (IS_LEAF(n)) {
Stephen Hemminger93672292008-01-22 21:54:05 -08001899 struct leaf_info *li;
Stephen Hemminger93672292008-01-22 21:54:05 -08001900
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001901 s->leaves++;
1902 s->totdepth += iter.depth;
1903 if (iter.depth > s->maxdepth)
1904 s->maxdepth = iter.depth;
Stephen Hemminger93672292008-01-22 21:54:05 -08001905
Alexander Duyckadaf9812014-12-31 10:55:47 -08001906 hlist_for_each_entry_rcu(li, &n->list, hlist)
Stephen Hemminger93672292008-01-22 21:54:05 -08001907 ++s->prefixes;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001908 } else {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001909 int i;
Robert Olsson19baf832005-06-21 12:43:18 -07001910
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001911 s->tnodes++;
Alexander Duyckadaf9812014-12-31 10:55:47 -08001912 if (n->bits < MAX_STAT_DEPTH)
1913 s->nodesizes[n->bits]++;
Robert Olsson06ef9212006-03-20 21:35:01 -08001914
Alexander Duyckadaf9812014-12-31 10:55:47 -08001915 for (i = 0; i < tnode_child_length(n); i++)
1916 if (!rcu_access_pointer(n->child[i]))
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001917 s->nullpointers++;
1918 }
1919 }
1920 rcu_read_unlock();
Robert Olsson19baf832005-06-21 12:43:18 -07001921}
1922
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001923/*
Robert Olsson19baf832005-06-21 12:43:18 -07001924 * This outputs /proc/net/fib_triestats
Robert Olsson19baf832005-06-21 12:43:18 -07001925 */
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001926static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat)
Robert Olsson19baf832005-06-21 12:43:18 -07001927{
Eric Dumazeta034ee32010-09-09 23:32:28 +00001928 unsigned int i, max, pointers, bytes, avdepth;
Robert Olsson19baf832005-06-21 12:43:18 -07001929
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001930 if (stat->leaves)
1931 avdepth = stat->totdepth*100 / stat->leaves;
1932 else
1933 avdepth = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001934
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001935 seq_printf(seq, "\tAver depth: %u.%02d\n",
1936 avdepth / 100, avdepth % 100);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001937 seq_printf(seq, "\tMax depth: %u\n", stat->maxdepth);
Robert Olsson19baf832005-06-21 12:43:18 -07001938
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001939 seq_printf(seq, "\tLeaves: %u\n", stat->leaves);
Alexander Duyckadaf9812014-12-31 10:55:47 -08001940 bytes = sizeof(struct tnode) * stat->leaves;
Stephen Hemminger93672292008-01-22 21:54:05 -08001941
1942 seq_printf(seq, "\tPrefixes: %u\n", stat->prefixes);
1943 bytes += sizeof(struct leaf_info) * stat->prefixes;
1944
Stephen Hemminger187b5182008-01-12 20:55:55 -08001945 seq_printf(seq, "\tInternal nodes: %u\n\t", stat->tnodes);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001946 bytes += sizeof(struct tnode) * stat->tnodes;
Robert Olsson19baf832005-06-21 12:43:18 -07001947
Robert Olsson06ef9212006-03-20 21:35:01 -08001948 max = MAX_STAT_DEPTH;
1949 while (max > 0 && stat->nodesizes[max-1] == 0)
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001950 max--;
Robert Olsson19baf832005-06-21 12:43:18 -07001951
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001952 pointers = 0;
Jerry Snitselaarf585a992013-07-22 12:01:58 -07001953 for (i = 1; i < max; i++)
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001954 if (stat->nodesizes[i] != 0) {
Stephen Hemminger187b5182008-01-12 20:55:55 -08001955 seq_printf(seq, " %u: %u", i, stat->nodesizes[i]);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001956 pointers += (1<<i) * stat->nodesizes[i];
1957 }
1958 seq_putc(seq, '\n');
Stephen Hemminger187b5182008-01-12 20:55:55 -08001959 seq_printf(seq, "\tPointers: %u\n", pointers);
Robert Olsson19baf832005-06-21 12:43:18 -07001960
Alexander Duyckadaf9812014-12-31 10:55:47 -08001961 bytes += sizeof(struct tnode *) * pointers;
Stephen Hemminger187b5182008-01-12 20:55:55 -08001962 seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers);
1963 seq_printf(seq, "Total size: %u kB\n", (bytes + 1023) / 1024);
Stephen Hemminger66a2f7f2008-01-12 21:23:17 -08001964}
Robert Olsson19baf832005-06-21 12:43:18 -07001965
1966#ifdef CONFIG_IP_FIB_TRIE_STATS
Stephen Hemminger66a2f7f2008-01-12 21:23:17 -08001967static void trie_show_usage(struct seq_file *seq,
Alexander Duyck8274a972014-12-31 10:55:29 -08001968 const struct trie_use_stats __percpu *stats)
Stephen Hemminger66a2f7f2008-01-12 21:23:17 -08001969{
Alexander Duyck8274a972014-12-31 10:55:29 -08001970 struct trie_use_stats s = { 0 };
1971 int cpu;
1972
1973 /* loop through all of the CPUs and gather up the stats */
1974 for_each_possible_cpu(cpu) {
1975 const struct trie_use_stats *pcpu = per_cpu_ptr(stats, cpu);
1976
1977 s.gets += pcpu->gets;
1978 s.backtrack += pcpu->backtrack;
1979 s.semantic_match_passed += pcpu->semantic_match_passed;
1980 s.semantic_match_miss += pcpu->semantic_match_miss;
1981 s.null_node_hit += pcpu->null_node_hit;
1982 s.resize_node_skipped += pcpu->resize_node_skipped;
1983 }
1984
Stephen Hemminger66a2f7f2008-01-12 21:23:17 -08001985 seq_printf(seq, "\nCounters:\n---------\n");
Alexander Duyck8274a972014-12-31 10:55:29 -08001986 seq_printf(seq, "gets = %u\n", s.gets);
1987 seq_printf(seq, "backtracks = %u\n", s.backtrack);
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001988 seq_printf(seq, "semantic match passed = %u\n",
Alexander Duyck8274a972014-12-31 10:55:29 -08001989 s.semantic_match_passed);
1990 seq_printf(seq, "semantic match miss = %u\n", s.semantic_match_miss);
1991 seq_printf(seq, "null node hit= %u\n", s.null_node_hit);
1992 seq_printf(seq, "skipped node resize = %u\n\n", s.resize_node_skipped);
Robert Olsson19baf832005-06-21 12:43:18 -07001993}
Stephen Hemminger66a2f7f2008-01-12 21:23:17 -08001994#endif /* CONFIG_IP_FIB_TRIE_STATS */
1995
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07001996static void fib_table_print(struct seq_file *seq, struct fib_table *tb)
Stephen Hemmingerd717a9a2008-01-14 23:11:54 -08001997{
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07001998 if (tb->tb_id == RT_TABLE_LOCAL)
1999 seq_puts(seq, "Local:\n");
2000 else if (tb->tb_id == RT_TABLE_MAIN)
2001 seq_puts(seq, "Main:\n");
2002 else
2003 seq_printf(seq, "Id %d:\n", tb->tb_id);
Stephen Hemmingerd717a9a2008-01-14 23:11:54 -08002004}
Robert Olsson19baf832005-06-21 12:43:18 -07002005
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002006
Robert Olsson19baf832005-06-21 12:43:18 -07002007static int fib_triestat_seq_show(struct seq_file *seq, void *v)
2008{
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002009 struct net *net = (struct net *)seq->private;
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002010 unsigned int h;
Eric W. Biederman877a9bf2007-12-07 00:47:47 -08002011
Stephen Hemmingerd717a9a2008-01-14 23:11:54 -08002012 seq_printf(seq,
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08002013 "Basic info: size of leaf:"
2014 " %Zd bytes, size of tnode: %Zd bytes.\n",
Alexander Duyckadaf9812014-12-31 10:55:47 -08002015 sizeof(struct tnode), sizeof(struct tnode));
Olof Johansson91b9a272005-08-09 20:24:39 -07002016
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002017 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
2018 struct hlist_head *head = &net->ipv4.fib_table_hash[h];
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002019 struct fib_table *tb;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002020
Sasha Levinb67bfe02013-02-27 17:06:00 -08002021 hlist_for_each_entry_rcu(tb, head, tb_hlist) {
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002022 struct trie *t = (struct trie *) tb->tb_data;
2023 struct trie_stat stat;
2024
2025 if (!t)
2026 continue;
2027
2028 fib_table_print(seq, tb);
2029
2030 trie_collect_stats(t, &stat);
2031 trie_show_stats(seq, &stat);
2032#ifdef CONFIG_IP_FIB_TRIE_STATS
Alexander Duyck8274a972014-12-31 10:55:29 -08002033 trie_show_usage(seq, t->stats);
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002034#endif
2035 }
2036 }
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002037
Robert Olsson19baf832005-06-21 12:43:18 -07002038 return 0;
2039}
2040
Robert Olsson19baf832005-06-21 12:43:18 -07002041static int fib_triestat_seq_open(struct inode *inode, struct file *file)
2042{
Pavel Emelyanovde05c552008-07-18 04:07:21 -07002043 return single_open_net(inode, file, fib_triestat_seq_show);
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002044}
2045
Arjan van de Ven9a321442007-02-12 00:55:35 -08002046static const struct file_operations fib_triestat_fops = {
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002047 .owner = THIS_MODULE,
2048 .open = fib_triestat_seq_open,
2049 .read = seq_read,
2050 .llseek = seq_lseek,
Pavel Emelyanovb6fcbdb2008-07-18 04:07:44 -07002051 .release = single_release_net,
Robert Olsson19baf832005-06-21 12:43:18 -07002052};
2053
Alexander Duyckadaf9812014-12-31 10:55:47 -08002054static struct tnode *fib_trie_get_idx(struct seq_file *seq, loff_t pos)
Robert Olsson19baf832005-06-21 12:43:18 -07002055{
YOSHIFUJI Hideaki12188542008-03-26 02:36:06 +09002056 struct fib_trie_iter *iter = seq->private;
2057 struct net *net = seq_file_net(seq);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002058 loff_t idx = 0;
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002059 unsigned int h;
Robert Olsson19baf832005-06-21 12:43:18 -07002060
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002061 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
2062 struct hlist_head *head = &net->ipv4.fib_table_hash[h];
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002063 struct fib_table *tb;
2064
Sasha Levinb67bfe02013-02-27 17:06:00 -08002065 hlist_for_each_entry_rcu(tb, head, tb_hlist) {
Alexander Duyckadaf9812014-12-31 10:55:47 -08002066 struct tnode *n;
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002067
2068 for (n = fib_trie_get_first(iter,
2069 (struct trie *) tb->tb_data);
2070 n; n = fib_trie_get_next(iter))
2071 if (pos == idx++) {
2072 iter->tb = tb;
2073 return n;
2074 }
2075 }
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002076 }
Robert Olsson19baf832005-06-21 12:43:18 -07002077
Robert Olsson19baf832005-06-21 12:43:18 -07002078 return NULL;
2079}
2080
2081static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos)
Stephen Hemmingerc95aaf92008-01-12 21:25:02 -08002082 __acquires(RCU)
Robert Olsson19baf832005-06-21 12:43:18 -07002083{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002084 rcu_read_lock();
YOSHIFUJI Hideaki12188542008-03-26 02:36:06 +09002085 return fib_trie_get_idx(seq, *pos);
Robert Olsson19baf832005-06-21 12:43:18 -07002086}
2087
2088static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2089{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002090 struct fib_trie_iter *iter = seq->private;
YOSHIFUJI Hideaki12188542008-03-26 02:36:06 +09002091 struct net *net = seq_file_net(seq);
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002092 struct fib_table *tb = iter->tb;
2093 struct hlist_node *tb_node;
2094 unsigned int h;
Alexander Duyckadaf9812014-12-31 10:55:47 -08002095 struct tnode *n;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002096
Robert Olsson19baf832005-06-21 12:43:18 -07002097 ++*pos;
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002098 /* next node in same table */
2099 n = fib_trie_get_next(iter);
2100 if (n)
2101 return n;
Olof Johansson91b9a272005-08-09 20:24:39 -07002102
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002103 /* walk rest of this hash chain */
2104 h = tb->tb_id & (FIB_TABLE_HASHSZ - 1);
Eric Dumazet0a5c0472011-03-31 01:51:35 -07002105 while ((tb_node = rcu_dereference(hlist_next_rcu(&tb->tb_hlist)))) {
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002106 tb = hlist_entry(tb_node, struct fib_table, tb_hlist);
2107 n = fib_trie_get_first(iter, (struct trie *) tb->tb_data);
2108 if (n)
2109 goto found;
2110 }
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002111
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002112 /* new hash chain */
2113 while (++h < FIB_TABLE_HASHSZ) {
2114 struct hlist_head *head = &net->ipv4.fib_table_hash[h];
Sasha Levinb67bfe02013-02-27 17:06:00 -08002115 hlist_for_each_entry_rcu(tb, head, tb_hlist) {
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002116 n = fib_trie_get_first(iter, (struct trie *) tb->tb_data);
2117 if (n)
2118 goto found;
2119 }
2120 }
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002121 return NULL;
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002122
2123found:
2124 iter->tb = tb;
2125 return n;
Robert Olsson19baf832005-06-21 12:43:18 -07002126}
2127
2128static void fib_trie_seq_stop(struct seq_file *seq, void *v)
Stephen Hemmingerc95aaf92008-01-12 21:25:02 -08002129 __releases(RCU)
Robert Olsson19baf832005-06-21 12:43:18 -07002130{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002131 rcu_read_unlock();
Robert Olsson19baf832005-06-21 12:43:18 -07002132}
2133
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002134static void seq_indent(struct seq_file *seq, int n)
2135{
Eric Dumazeta034ee32010-09-09 23:32:28 +00002136 while (n-- > 0)
2137 seq_puts(seq, " ");
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002138}
Robert Olsson19baf832005-06-21 12:43:18 -07002139
Eric Dumazet28d36e32008-01-14 23:09:56 -08002140static inline const char *rtn_scope(char *buf, size_t len, enum rt_scope_t s)
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002141{
Stephen Hemminger132adf52007-03-08 20:44:43 -08002142 switch (s) {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002143 case RT_SCOPE_UNIVERSE: return "universe";
2144 case RT_SCOPE_SITE: return "site";
2145 case RT_SCOPE_LINK: return "link";
2146 case RT_SCOPE_HOST: return "host";
2147 case RT_SCOPE_NOWHERE: return "nowhere";
2148 default:
Eric Dumazet28d36e32008-01-14 23:09:56 -08002149 snprintf(buf, len, "scope=%d", s);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002150 return buf;
2151 }
2152}
2153
Jan Engelhardt36cbd3d2009-08-05 10:42:58 -07002154static const char *const rtn_type_names[__RTN_MAX] = {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002155 [RTN_UNSPEC] = "UNSPEC",
2156 [RTN_UNICAST] = "UNICAST",
2157 [RTN_LOCAL] = "LOCAL",
2158 [RTN_BROADCAST] = "BROADCAST",
2159 [RTN_ANYCAST] = "ANYCAST",
2160 [RTN_MULTICAST] = "MULTICAST",
2161 [RTN_BLACKHOLE] = "BLACKHOLE",
2162 [RTN_UNREACHABLE] = "UNREACHABLE",
2163 [RTN_PROHIBIT] = "PROHIBIT",
2164 [RTN_THROW] = "THROW",
2165 [RTN_NAT] = "NAT",
2166 [RTN_XRESOLVE] = "XRESOLVE",
2167};
2168
Eric Dumazeta034ee32010-09-09 23:32:28 +00002169static inline const char *rtn_type(char *buf, size_t len, unsigned int t)
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002170{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002171 if (t < __RTN_MAX && rtn_type_names[t])
2172 return rtn_type_names[t];
Eric Dumazet28d36e32008-01-14 23:09:56 -08002173 snprintf(buf, len, "type %u", t);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002174 return buf;
2175}
2176
2177/* Pretty print the trie */
Robert Olsson19baf832005-06-21 12:43:18 -07002178static int fib_trie_seq_show(struct seq_file *seq, void *v)
2179{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002180 const struct fib_trie_iter *iter = seq->private;
Alexander Duyckadaf9812014-12-31 10:55:47 -08002181 struct tnode *n = v;
Robert Olsson19baf832005-06-21 12:43:18 -07002182
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002183 if (!node_parent_rcu(n))
2184 fib_table_print(seq, iter->tb);
Robert Olsson095b8502007-01-26 19:06:01 -08002185
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002186 if (IS_TNODE(n)) {
Alexander Duyckadaf9812014-12-31 10:55:47 -08002187 __be32 prf = htonl(n->key);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002188
Alexander Duyckadaf9812014-12-31 10:55:47 -08002189 seq_indent(seq, iter->depth - 1);
Harvey Harrison673d57e2008-10-31 00:53:57 -07002190 seq_printf(seq, " +-- %pI4/%d %d %d %d\n",
Alexander Duyckadaf9812014-12-31 10:55:47 -08002191 &prf, n->pos, n->bits, n->full_children,
2192 n->empty_children);
Olof Johansson91b9a272005-08-09 20:24:39 -07002193 } else {
Stephen Hemminger13280422008-01-22 21:54:37 -08002194 struct leaf_info *li;
Alexander Duyckadaf9812014-12-31 10:55:47 -08002195 __be32 val = htonl(n->key);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002196
2197 seq_indent(seq, iter->depth);
Harvey Harrison673d57e2008-10-31 00:53:57 -07002198 seq_printf(seq, " |-- %pI4\n", &val);
Eric Dumazet28d36e32008-01-14 23:09:56 -08002199
Alexander Duyckadaf9812014-12-31 10:55:47 -08002200 hlist_for_each_entry_rcu(li, &n->list, hlist) {
Stephen Hemminger13280422008-01-22 21:54:37 -08002201 struct fib_alias *fa;
Eric Dumazet28d36e32008-01-14 23:09:56 -08002202
Stephen Hemminger13280422008-01-22 21:54:37 -08002203 list_for_each_entry_rcu(fa, &li->falh, fa_list) {
2204 char buf1[32], buf2[32];
Eric Dumazet28d36e32008-01-14 23:09:56 -08002205
Stephen Hemminger13280422008-01-22 21:54:37 -08002206 seq_indent(seq, iter->depth+1);
2207 seq_printf(seq, " /%d %s %s", li->plen,
2208 rtn_scope(buf1, sizeof(buf1),
David S. Miller37e826c2011-03-24 18:06:47 -07002209 fa->fa_info->fib_scope),
Stephen Hemminger13280422008-01-22 21:54:37 -08002210 rtn_type(buf2, sizeof(buf2),
2211 fa->fa_type));
2212 if (fa->fa_tos)
Denis V. Lunevb9c4d822008-02-05 02:58:45 -08002213 seq_printf(seq, " tos=%d", fa->fa_tos);
Stephen Hemminger13280422008-01-22 21:54:37 -08002214 seq_putc(seq, '\n');
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002215 }
2216 }
Robert Olsson19baf832005-06-21 12:43:18 -07002217 }
2218
2219 return 0;
2220}
2221
Stephen Hemmingerf6908082007-03-12 14:34:29 -07002222static const struct seq_operations fib_trie_seq_ops = {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002223 .start = fib_trie_seq_start,
2224 .next = fib_trie_seq_next,
2225 .stop = fib_trie_seq_stop,
2226 .show = fib_trie_seq_show,
Robert Olsson19baf832005-06-21 12:43:18 -07002227};
2228
2229static int fib_trie_seq_open(struct inode *inode, struct file *file)
2230{
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002231 return seq_open_net(inode, file, &fib_trie_seq_ops,
2232 sizeof(struct fib_trie_iter));
Robert Olsson19baf832005-06-21 12:43:18 -07002233}
2234
Arjan van de Ven9a321442007-02-12 00:55:35 -08002235static const struct file_operations fib_trie_fops = {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002236 .owner = THIS_MODULE,
2237 .open = fib_trie_seq_open,
2238 .read = seq_read,
2239 .llseek = seq_lseek,
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002240 .release = seq_release_net,
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002241};
2242
Stephen Hemminger8315f5d2008-02-11 21:14:39 -08002243struct fib_route_iter {
2244 struct seq_net_private p;
2245 struct trie *main_trie;
2246 loff_t pos;
2247 t_key key;
2248};
2249
Alexander Duyckadaf9812014-12-31 10:55:47 -08002250static struct tnode *fib_route_get_idx(struct fib_route_iter *iter, loff_t pos)
Stephen Hemminger8315f5d2008-02-11 21:14:39 -08002251{
Alexander Duyckadaf9812014-12-31 10:55:47 -08002252 struct tnode *l = NULL;
Stephen Hemminger8315f5d2008-02-11 21:14:39 -08002253 struct trie *t = iter->main_trie;
2254
2255 /* use cache location of last found key */
2256 if (iter->pos > 0 && pos >= iter->pos && (l = fib_find_node(t, iter->key)))
2257 pos -= iter->pos;
2258 else {
2259 iter->pos = 0;
2260 l = trie_firstleaf(t);
2261 }
2262
2263 while (l && pos-- > 0) {
2264 iter->pos++;
2265 l = trie_nextleaf(l);
2266 }
2267
2268 if (l)
2269 iter->key = pos; /* remember it */
2270 else
2271 iter->pos = 0; /* forget it */
2272
2273 return l;
2274}
2275
2276static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos)
2277 __acquires(RCU)
2278{
2279 struct fib_route_iter *iter = seq->private;
2280 struct fib_table *tb;
2281
2282 rcu_read_lock();
YOSHIFUJI Hideaki12188542008-03-26 02:36:06 +09002283 tb = fib_get_table(seq_file_net(seq), RT_TABLE_MAIN);
Stephen Hemminger8315f5d2008-02-11 21:14:39 -08002284 if (!tb)
2285 return NULL;
2286
2287 iter->main_trie = (struct trie *) tb->tb_data;
2288 if (*pos == 0)
2289 return SEQ_START_TOKEN;
2290 else
2291 return fib_route_get_idx(iter, *pos - 1);
2292}
2293
2294static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2295{
2296 struct fib_route_iter *iter = seq->private;
Alexander Duyckadaf9812014-12-31 10:55:47 -08002297 struct tnode *l = v;
Stephen Hemminger8315f5d2008-02-11 21:14:39 -08002298
2299 ++*pos;
2300 if (v == SEQ_START_TOKEN) {
2301 iter->pos = 0;
2302 l = trie_firstleaf(iter->main_trie);
2303 } else {
2304 iter->pos++;
2305 l = trie_nextleaf(l);
2306 }
2307
2308 if (l)
2309 iter->key = l->key;
2310 else
2311 iter->pos = 0;
2312 return l;
2313}
2314
2315static void fib_route_seq_stop(struct seq_file *seq, void *v)
2316 __releases(RCU)
2317{
2318 rcu_read_unlock();
2319}
2320
Eric Dumazeta034ee32010-09-09 23:32:28 +00002321static unsigned int fib_flag_trans(int type, __be32 mask, const struct fib_info *fi)
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002322{
Eric Dumazeta034ee32010-09-09 23:32:28 +00002323 unsigned int flags = 0;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002324
Eric Dumazeta034ee32010-09-09 23:32:28 +00002325 if (type == RTN_UNREACHABLE || type == RTN_PROHIBIT)
2326 flags = RTF_REJECT;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002327 if (fi && fi->fib_nh->nh_gw)
2328 flags |= RTF_GATEWAY;
Al Viro32ab5f82006-09-26 22:21:45 -07002329 if (mask == htonl(0xFFFFFFFF))
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002330 flags |= RTF_HOST;
2331 flags |= RTF_UP;
2332 return flags;
2333}
2334
2335/*
2336 * This outputs /proc/net/route.
2337 * The format of the file is not supposed to be changed
Eric Dumazeta034ee32010-09-09 23:32:28 +00002338 * and needs to be same as fib_hash output to avoid breaking
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002339 * legacy utilities
2340 */
2341static int fib_route_seq_show(struct seq_file *seq, void *v)
2342{
Alexander Duyckadaf9812014-12-31 10:55:47 -08002343 struct tnode *l = v;
Stephen Hemminger13280422008-01-22 21:54:37 -08002344 struct leaf_info *li;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002345
2346 if (v == SEQ_START_TOKEN) {
2347 seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway "
2348 "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU"
2349 "\tWindow\tIRTT");
2350 return 0;
2351 }
2352
Sasha Levinb67bfe02013-02-27 17:06:00 -08002353 hlist_for_each_entry_rcu(li, &l->list, hlist) {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002354 struct fib_alias *fa;
Al Viro32ab5f82006-09-26 22:21:45 -07002355 __be32 mask, prefix;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002356
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002357 mask = inet_make_mask(li->plen);
2358 prefix = htonl(l->key);
2359
2360 list_for_each_entry_rcu(fa, &li->falh, fa_list) {
Herbert Xu1371e372005-10-15 09:42:39 +10002361 const struct fib_info *fi = fa->fa_info;
Eric Dumazeta034ee32010-09-09 23:32:28 +00002362 unsigned int flags = fib_flag_trans(fa->fa_type, mask, fi);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002363
2364 if (fa->fa_type == RTN_BROADCAST
2365 || fa->fa_type == RTN_MULTICAST)
2366 continue;
2367
Tetsuo Handa652586d2013-11-14 14:31:57 -08002368 seq_setwidth(seq, 127);
2369
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002370 if (fi)
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002371 seq_printf(seq,
2372 "%s\t%08X\t%08X\t%04X\t%d\t%u\t"
Tetsuo Handa652586d2013-11-14 14:31:57 -08002373 "%d\t%08X\t%d\t%u\t%u",
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002374 fi->fib_dev ? fi->fib_dev->name : "*",
2375 prefix,
2376 fi->fib_nh->nh_gw, flags, 0, 0,
2377 fi->fib_priority,
2378 mask,
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08002379 (fi->fib_advmss ?
2380 fi->fib_advmss + 40 : 0),
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002381 fi->fib_window,
Tetsuo Handa652586d2013-11-14 14:31:57 -08002382 fi->fib_rtt >> 3);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002383 else
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002384 seq_printf(seq,
2385 "*\t%08X\t%08X\t%04X\t%d\t%u\t"
Tetsuo Handa652586d2013-11-14 14:31:57 -08002386 "%d\t%08X\t%d\t%u\t%u",
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002387 prefix, 0, flags, 0, 0, 0,
Tetsuo Handa652586d2013-11-14 14:31:57 -08002388 mask, 0, 0, 0);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002389
Tetsuo Handa652586d2013-11-14 14:31:57 -08002390 seq_pad(seq, '\n');
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002391 }
2392 }
2393
2394 return 0;
2395}
2396
Stephen Hemmingerf6908082007-03-12 14:34:29 -07002397static const struct seq_operations fib_route_seq_ops = {
Stephen Hemminger8315f5d2008-02-11 21:14:39 -08002398 .start = fib_route_seq_start,
2399 .next = fib_route_seq_next,
2400 .stop = fib_route_seq_stop,
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002401 .show = fib_route_seq_show,
2402};
2403
2404static int fib_route_seq_open(struct inode *inode, struct file *file)
2405{
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002406 return seq_open_net(inode, file, &fib_route_seq_ops,
Stephen Hemminger8315f5d2008-02-11 21:14:39 -08002407 sizeof(struct fib_route_iter));
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002408}
2409
Arjan van de Ven9a321442007-02-12 00:55:35 -08002410static const struct file_operations fib_route_fops = {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002411 .owner = THIS_MODULE,
2412 .open = fib_route_seq_open,
2413 .read = seq_read,
2414 .llseek = seq_lseek,
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002415 .release = seq_release_net,
Robert Olsson19baf832005-06-21 12:43:18 -07002416};
2417
Denis V. Lunev61a02652008-01-10 03:21:09 -08002418int __net_init fib_proc_init(struct net *net)
Robert Olsson19baf832005-06-21 12:43:18 -07002419{
Gao fengd4beaa62013-02-18 01:34:54 +00002420 if (!proc_create("fib_trie", S_IRUGO, net->proc_net, &fib_trie_fops))
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002421 goto out1;
2422
Gao fengd4beaa62013-02-18 01:34:54 +00002423 if (!proc_create("fib_triestat", S_IRUGO, net->proc_net,
2424 &fib_triestat_fops))
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002425 goto out2;
2426
Gao fengd4beaa62013-02-18 01:34:54 +00002427 if (!proc_create("route", S_IRUGO, net->proc_net, &fib_route_fops))
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002428 goto out3;
2429
Robert Olsson19baf832005-06-21 12:43:18 -07002430 return 0;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002431
2432out3:
Gao fengece31ff2013-02-18 01:34:56 +00002433 remove_proc_entry("fib_triestat", net->proc_net);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002434out2:
Gao fengece31ff2013-02-18 01:34:56 +00002435 remove_proc_entry("fib_trie", net->proc_net);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002436out1:
2437 return -ENOMEM;
Robert Olsson19baf832005-06-21 12:43:18 -07002438}
2439
Denis V. Lunev61a02652008-01-10 03:21:09 -08002440void __net_exit fib_proc_exit(struct net *net)
Robert Olsson19baf832005-06-21 12:43:18 -07002441{
Gao fengece31ff2013-02-18 01:34:56 +00002442 remove_proc_entry("fib_trie", net->proc_net);
2443 remove_proc_entry("fib_triestat", net->proc_net);
2444 remove_proc_entry("route", net->proc_net);
Robert Olsson19baf832005-06-21 12:43:18 -07002445}
2446
2447#endif /* CONFIG_PROC_FS */