blob: dd55e5c26f468f71518cc5956f4b84480af5d9c8 [file] [log] [blame]
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 */
5
6#include "ratelimiter.h"
7#include <linux/siphash.h>
8#include <linux/mm.h>
9#include <linux/slab.h>
10#include <net/ip.h>
11
12static struct kmem_cache *entry_cache;
13static hsiphash_key_t key;
14static spinlock_t table_lock = __SPIN_LOCK_UNLOCKED("ratelimiter_table_lock");
15static DEFINE_MUTEX(init_lock);
16static u64 init_refcnt; /* Protected by init_lock, hence not atomic. */
17static atomic_t total_entries = ATOMIC_INIT(0);
18static unsigned int max_entries, table_size;
19static void wg_ratelimiter_gc_entries(struct work_struct *);
20static DECLARE_DEFERRABLE_WORK(gc_work, wg_ratelimiter_gc_entries);
21static struct hlist_head *table_v4;
22#if IS_ENABLED(CONFIG_IPV6)
23static struct hlist_head *table_v6;
24#endif
25
26struct ratelimiter_entry {
27 u64 last_time_ns, tokens, ip;
28 void *net;
29 spinlock_t lock;
30 struct hlist_node hash;
31 struct rcu_head rcu;
32};
33
34enum {
35 PACKETS_PER_SECOND = 20,
36 PACKETS_BURSTABLE = 5,
37 PACKET_COST = NSEC_PER_SEC / PACKETS_PER_SECOND,
38 TOKEN_MAX = PACKET_COST * PACKETS_BURSTABLE
39};
40
41static void entry_free(struct rcu_head *rcu)
42{
43 kmem_cache_free(entry_cache,
44 container_of(rcu, struct ratelimiter_entry, rcu));
45 atomic_dec(&total_entries);
46}
47
48static void entry_uninit(struct ratelimiter_entry *entry)
49{
50 hlist_del_rcu(&entry->hash);
51 call_rcu(&entry->rcu, entry_free);
52}
53
54/* Calling this function with a NULL work uninits all entries. */
55static void wg_ratelimiter_gc_entries(struct work_struct *work)
56{
57 const u64 now = ktime_get_coarse_boottime_ns();
58 struct ratelimiter_entry *entry;
59 struct hlist_node *temp;
60 unsigned int i;
61
62 for (i = 0; i < table_size; ++i) {
63 spin_lock(&table_lock);
64 hlist_for_each_entry_safe(entry, temp, &table_v4[i], hash) {
65 if (unlikely(!work) ||
66 now - entry->last_time_ns > NSEC_PER_SEC)
67 entry_uninit(entry);
68 }
69#if IS_ENABLED(CONFIG_IPV6)
70 hlist_for_each_entry_safe(entry, temp, &table_v6[i], hash) {
71 if (unlikely(!work) ||
72 now - entry->last_time_ns > NSEC_PER_SEC)
73 entry_uninit(entry);
74 }
75#endif
76 spin_unlock(&table_lock);
77 if (likely(work))
78 cond_resched();
79 }
80 if (likely(work))
81 queue_delayed_work(system_power_efficient_wq, &gc_work, HZ);
82}
83
84bool wg_ratelimiter_allow(struct sk_buff *skb, struct net *net)
85{
86 /* We only take the bottom half of the net pointer, so that we can hash
87 * 3 words in the end. This way, siphash's len param fits into the final
88 * u32, and we don't incur an extra round.
89 */
90 const u32 net_word = (unsigned long)net;
91 struct ratelimiter_entry *entry;
92 struct hlist_head *bucket;
93 u64 ip;
94
95 if (skb->protocol == htons(ETH_P_IP)) {
96 ip = (u64 __force)ip_hdr(skb)->saddr;
97 bucket = &table_v4[hsiphash_2u32(net_word, ip, &key) &
98 (table_size - 1)];
99 }
100#if IS_ENABLED(CONFIG_IPV6)
101 else if (skb->protocol == htons(ETH_P_IPV6)) {
102 /* Only use 64 bits, so as to ratelimit the whole /64. */
103 memcpy(&ip, &ipv6_hdr(skb)->saddr, sizeof(ip));
104 bucket = &table_v6[hsiphash_3u32(net_word, ip >> 32, ip, &key) &
105 (table_size - 1)];
106 }
107#endif
108 else
109 return false;
110 rcu_read_lock();
111 hlist_for_each_entry_rcu(entry, bucket, hash) {
112 if (entry->net == net && entry->ip == ip) {
113 u64 now, tokens;
114 bool ret;
115 /* Quasi-inspired by nft_limit.c, but this is actually a
116 * slightly different algorithm. Namely, we incorporate
117 * the burst as part of the maximum tokens, rather than
118 * as part of the rate.
119 */
120 spin_lock(&entry->lock);
121 now = ktime_get_coarse_boottime_ns();
122 tokens = min_t(u64, TOKEN_MAX,
123 entry->tokens + now -
124 entry->last_time_ns);
125 entry->last_time_ns = now;
126 ret = tokens >= PACKET_COST;
127 entry->tokens = ret ? tokens - PACKET_COST : tokens;
128 spin_unlock(&entry->lock);
129 rcu_read_unlock();
130 return ret;
131 }
132 }
133 rcu_read_unlock();
134
135 if (atomic_inc_return(&total_entries) > max_entries)
136 goto err_oom;
137
138 entry = kmem_cache_alloc(entry_cache, GFP_KERNEL);
139 if (unlikely(!entry))
140 goto err_oom;
141
142 entry->net = net;
143 entry->ip = ip;
144 INIT_HLIST_NODE(&entry->hash);
145 spin_lock_init(&entry->lock);
146 entry->last_time_ns = ktime_get_coarse_boottime_ns();
147 entry->tokens = TOKEN_MAX - PACKET_COST;
148 spin_lock(&table_lock);
149 hlist_add_head_rcu(&entry->hash, bucket);
150 spin_unlock(&table_lock);
151 return true;
152
153err_oom:
154 atomic_dec(&total_entries);
155 return false;
156}
157
158int wg_ratelimiter_init(void)
159{
160 mutex_lock(&init_lock);
161 if (++init_refcnt != 1)
162 goto out;
163
164 entry_cache = KMEM_CACHE(ratelimiter_entry, 0);
165 if (!entry_cache)
166 goto err;
167
168 /* xt_hashlimit.c uses a slightly different algorithm for ratelimiting,
169 * but what it shares in common is that it uses a massive hashtable. So,
170 * we borrow their wisdom about good table sizes on different systems
171 * dependent on RAM. This calculation here comes from there.
172 */
173 table_size = (totalram_pages() > (1U << 30) / PAGE_SIZE) ? 8192 :
174 max_t(unsigned long, 16, roundup_pow_of_two(
175 (totalram_pages() << PAGE_SHIFT) /
176 (1U << 14) / sizeof(struct hlist_head)));
177 max_entries = table_size * 8;
178
Gustavo A. R. Silva4e3fd722021-11-29 10:39:28 -0500179 table_v4 = kvcalloc(table_size, sizeof(*table_v4), GFP_KERNEL);
Jason A. Donenfelde7096c12019-12-09 00:27:34 +0100180 if (unlikely(!table_v4))
181 goto err_kmemcache;
182
183#if IS_ENABLED(CONFIG_IPV6)
Gustavo A. R. Silva4e3fd722021-11-29 10:39:28 -0500184 table_v6 = kvcalloc(table_size, sizeof(*table_v6), GFP_KERNEL);
Jason A. Donenfelde7096c12019-12-09 00:27:34 +0100185 if (unlikely(!table_v6)) {
186 kvfree(table_v4);
187 goto err_kmemcache;
188 }
189#endif
190
191 queue_delayed_work(system_power_efficient_wq, &gc_work, HZ);
192 get_random_bytes(&key, sizeof(key));
193out:
194 mutex_unlock(&init_lock);
195 return 0;
196
197err_kmemcache:
198 kmem_cache_destroy(entry_cache);
199err:
200 --init_refcnt;
201 mutex_unlock(&init_lock);
202 return -ENOMEM;
203}
204
205void wg_ratelimiter_uninit(void)
206{
207 mutex_lock(&init_lock);
208 if (!init_refcnt || --init_refcnt)
209 goto out;
210
211 cancel_delayed_work_sync(&gc_work);
212 wg_ratelimiter_gc_entries(NULL);
213 rcu_barrier();
214 kvfree(table_v4);
215#if IS_ENABLED(CONFIG_IPV6)
216 kvfree(table_v6);
217#endif
218 kmem_cache_destroy(entry_cache);
219out:
220 mutex_unlock(&init_lock);
221}
222
223#include "selftest/ratelimiter.c"