// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 */

#include "ratelimiter.h"
#include <linux/siphash.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <net/ip.h>

static struct kmem_cache *entry_cache;
static hsiphash_key_t key;
static spinlock_t table_lock = __SPIN_LOCK_UNLOCKED("ratelimiter_table_lock");
static DEFINE_MUTEX(init_lock);
static u64 init_refcnt; /* Protected by init_lock, hence not atomic. */
static atomic_t total_entries = ATOMIC_INIT(0);
static unsigned int max_entries, table_size;
static void wg_ratelimiter_gc_entries(struct work_struct *);
static DECLARE_DEFERRABLE_WORK(gc_work, wg_ratelimiter_gc_entries);
static struct hlist_head *table_v4;
#if IS_ENABLED(CONFIG_IPV6)
static struct hlist_head *table_v6;
#endif

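/* One entry tracks a single source: a full IPv4 address, or the /64 prefix
 * of an IPv6 address, scoped to its network namespace. @tokens accumulates
 * credit in nanoseconds, replenished from @last_time_ns on each packet.
 */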
struct ratelimiter_entry {
	u64 last_time_ns, tokens, ip;
	void *net;
	spinlock_t lock;
	struct hlist_node hash;
	struct rcu_head rcu;
};

enum {
	PACKETS_PER_SECOND = 20,
	PACKETS_BURSTABLE = 5,
	PACKET_COST = NSEC_PER_SEC / PACKETS_PER_SECOND,
	TOKEN_MAX = PACKET_COST * PACKETS_BURSTABLE
};
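/* With the constants above, PACKET_COST is 10^9 / 20 = 50,000,000 ns, i.e.
 * each packet spends 50 ms of accumulated credit, and TOKEN_MAX is
 * 250,000,000 ns, so an idle source may burst 5 packets before settling
 * into the sustained rate of 20 packets per second.
 */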

static void entry_free(struct rcu_head *rcu)
{
	kmem_cache_free(entry_cache,
			container_of(rcu, struct ratelimiter_entry, rcu));
	atomic_dec(&total_entries);
}

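/* Unlink an entry from its bucket and free it after an RCU grace period.
 * The caller must hold table_lock; concurrent RCU readers may still be
 * traversing the entry until the grace period ends.
 */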
static void entry_uninit(struct ratelimiter_entry *entry)
{
	hlist_del_rcu(&entry->hash);
	call_rcu(&entry->rcu, entry_free);
}

/* Calling this function with a NULL work uninits all entries. */
static void wg_ratelimiter_gc_entries(struct work_struct *work)
{
	const u64 now = ktime_get_coarse_boottime_ns();
	struct ratelimiter_entry *entry;
	struct hlist_node *temp;
	unsigned int i;

	for (i = 0; i < table_size; ++i) {
		spin_lock(&table_lock);
		hlist_for_each_entry_safe(entry, temp, &table_v4[i], hash) {
			if (unlikely(!work) ||
			    now - entry->last_time_ns > NSEC_PER_SEC)
				entry_uninit(entry);
		}
#if IS_ENABLED(CONFIG_IPV6)
		hlist_for_each_entry_safe(entry, temp, &table_v6[i], hash) {
			if (unlikely(!work) ||
			    now - entry->last_time_ns > NSEC_PER_SEC)
				entry_uninit(entry);
		}
#endif
		spin_unlock(&table_lock);
		if (likely(work))
			cond_resched();
	}
	if (likely(work))
		queue_delayed_work(system_power_efficient_wq, &gc_work, HZ);
}

bool wg_ratelimiter_allow(struct sk_buff *skb, struct net *net)
{
	/* We only take the bottom half of the net pointer, so that we can hash
	 * 3 words in the end. This way, siphash's len param fits into the final
	 * u32, and we don't incur an extra round.
	 */
	const u32 net_word = (unsigned long)net;
	struct ratelimiter_entry *entry;
	struct hlist_head *bucket;
	u64 ip;

	if (skb->protocol == htons(ETH_P_IP)) {
		ip = (u64 __force)ip_hdr(skb)->saddr;
		bucket = &table_v4[hsiphash_2u32(net_word, ip, &key) &
				   (table_size - 1)];
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		/* Only use 64 bits, so as to ratelimit the whole /64. */
		memcpy(&ip, &ipv6_hdr(skb)->saddr, sizeof(ip));
		bucket = &table_v6[hsiphash_3u32(net_word, ip >> 32, ip, &key) &
				   (table_size - 1)];
	}
#endif
	else
		return false;
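	/* Fast path: look up an existing entry for this source under RCU and,
	 * if found, debit its token bucket in place.
	 */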
	rcu_read_lock();
	hlist_for_each_entry_rcu(entry, bucket, hash) {
		if (entry->net == net && entry->ip == ip) {
			u64 now, tokens;
			bool ret;
			/* Quasi-inspired by nft_limit.c, but this is actually a
			 * slightly different algorithm. Namely, we incorporate
			 * the burst as part of the maximum tokens, rather than
			 * as part of the rate.
			 */
			spin_lock(&entry->lock);
			now = ktime_get_coarse_boottime_ns();
			tokens = min_t(u64, TOKEN_MAX,
				       entry->tokens + now -
					       entry->last_time_ns);
			entry->last_time_ns = now;
			ret = tokens >= PACKET_COST;
			entry->tokens = ret ? tokens - PACKET_COST : tokens;
			spin_unlock(&entry->lock);
			rcu_read_unlock();
			return ret;
		}
	}
	rcu_read_unlock();

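	/* Slow path: no entry yet for this source. Charge the new entry
	 * against the global cap before allocating, so a flood of distinct
	 * source addresses cannot exhaust memory.
	 */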
	if (atomic_inc_return(&total_entries) > max_entries)
		goto err_oom;

	entry = kmem_cache_alloc(entry_cache, GFP_KERNEL);
	if (unlikely(!entry))
		goto err_oom;

	entry->net = net;
	entry->ip = ip;
	INIT_HLIST_NODE(&entry->hash);
	spin_lock_init(&entry->lock);
	entry->last_time_ns = ktime_get_coarse_boottime_ns();
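	/* Start with a full bucket minus this packet's cost: the first packet
	 * from a new source is always admitted.
	 */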
	entry->tokens = TOKEN_MAX - PACKET_COST;
	spin_lock(&table_lock);
	hlist_add_head_rcu(&entry->hash, bucket);
	spin_unlock(&table_lock);
	return true;

err_oom:
	atomic_dec(&total_entries);
	return false;
}

int wg_ratelimiter_init(void)
{
	mutex_lock(&init_lock);
	if (++init_refcnt != 1)
		goto out;

	entry_cache = KMEM_CACHE(ratelimiter_entry, 0);
	if (!entry_cache)
		goto err;

	/* xt_hashlimit.c uses a slightly different algorithm for ratelimiting,
	 * but what it shares in common is that it uses a massive hashtable. So,
	 * we borrow their wisdom about good table sizes on different systems
	 * dependent on RAM. This calculation here comes from there.
	 */
	table_size = (totalram_pages() > (1U << 30) / PAGE_SIZE) ? 8192 :
		max_t(unsigned long, 16, roundup_pow_of_two(
			(totalram_pages() << PAGE_SHIFT) /
			(1U << 14) / sizeof(struct hlist_head)));
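	/* For example, with 8-byte hlist heads, a machine with 1 GiB of RAM
	 * gets 2^30 / 2^14 / 8 = 8192 buckets, which is also the cap applied
	 * to anything larger; max_entries is then 8 * 8192 = 65536.
	 */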
	max_entries = table_size * 8;

	table_v4 = kvzalloc(table_size * sizeof(*table_v4), GFP_KERNEL);
	if (unlikely(!table_v4))
		goto err_kmemcache;

#if IS_ENABLED(CONFIG_IPV6)
	table_v6 = kvzalloc(table_size * sizeof(*table_v6), GFP_KERNEL);
	if (unlikely(!table_v6)) {
		kvfree(table_v4);
		goto err_kmemcache;
	}
#endif

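	/* Start the periodic GC (it reschedules itself every HZ jiffies) and
	 * seed the hash key; no lookups can occur until init returns.
	 */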
	queue_delayed_work(system_power_efficient_wq, &gc_work, HZ);
	get_random_bytes(&key, sizeof(key));
out:
	mutex_unlock(&init_lock);
	return 0;

err_kmemcache:
	kmem_cache_destroy(entry_cache);
err:
	--init_refcnt;
	mutex_unlock(&init_lock);
	return -ENOMEM;
}

void wg_ratelimiter_uninit(void)
{
	mutex_lock(&init_lock);
	if (!init_refcnt || --init_refcnt)
		goto out;

	cancel_delayed_work_sync(&gc_work);
	wg_ratelimiter_gc_entries(NULL);
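	/* The NULL-work GC call above unlinked every entry; wait for their
	 * RCU free callbacks to finish before destroying the cache below.
	 */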
	rcu_barrier();
	kvfree(table_v4);
#if IS_ENABLED(CONFIG_IPV6)
	kvfree(table_v6);
#endif
	kmem_cache_destroy(entry_cache);
out:
	mutex_unlock(&init_lock);
}

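/* Compile the selftest directly into this translation unit; its body is
 * guarded internally so it only takes effect in debug builds.
 */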
#include "selftest/ratelimiter.c"