// SPDX-License-Identifier: GPL-2.0
/*
 * KFENCE guarded object allocator and fault handling.
 *
 * Copyright (C) 2020, Google LLC.
 */

#define pr_fmt(fmt) "kfence: " fmt

#include <linux/atomic.h>
#include <linux/bug.h>
#include <linux/debugfs.h>
#include <linux/hash.h>
#include <linux/irq_work.h>
#include <linux/jhash.h>
#include <linux/kcsan-checks.h>
#include <linux/kfence.h>
#include <linux/kmemleak.h>
#include <linux/list.h>
#include <linux/lockdep.h>
#include <linux/log2.h>
#include <linux/memblock.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/rcupdate.h>
#include <linux/sched/clock.h>
#include <linux/sched/sysctl.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/string.h>

#include <asm/kfence.h>

#include "kfence.h"

/* Disables KFENCE on the first warning assuming an irrecoverable error. */
#define KFENCE_WARN_ON(cond)                                                   \
        ({                                                                     \
                const bool __cond = WARN_ON(cond);                             \
                if (unlikely(__cond))                                          \
                        WRITE_ONCE(kfence_enabled, false);                     \
                __cond;                                                        \
        })

/* === Data ================================================================= */

static bool kfence_enabled __read_mostly;

static unsigned long kfence_sample_interval __read_mostly = CONFIG_KFENCE_SAMPLE_INTERVAL;

#ifdef MODULE_PARAM_PREFIX
#undef MODULE_PARAM_PREFIX
#endif
#define MODULE_PARAM_PREFIX "kfence."

static int param_set_sample_interval(const char *val, const struct kernel_param *kp)
{
        unsigned long num;
        int ret = kstrtoul(val, 0, &num);

        if (ret < 0)
                return ret;

        if (!num) /* Using 0 to indicate KFENCE is disabled. */
                WRITE_ONCE(kfence_enabled, false);
        else if (!READ_ONCE(kfence_enabled) && system_state != SYSTEM_BOOTING)
                return -EINVAL; /* Cannot (re-)enable KFENCE on-the-fly. */

        *((unsigned long *)kp->arg) = num;
        return 0;
}

static int param_get_sample_interval(char *buffer, const struct kernel_param *kp)
{
        if (!READ_ONCE(kfence_enabled))
                return sprintf(buffer, "0\n");

        return param_get_ulong(buffer, kp);
}

static const struct kernel_param_ops sample_interval_param_ops = {
        .set = param_set_sample_interval,
        .get = param_get_sample_interval,
};
module_param_cb(sample_interval, &sample_interval_param_ops, &kfence_sample_interval, 0600);
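
/*
 * Example usage (a sketch; the sysfs path follows from MODULE_PARAM_PREFIX
 * and the 0600 permissions above, and the interval is in milliseconds):
 *
 *      kfence.sample_interval=100                              (boot parameter)
 *      echo 0 > /sys/module/kfence/parameters/sample_interval  (disable at runtime)
 */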

/* Pool usage% threshold when currently covered allocations are skipped. */
static unsigned long kfence_skip_covered_thresh __read_mostly = 75;
module_param_named(skip_covered_thresh, kfence_skip_covered_thresh, ulong, 0644);

/* The pool of pages used for guard pages and objects. */
char *__kfence_pool __ro_after_init;
EXPORT_SYMBOL(__kfence_pool); /* Export for test modules. */

/*
 * Per-object metadata, with one-to-one mapping of object metadata to
 * backing pages (in __kfence_pool).
 */
static_assert(CONFIG_KFENCE_NUM_OBJECTS > 0);
struct kfence_metadata kfence_metadata[CONFIG_KFENCE_NUM_OBJECTS];

/* Freelist with available objects. */
static struct list_head kfence_freelist = LIST_HEAD_INIT(kfence_freelist);
static DEFINE_RAW_SPINLOCK(kfence_freelist_lock); /* Lock protecting freelist. */

/*
 * The static key to set up a KFENCE allocation; or if static keys are not used
 * to gate allocations, to avoid a load and compare if KFENCE is disabled.
 */
DEFINE_STATIC_KEY_FALSE(kfence_allocation_key);

/* Gates the allocation, ensuring only one succeeds in a given period. */
atomic_t kfence_allocation_gate = ATOMIC_INIT(1);

/*
 * A Counting Bloom filter of allocation coverage: limits currently covered
 * allocations of the same source filling up the pool.
 *
 * Assuming a range of 15%-85% unique allocations in the pool at any point in
 * time, the below parameters provide a probability of 0.02-0.33 for false
 * positive hits respectively:
 *
 *      P(alloc_traces) = (1 - e^(-HNUM * (alloc_traces / SIZE)))^HNUM
 */
#define ALLOC_COVERED_HNUM      2
#define ALLOC_COVERED_ORDER     (const_ilog2(CONFIG_KFENCE_NUM_OBJECTS) + 2)
#define ALLOC_COVERED_SIZE      (1 << ALLOC_COVERED_ORDER)
#define ALLOC_COVERED_HNEXT(h)  hash_32(h, ALLOC_COVERED_ORDER)
#define ALLOC_COVERED_MASK      (ALLOC_COVERED_SIZE - 1)
static atomic_t alloc_covered[ALLOC_COVERED_SIZE];
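
/*
 * Worked numbers for the formula above (a sketch, assuming the default
 * CONFIG_KFENCE_NUM_OBJECTS=255): ALLOC_COVERED_ORDER = 7 + 2 = 9, so
 * ALLOC_COVERED_SIZE = 512 counters. With 85% of 255 objects unique,
 * P = (1 - e^(-2 * 217/512))^2 ~= 0.33; with 15% unique,
 * P = (1 - e^(-2 * 38/512))^2 ~= 0.02.
 */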

/* Stack depth used to determine uniqueness of an allocation. */
#define UNIQUE_ALLOC_STACK_DEPTH ((size_t)8)

/*
 * Randomness for stack hashes, making the same collisions across reboots and
 * different machines less likely.
 */
static u32 stack_hash_seed __ro_after_init;

/* Statistics counters for debugfs. */
enum kfence_counter_id {
        KFENCE_COUNTER_ALLOCATED,
        KFENCE_COUNTER_ALLOCS,
        KFENCE_COUNTER_FREES,
        KFENCE_COUNTER_ZOMBIES,
        KFENCE_COUNTER_BUGS,
        KFENCE_COUNTER_SKIP_INCOMPAT,
        KFENCE_COUNTER_SKIP_CAPACITY,
        KFENCE_COUNTER_SKIP_COVERED,
        KFENCE_COUNTER_COUNT,
};
static atomic_long_t counters[KFENCE_COUNTER_COUNT];
static const char *const counter_names[] = {
        [KFENCE_COUNTER_ALLOCATED]      = "currently allocated",
        [KFENCE_COUNTER_ALLOCS]         = "total allocations",
        [KFENCE_COUNTER_FREES]          = "total frees",
        [KFENCE_COUNTER_ZOMBIES]        = "zombie allocations",
        [KFENCE_COUNTER_BUGS]           = "total bugs",
        [KFENCE_COUNTER_SKIP_INCOMPAT]  = "skipped allocations (incompatible)",
        [KFENCE_COUNTER_SKIP_CAPACITY]  = "skipped allocations (capacity)",
        [KFENCE_COUNTER_SKIP_COVERED]   = "skipped allocations (covered)",
};
static_assert(ARRAY_SIZE(counter_names) == KFENCE_COUNTER_COUNT);

/* === Internals ============================================================ */

static inline bool should_skip_covered(void)
{
        unsigned long thresh = (CONFIG_KFENCE_NUM_OBJECTS * kfence_skip_covered_thresh) / 100;

        return atomic_long_read(&counters[KFENCE_COUNTER_ALLOCATED]) > thresh;
}

static u32 get_alloc_stack_hash(unsigned long *stack_entries, size_t num_entries)
{
        num_entries = min(num_entries, UNIQUE_ALLOC_STACK_DEPTH);
        num_entries = filter_irq_stacks(stack_entries, num_entries);
        return jhash(stack_entries, num_entries * sizeof(stack_entries[0]), stack_hash_seed);
}

/*
 * Adds (or subtracts) count @val for allocation stack trace hash
 * @alloc_stack_hash from Counting Bloom filter.
 */
static void alloc_covered_add(u32 alloc_stack_hash, int val)
{
        int i;

        for (i = 0; i < ALLOC_COVERED_HNUM; i++) {
                atomic_add(val, &alloc_covered[alloc_stack_hash & ALLOC_COVERED_MASK]);
                alloc_stack_hash = ALLOC_COVERED_HNEXT(alloc_stack_hash);
        }
}

/*
 * Returns true if the allocation stack trace hash @alloc_stack_hash is
 * currently contained (non-zero count) in Counting Bloom filter.
 */
static bool alloc_covered_contains(u32 alloc_stack_hash)
{
        int i;

        for (i = 0; i < ALLOC_COVERED_HNUM; i++) {
                if (!atomic_read(&alloc_covered[alloc_stack_hash & ALLOC_COVERED_MASK]))
                        return false;
                alloc_stack_hash = ALLOC_COVERED_HNEXT(alloc_stack_hash);
        }

        return true;
}

static bool kfence_protect(unsigned long addr)
{
        return !KFENCE_WARN_ON(!kfence_protect_page(ALIGN_DOWN(addr, PAGE_SIZE), true));
}

static bool kfence_unprotect(unsigned long addr)
{
        return !KFENCE_WARN_ON(!kfence_protect_page(ALIGN_DOWN(addr, PAGE_SIZE), false));
}

static inline struct kfence_metadata *addr_to_metadata(unsigned long addr)
{
        long index;

        /* The checks do not affect performance; only called from slow-paths. */

        if (!is_kfence_address((void *)addr))
                return NULL;

        /*
         * May be an invalid index if called with an address at the edge of
         * __kfence_pool, in which case we would report an "invalid access"
         * error.
         */
        index = (addr - (unsigned long)__kfence_pool) / (PAGE_SIZE * 2) - 1;
        if (index < 0 || index >= CONFIG_KFENCE_NUM_OBJECTS)
                return NULL;

        return &kfence_metadata[index];
}

static inline unsigned long metadata_to_pageaddr(const struct kfence_metadata *meta)
{
        unsigned long offset = (meta - kfence_metadata + 1) * PAGE_SIZE * 2;
        unsigned long pageaddr = (unsigned long)&__kfence_pool[offset];

        /* The checks do not affect performance; only called from slow-paths. */

        /* Only call with a pointer into kfence_metadata. */
        if (KFENCE_WARN_ON(meta < kfence_metadata ||
                           meta >= kfence_metadata + CONFIG_KFENCE_NUM_OBJECTS))
                return 0;

        /*
         * This metadata object only ever maps to 1 page; verify that the stored
         * address is in the expected range.
         */
        if (KFENCE_WARN_ON(ALIGN_DOWN(meta->addr, PAGE_SIZE) != pageaddr))
                return 0;

        return pageaddr;
}
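
/*
 * A worked example of the mapping (a sketch, assuming PAGE_SIZE == 4 KiB):
 * object i's data page is __kfence_pool + (i + 1) * 2 * PAGE_SIZE, so object 3
 * lives at pool offset 32 KiB; conversely, an address at pool offset 32-36 KiB
 * yields index (offset / 8 KiB) - 1 = 3 in addr_to_metadata().
 */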

/*
 * Update the object's metadata state, including updating the alloc/free stacks
 * depending on the state transition.
 */
static noinline void
metadata_update_state(struct kfence_metadata *meta, enum kfence_object_state next,
                      unsigned long *stack_entries, size_t num_stack_entries)
{
        struct kfence_track *track =
                next == KFENCE_OBJECT_FREED ? &meta->free_track : &meta->alloc_track;

        lockdep_assert_held(&meta->lock);

        if (stack_entries) {
                memcpy(track->stack_entries, stack_entries,
                       num_stack_entries * sizeof(stack_entries[0]));
        } else {
                /*
                 * Skip over 1 (this) function; noinline ensures we do not
                 * accidentally skip over the caller by never inlining.
                 */
                num_stack_entries = stack_trace_save(track->stack_entries, KFENCE_STACK_DEPTH, 1);
        }
        track->num_stack_entries = num_stack_entries;
        track->pid = task_pid_nr(current);
        track->cpu = raw_smp_processor_id();
        track->ts_nsec = local_clock(); /* Same source as printk timestamps. */

        /*
         * Pairs with READ_ONCE() in
         * kfence_shutdown_cache(),
         * kfence_handle_page_fault().
         */
        WRITE_ONCE(meta->state, next);
}

/* Write canary byte to @addr. */
static inline bool set_canary_byte(u8 *addr)
{
        *addr = KFENCE_CANARY_PATTERN(addr);
        return true;
}

/* Check canary byte at @addr. */
static inline bool check_canary_byte(u8 *addr)
{
        struct kfence_metadata *meta;
        unsigned long flags;

        if (likely(*addr == KFENCE_CANARY_PATTERN(addr)))
                return true;

        atomic_long_inc(&counters[KFENCE_COUNTER_BUGS]);

        meta = addr_to_metadata((unsigned long)addr);
        raw_spin_lock_irqsave(&meta->lock, flags);
        kfence_report_error((unsigned long)addr, false, NULL, meta, KFENCE_ERROR_CORRUPTION);
        raw_spin_unlock_irqrestore(&meta->lock, flags);

        return false;
}

/* __always_inline this to ensure we won't do an indirect call to fn. */
static __always_inline void for_each_canary(const struct kfence_metadata *meta, bool (*fn)(u8 *))
{
        const unsigned long pageaddr = ALIGN_DOWN(meta->addr, PAGE_SIZE);
        unsigned long addr;

        /*
         * We'll iterate over each canary byte per-side until fn() returns
         * false. However, we'll still iterate over the canary bytes to the
         * right of the object even if there was an error in the canary bytes to
         * the left of the object. Specifically, if check_canary_byte()
         * generates an error, showing both sides might give more clues as to
         * what the error is about when displaying which bytes were corrupted.
         */

        /* Apply to left of object. */
        for (addr = pageaddr; addr < meta->addr; addr++) {
                if (!fn((u8 *)addr))
                        break;
        }

        /* Apply to right of object. */
        for (addr = meta->addr + meta->size; addr < pageaddr + PAGE_SIZE; addr++) {
                if (!fn((u8 *)addr))
                        break;
        }
}
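
/*
 * Canary layout example (a sketch; exact addresses depend on the coin flip in
 * kfence_guarded_alloc() and cache->align): for a 100-byte object in a 4 KiB
 * page, a left-aligned object is followed by 3996 canary bytes, while a
 * right-aligned one is preceded by up to 3996 canary bytes; every canary byte
 * is set and checked against KFENCE_CANARY_PATTERN(addr) (see kfence.h).
 */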
356
Marco Elvera9ab52b2021-11-05 13:45:31 -0700357static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t gfp,
Marco Elver08f6b102021-11-05 13:45:34 -0700358 unsigned long *stack_entries, size_t num_stack_entries,
359 u32 alloc_stack_hash)
Alexander Potapenko0ce20dd2021-02-25 17:18:53 -0800360{
361 struct kfence_metadata *meta = NULL;
362 unsigned long flags;
Vlastimil Babka8dae0cf2021-11-03 18:19:48 +0100363 struct slab *slab;
Alexander Potapenko0ce20dd2021-02-25 17:18:53 -0800364 void *addr;
365
366 /* Try to obtain a free object. */
367 raw_spin_lock_irqsave(&kfence_freelist_lock, flags);
368 if (!list_empty(&kfence_freelist)) {
369 meta = list_entry(kfence_freelist.next, struct kfence_metadata, list);
370 list_del_init(&meta->list);
371 }
372 raw_spin_unlock_irqrestore(&kfence_freelist_lock, flags);
Marco Elver9a19aeb2021-11-05 13:45:28 -0700373 if (!meta) {
374 atomic_long_inc(&counters[KFENCE_COUNTER_SKIP_CAPACITY]);
Alexander Potapenko0ce20dd2021-02-25 17:18:53 -0800375 return NULL;
Marco Elver9a19aeb2021-11-05 13:45:28 -0700376 }
Alexander Potapenko0ce20dd2021-02-25 17:18:53 -0800377
378 if (unlikely(!raw_spin_trylock_irqsave(&meta->lock, flags))) {
379 /*
380 * This is extremely unlikely -- we are reporting on a
381 * use-after-free, which locked meta->lock, and the reporting
382 * code via printk calls kmalloc() which ends up in
383 * kfence_alloc() and tries to grab the same object that we're
384 * reporting on. While it has never been observed, lockdep does
385 * report that there is a possibility of deadlock. Fix it by
386 * using trylock and bailing out gracefully.
387 */
388 raw_spin_lock_irqsave(&kfence_freelist_lock, flags);
389 /* Put the object back on the freelist. */
390 list_add_tail(&meta->list, &kfence_freelist);
391 raw_spin_unlock_irqrestore(&kfence_freelist_lock, flags);
392
393 return NULL;
394 }
395
396 meta->addr = metadata_to_pageaddr(meta);
397 /* Unprotect if we're reusing this page. */
398 if (meta->state == KFENCE_OBJECT_FREED)
399 kfence_unprotect(meta->addr);
400
401 /*
402 * Note: for allocations made before RNG initialization, will always
403 * return zero. We still benefit from enabling KFENCE as early as
404 * possible, even when the RNG is not yet available, as this will allow
405 * KFENCE to detect bugs due to earlier allocations. The only downside
406 * is that the out-of-bounds accesses detected are deterministic for
407 * such allocations.
408 */
409 if (prandom_u32_max(2)) {
410 /* Allocate on the "right" side, re-calculate address. */
411 meta->addr += PAGE_SIZE - size;
412 meta->addr = ALIGN_DOWN(meta->addr, cache->align);
413 }
414
415 addr = (void *)meta->addr;
416
417 /* Update remaining metadata. */
Marco Elvera9ab52b2021-11-05 13:45:31 -0700418 metadata_update_state(meta, KFENCE_OBJECT_ALLOCATED, stack_entries, num_stack_entries);
Alexander Potapenko0ce20dd2021-02-25 17:18:53 -0800419 /* Pairs with READ_ONCE() in kfence_shutdown_cache(). */
420 WRITE_ONCE(meta->cache, cache);
421 meta->size = size;
Marco Elver08f6b102021-11-05 13:45:34 -0700422 meta->alloc_stack_hash = alloc_stack_hash;
Marco Elver49332952021-11-05 13:45:43 -0700423 raw_spin_unlock_irqrestore(&meta->lock, flags);
Marco Elver08f6b102021-11-05 13:45:34 -0700424
Marco Elver49332952021-11-05 13:45:43 -0700425 alloc_covered_add(alloc_stack_hash, 1);
Alexander Potapenko0ce20dd2021-02-25 17:18:53 -0800426
Vlastimil Babka8dae0cf2021-11-03 18:19:48 +0100427 /* Set required slab fields. */
428 slab = virt_to_slab((void *)meta->addr);
429 slab->slab_cache = cache;
Vlastimil Babka401fb122021-11-04 11:30:58 +0100430#if defined(CONFIG_SLUB)
431 slab->objects = 1;
432#elif defined(CONFIG_SLAB)
433 slab->s_mem = addr;
434#endif
Alexander Potapenko0ce20dd2021-02-25 17:18:53 -0800435
Alexander Potapenko0ce20dd2021-02-25 17:18:53 -0800436 /* Memory initialization. */
Marco Elver49332952021-11-05 13:45:43 -0700437 for_each_canary(meta, set_canary_byte);
Alexander Potapenko0ce20dd2021-02-25 17:18:53 -0800438
439 /*
440 * We check slab_want_init_on_alloc() ourselves, rather than letting
441 * SL*B do the initialization, as otherwise we might overwrite KFENCE's
442 * redzone.
443 */
444 if (unlikely(slab_want_init_on_alloc(gfp, cache)))
445 memzero_explicit(addr, size);
446 if (cache->ctor)
447 cache->ctor(addr);
448
449 if (CONFIG_KFENCE_STRESS_TEST_FAULTS && !prandom_u32_max(CONFIG_KFENCE_STRESS_TEST_FAULTS))
450 kfence_protect(meta->addr); /* Random "faults" by protecting the object. */
451
452 atomic_long_inc(&counters[KFENCE_COUNTER_ALLOCATED]);
453 atomic_long_inc(&counters[KFENCE_COUNTER_ALLOCS]);
454
455 return addr;
456}

static void kfence_guarded_free(void *addr, struct kfence_metadata *meta, bool zombie)
{
        struct kcsan_scoped_access assert_page_exclusive;
        unsigned long flags;
        bool init;

        raw_spin_lock_irqsave(&meta->lock, flags);

        if (meta->state != KFENCE_OBJECT_ALLOCATED || meta->addr != (unsigned long)addr) {
                /* Invalid or double-free, bail out. */
                atomic_long_inc(&counters[KFENCE_COUNTER_BUGS]);
                kfence_report_error((unsigned long)addr, false, NULL, meta,
                                    KFENCE_ERROR_INVALID_FREE);
                raw_spin_unlock_irqrestore(&meta->lock, flags);
                return;
        }

        /* Detect racy use-after-free, or incorrect reallocation of this page by KFENCE. */
        kcsan_begin_scoped_access((void *)ALIGN_DOWN((unsigned long)addr, PAGE_SIZE), PAGE_SIZE,
                                  KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ASSERT,
                                  &assert_page_exclusive);

        if (CONFIG_KFENCE_STRESS_TEST_FAULTS)
                kfence_unprotect((unsigned long)addr); /* To check canary bytes. */

        /* Restore page protection if there was an OOB access. */
        if (meta->unprotected_page) {
                memzero_explicit((void *)ALIGN_DOWN(meta->unprotected_page, PAGE_SIZE), PAGE_SIZE);
                kfence_protect(meta->unprotected_page);
                meta->unprotected_page = 0;
        }

        /* Mark the object as freed. */
        metadata_update_state(meta, KFENCE_OBJECT_FREED, NULL, 0);
        init = slab_want_init_on_free(meta->cache);
        raw_spin_unlock_irqrestore(&meta->lock, flags);

        alloc_covered_add(meta->alloc_stack_hash, -1);

        /* Check canary bytes for memory corruption. */
        for_each_canary(meta, check_canary_byte);

        /*
         * Clear memory if init-on-free is set. While we protect the page, the
         * data is still there, and after a use-after-free is detected, we
         * unprotect the page, so the data is still accessible.
         */
        if (!zombie && unlikely(init))
                memzero_explicit(addr, meta->size);

        /* Protect to detect use-after-frees. */
        kfence_protect((unsigned long)addr);

        kcsan_end_scoped_access(&assert_page_exclusive);
        if (!zombie) {
                /* Add it to the tail of the freelist for reuse. */
                raw_spin_lock_irqsave(&kfence_freelist_lock, flags);
                KFENCE_WARN_ON(!list_empty(&meta->list));
                list_add_tail(&meta->list, &kfence_freelist);
                raw_spin_unlock_irqrestore(&kfence_freelist_lock, flags);

                atomic_long_dec(&counters[KFENCE_COUNTER_ALLOCATED]);
                atomic_long_inc(&counters[KFENCE_COUNTER_FREES]);
        } else {
                /* See kfence_shutdown_cache(). */
                atomic_long_inc(&counters[KFENCE_COUNTER_ZOMBIES]);
        }
}

static void rcu_guarded_free(struct rcu_head *h)
{
        struct kfence_metadata *meta = container_of(h, struct kfence_metadata, rcu_head);

        kfence_guarded_free((void *)meta->addr, meta, false);
}

static bool __init kfence_init_pool(void)
{
        unsigned long addr = (unsigned long)__kfence_pool;
        struct page *pages;
        int i;

        if (!__kfence_pool)
                return false;

        if (!arch_kfence_init_pool())
                goto err;

        pages = virt_to_page(addr);

        /*
         * Set up object pages: they must have PG_slab set, to avoid freeing
         * these as real pages.
         *
         * We also want to avoid inserting kfence_free() in the kfree()
         * fast-path in SLUB, and therefore need to ensure kfree() correctly
         * enters __slab_free() slow-path.
         */
        for (i = 0; i < KFENCE_POOL_SIZE / PAGE_SIZE; i++) {
                if (!i || (i % 2))
                        continue;

                /* Verify we do not have a compound head page. */
                if (WARN_ON(compound_head(&pages[i]) != &pages[i]))
                        goto err;

                __SetPageSlab(&pages[i]);
        }

        /*
         * Protect the first 2 pages. The first page is mostly unnecessary, and
         * merely serves as an extended guard page. However, adding one
         * additional page in the beginning gives us an even number of pages,
         * which simplifies the mapping of address to metadata index.
         */
        for (i = 0; i < 2; i++) {
                if (unlikely(!kfence_protect(addr)))
                        goto err;

                addr += PAGE_SIZE;
        }

        for (i = 0; i < CONFIG_KFENCE_NUM_OBJECTS; i++) {
                struct kfence_metadata *meta = &kfence_metadata[i];

                /* Initialize metadata. */
                INIT_LIST_HEAD(&meta->list);
                raw_spin_lock_init(&meta->lock);
                meta->state = KFENCE_OBJECT_UNUSED;
                meta->addr = addr; /* Initialize for validation in metadata_to_pageaddr(). */
                list_add_tail(&meta->list, &kfence_freelist);

                /* Protect the right redzone. */
                if (unlikely(!kfence_protect(addr + PAGE_SIZE)))
                        goto err;

                addr += 2 * PAGE_SIZE;
        }

        /*
         * The pool is live and will never be deallocated from this point on.
         * Remove the pool object from the kmemleak object tree, as it would
         * otherwise overlap with allocations returned by kfence_alloc(), which
         * are registered with kmemleak through the slab post-alloc hook.
         */
        kmemleak_free(__kfence_pool);

        return true;

err:
        /*
         * Only release unprotected pages, and do not try to go back and change
         * page attributes due to risk of failing to do so as well. If changing
         * page attributes for some pages fails, it is very likely that it also
         * fails for the first page, and therefore expect addr==__kfence_pool in
         * most failure cases.
         */
        memblock_free_late(__pa(addr), KFENCE_POOL_SIZE - (addr - (unsigned long)__kfence_pool));
        __kfence_pool = NULL;
        return false;
}
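
/*
 * Pool layout sketch (KFENCE_POOL_SIZE is defined in <linux/kfence.h> as
 * (CONFIG_KFENCE_NUM_OBJECTS + 1) * 2 * PAGE_SIZE; with the default 255
 * objects and 4 KiB pages that is 512 pages, i.e. 2 MiB):
 *
 *      guard | guard | obj0 | guard | obj1 | guard | ... | obj254 | guard
 */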

/* === DebugFS Interface ==================================================== */

static int stats_show(struct seq_file *seq, void *v)
{
        int i;

        seq_printf(seq, "enabled: %i\n", READ_ONCE(kfence_enabled));
        for (i = 0; i < KFENCE_COUNTER_COUNT; i++)
                seq_printf(seq, "%s: %ld\n", counter_names[i], atomic_long_read(&counters[i]));

        return 0;
}
DEFINE_SHOW_ATTRIBUTE(stats);

/*
 * debugfs seq_file operations for /sys/kernel/debug/kfence/objects.
 * start_object() and next_object() return the object index + 1, because NULL is used
 * to stop iteration.
 */
static void *start_object(struct seq_file *seq, loff_t *pos)
{
        if (*pos < CONFIG_KFENCE_NUM_OBJECTS)
                return (void *)((long)*pos + 1);
        return NULL;
}

static void stop_object(struct seq_file *seq, void *v)
{
}

static void *next_object(struct seq_file *seq, void *v, loff_t *pos)
{
        ++*pos;
        if (*pos < CONFIG_KFENCE_NUM_OBJECTS)
                return (void *)((long)*pos + 1);
        return NULL;
}

static int show_object(struct seq_file *seq, void *v)
{
        struct kfence_metadata *meta = &kfence_metadata[(long)v - 1];
        unsigned long flags;

        raw_spin_lock_irqsave(&meta->lock, flags);
        kfence_print_object(seq, meta);
        raw_spin_unlock_irqrestore(&meta->lock, flags);
        seq_puts(seq, "---------------------------------\n");

        return 0;
}

static const struct seq_operations object_seqops = {
        .start = start_object,
        .next = next_object,
        .stop = stop_object,
        .show = show_object,
};

static int open_objects(struct inode *inode, struct file *file)
{
        return seq_open(file, &object_seqops);
}

static const struct file_operations objects_fops = {
        .open = open_objects,
        .read = seq_read,
        .llseek = seq_lseek,
        .release = seq_release,
};

static int __init kfence_debugfs_init(void)
{
        struct dentry *kfence_dir = debugfs_create_dir("kfence", NULL);

        debugfs_create_file("stats", 0444, kfence_dir, NULL, &stats_fops);
        debugfs_create_file("objects", 0400, kfence_dir, NULL, &objects_fops);
        return 0;
}

late_initcall(kfence_debugfs_init);
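
/*
 * Example usage (paths follow from the debugfs_create_*() calls above,
 * assuming debugfs is mounted at /sys/kernel/debug):
 *
 *      cat /sys/kernel/debug/kfence/stats      (counters, one per line)
 *      cat /sys/kernel/debug/kfence/objects    (per-object state, root only)
 */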

/* === Allocation Gate Timer ================================================ */

#ifdef CONFIG_KFENCE_STATIC_KEYS
/* Wait queue to wake up allocation-gate timer task. */
static DECLARE_WAIT_QUEUE_HEAD(allocation_wait);

static void wake_up_kfence_timer(struct irq_work *work)
{
        wake_up(&allocation_wait);
}
static DEFINE_IRQ_WORK(wake_up_kfence_timer_work, wake_up_kfence_timer);
#endif

/*
 * Set up delayed work, which will enable and disable the static key. We need to
 * use a work queue (rather than a simple timer), since enabling and disabling a
 * static key cannot be done from an interrupt.
 *
 * Note: Toggling a static branch currently causes IPIs, and here we'll end up
 * with a total of 2 IPIs to all CPUs. If this ends up a problem in future (with
 * more aggressive sampling intervals), we could get away with a variant that
 * avoids IPIs, at the cost of not immediately capturing allocations if the
 * instructions remain cached.
 */
static struct delayed_work kfence_timer;
static void toggle_allocation_gate(struct work_struct *work)
{
        if (!READ_ONCE(kfence_enabled))
                return;

        atomic_set(&kfence_allocation_gate, 0);
#ifdef CONFIG_KFENCE_STATIC_KEYS
        /* Enable static key, and await allocation to happen. */
        static_branch_enable(&kfence_allocation_key);

        if (sysctl_hung_task_timeout_secs) {
                /*
                 * During low activity with no allocations we might wait a
                 * while; let's avoid the hung task warning.
                 */
                wait_event_idle_timeout(allocation_wait, atomic_read(&kfence_allocation_gate),
                                        sysctl_hung_task_timeout_secs * HZ / 2);
        } else {
                wait_event_idle(allocation_wait, atomic_read(&kfence_allocation_gate));
        }

        /* Disable static key and reset timer. */
        static_branch_disable(&kfence_allocation_key);
#endif
        queue_delayed_work(system_unbound_wq, &kfence_timer,
                           msecs_to_jiffies(kfence_sample_interval));
}
static DECLARE_DELAYED_WORK(kfence_timer, toggle_allocation_gate);

/* === Public interface ===================================================== */

void __init kfence_alloc_pool(void)
{
        if (!kfence_sample_interval)
                return;

        __kfence_pool = memblock_alloc(KFENCE_POOL_SIZE, PAGE_SIZE);

        if (!__kfence_pool)
                pr_err("failed to allocate pool\n");
}

void __init kfence_init(void)
{
        /* Setting kfence_sample_interval to 0 on boot disables KFENCE. */
        if (!kfence_sample_interval)
                return;

        stack_hash_seed = (u32)random_get_entropy();
        if (!kfence_init_pool()) {
                pr_err("%s failed\n", __func__);
                return;
        }

        if (!IS_ENABLED(CONFIG_KFENCE_STATIC_KEYS))
                static_branch_enable(&kfence_allocation_key);
        WRITE_ONCE(kfence_enabled, true);
        queue_delayed_work(system_unbound_wq, &kfence_timer, 0);
        pr_info("initialized - using %lu bytes for %d objects at 0x%p-0x%p\n", KFENCE_POOL_SIZE,
                CONFIG_KFENCE_NUM_OBJECTS, (void *)__kfence_pool,
                (void *)(__kfence_pool + KFENCE_POOL_SIZE));
}

void kfence_shutdown_cache(struct kmem_cache *s)
{
        unsigned long flags;
        struct kfence_metadata *meta;
        int i;

        for (i = 0; i < CONFIG_KFENCE_NUM_OBJECTS; i++) {
                bool in_use;

                meta = &kfence_metadata[i];

                /*
                 * If we observe some inconsistent cache and state pair where we
                 * should have returned false here, cache destruction is racing
                 * with either kmem_cache_alloc() or kmem_cache_free(). Taking
                 * the lock will not help, as different critical section
                 * serialization will have the same outcome.
                 */
                if (READ_ONCE(meta->cache) != s ||
                    READ_ONCE(meta->state) != KFENCE_OBJECT_ALLOCATED)
                        continue;

                raw_spin_lock_irqsave(&meta->lock, flags);
                in_use = meta->cache == s && meta->state == KFENCE_OBJECT_ALLOCATED;
                raw_spin_unlock_irqrestore(&meta->lock, flags);

                if (in_use) {
                        /*
                         * This cache still has allocations, and we should not
                         * release them back into the freelist so they can still
                         * safely be used and retain the kernel's default
                         * behaviour of keeping the allocations alive (leak the
                         * cache); however, they effectively become "zombie
                         * allocations" as the KFENCE objects are the only ones
                         * still in use and the owning cache is being destroyed.
                         *
                         * We mark them freed, so that any subsequent use shows
                         * more useful error messages that will include stack
                         * traces of the user of the object, the original
                         * allocation, and caller to shutdown_cache().
                         */
                        kfence_guarded_free((void *)meta->addr, meta, /*zombie=*/true);
                }
        }

        for (i = 0; i < CONFIG_KFENCE_NUM_OBJECTS; i++) {
                meta = &kfence_metadata[i];

                /* See above. */
                if (READ_ONCE(meta->cache) != s || READ_ONCE(meta->state) != KFENCE_OBJECT_FREED)
                        continue;

                raw_spin_lock_irqsave(&meta->lock, flags);
                if (meta->cache == s && meta->state == KFENCE_OBJECT_FREED)
                        meta->cache = NULL;
                raw_spin_unlock_irqrestore(&meta->lock, flags);
        }
}

void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags)
{
        unsigned long stack_entries[KFENCE_STACK_DEPTH];
        size_t num_stack_entries;
        u32 alloc_stack_hash;

        /*
         * Perform size check before switching kfence_allocation_gate, so that
         * we don't disable KFENCE without making an allocation.
         */
        if (size > PAGE_SIZE) {
                atomic_long_inc(&counters[KFENCE_COUNTER_SKIP_INCOMPAT]);
                return NULL;
        }

        /*
         * Skip allocations from non-default zones, including DMA. We cannot
         * guarantee that pages in the KFENCE pool will have the requested
         * properties (e.g. reside in DMAable memory).
         */
        if ((flags & GFP_ZONEMASK) ||
            (s->flags & (SLAB_CACHE_DMA | SLAB_CACHE_DMA32))) {
                atomic_long_inc(&counters[KFENCE_COUNTER_SKIP_INCOMPAT]);
                return NULL;
        }

        if (atomic_inc_return(&kfence_allocation_gate) > 1)
                return NULL;
#ifdef CONFIG_KFENCE_STATIC_KEYS
        /*
         * waitqueue_active() is fully ordered after the update of
         * kfence_allocation_gate per atomic_inc_return().
         */
        if (waitqueue_active(&allocation_wait)) {
                /*
                 * Calling wake_up() here may deadlock when allocations happen
                 * from within timer code. Use an irq_work to defer it.
                 */
                irq_work_queue(&wake_up_kfence_timer_work);
        }
#endif

        if (!READ_ONCE(kfence_enabled))
                return NULL;

        num_stack_entries = stack_trace_save(stack_entries, KFENCE_STACK_DEPTH, 0);

        /*
         * Do expensive check for coverage of allocation in slow-path after
         * allocation_gate has already become non-zero, even though it might
         * mean not making any allocation within a given sample interval.
         *
         * This ensures reasonable allocation coverage when the pool is almost
         * full, including avoiding long-lived allocations of the same source
         * filling up the pool (e.g. pagecache allocations).
         */
        alloc_stack_hash = get_alloc_stack_hash(stack_entries, num_stack_entries);
        if (should_skip_covered() && alloc_covered_contains(alloc_stack_hash)) {
                atomic_long_inc(&counters[KFENCE_COUNTER_SKIP_COVERED]);
                return NULL;
        }

        return kfence_guarded_alloc(s, size, flags, stack_entries, num_stack_entries,
                                    alloc_stack_hash);
}
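
/*
 * Note: the fast path gating __kfence_alloc() lives in <linux/kfence.h>; a
 * rough sketch (the header's actual guards around the static key may differ):
 *
 *      static __always_inline void *kfence_alloc(struct kmem_cache *s,
 *                                                size_t size, gfp_t flags)
 *      {
 *              if (!static_branch_unlikely(&kfence_allocation_key))
 *                      return NULL;    // static-key gate, if enabled
 *              if (likely(atomic_read(&kfence_allocation_gate)))
 *                      return NULL;    // sample window already consumed
 *              return __kfence_alloc(s, size, flags);
 *      }
 */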

size_t kfence_ksize(const void *addr)
{
        const struct kfence_metadata *meta = addr_to_metadata((unsigned long)addr);

        /*
         * Read locklessly -- if there is a race with __kfence_alloc(), this is
         * either a use-after-free or invalid access.
         */
        return meta ? meta->size : 0;
}

void *kfence_object_start(const void *addr)
{
        const struct kfence_metadata *meta = addr_to_metadata((unsigned long)addr);

        /*
         * Read locklessly -- if there is a race with __kfence_alloc(), this is
         * either a use-after-free or invalid access.
         */
        return meta ? (void *)meta->addr : NULL;
}

void __kfence_free(void *addr)
{
        struct kfence_metadata *meta = addr_to_metadata((unsigned long)addr);

        /*
         * If the objects of the cache are SLAB_TYPESAFE_BY_RCU, defer freeing
         * the object, as the object page may be recycled for other-typed
         * objects once it has been freed. meta->cache may be NULL if the cache
         * was destroyed.
         */
        if (unlikely(meta->cache && (meta->cache->flags & SLAB_TYPESAFE_BY_RCU)))
                call_rcu(&meta->rcu_head, rcu_guarded_free);
        else
                kfence_guarded_free(addr, meta, false);
}

bool kfence_handle_page_fault(unsigned long addr, bool is_write, struct pt_regs *regs)
{
        const int page_index = (addr - (unsigned long)__kfence_pool) / PAGE_SIZE;
        struct kfence_metadata *to_report = NULL;
        enum kfence_error_type error_type;
        unsigned long flags;

        if (!is_kfence_address((void *)addr))
                return false;

        if (!READ_ONCE(kfence_enabled)) /* If disabled at runtime ... */
                return kfence_unprotect(addr); /* ... unprotect and proceed. */

        atomic_long_inc(&counters[KFENCE_COUNTER_BUGS]);

        if (page_index % 2) {
                /* This is a redzone, report a buffer overflow. */
                struct kfence_metadata *meta;
                int distance = 0;

                meta = addr_to_metadata(addr - PAGE_SIZE);
                if (meta && READ_ONCE(meta->state) == KFENCE_OBJECT_ALLOCATED) {
                        to_report = meta;
                        /* Data race ok; distance calculation approximate. */
                        distance = addr - data_race(meta->addr + meta->size);
                }

                meta = addr_to_metadata(addr + PAGE_SIZE);
                if (meta && READ_ONCE(meta->state) == KFENCE_OBJECT_ALLOCATED) {
                        /* Data race ok; distance calculation approximate. */
                        if (!to_report || distance > data_race(meta->addr) - addr)
                                to_report = meta;
                }

                if (!to_report)
                        goto out;

                raw_spin_lock_irqsave(&to_report->lock, flags);
                to_report->unprotected_page = addr;
                error_type = KFENCE_ERROR_OOB;

                /*
                 * If the object was freed before we took the lock we can still
                 * report this as an OOB -- the report will simply show the
                 * stacktrace of the free as well.
                 */
        } else {
                to_report = addr_to_metadata(addr);
                if (!to_report)
                        goto out;

                raw_spin_lock_irqsave(&to_report->lock, flags);
                error_type = KFENCE_ERROR_UAF;
                /*
                 * We may race with __kfence_alloc(), and it is possible that a
                 * freed object may be reallocated. We simply report this as a
                 * use-after-free, with the stack trace showing the place where
                 * the object was re-allocated.
                 */
        }

out:
        if (to_report) {
                kfence_report_error(addr, is_write, regs, to_report, error_type);
                raw_spin_unlock_irqrestore(&to_report->lock, flags);
        } else {
                /* This may be a UAF or OOB access, but we can't be sure. */
                kfence_report_error(addr, is_write, regs, NULL, KFENCE_ERROR_INVALID);
        }

        return kfence_unprotect(addr); /* Unprotect and let access proceed. */
}