blob: deec894a1345ac49c95dc43cf36a6f58d4b3090f [file] [log] [blame]
// SPDX-License-Identifier: GPL-2.0
/*
 * SLUB: A slab allocator that limits cache line use instead of queuing
 * objects in per cpu and per node lists.
 *
 * The allocator synchronizes using per slab locks or atomic operations
 * and only uses a centralized lock to manage a pool of partial slabs.
 *
 * (C) 2007 SGI, Christoph Lameter
 * (C) 2011 Linux Foundation, Christoph Lameter
 */
12
13#include <linux/mm.h>
Nick Piggin1eb5ac62009-05-05 19:13:44 +100014#include <linux/swap.h> /* struct reclaim_state */
Christoph Lameter81819f02007-05-06 14:49:36 -070015#include <linux/module.h>
16#include <linux/bit_spinlock.h>
17#include <linux/interrupt.h>
18#include <linux/bitops.h>
19#include <linux/slab.h>
Christoph Lameter97d06602012-07-06 15:25:11 -050020#include "slab.h"
Alexey Dobriyan7b3c3a52008-10-06 02:42:17 +040021#include <linux/proc_fs.h>
Christoph Lameter81819f02007-05-06 14:49:36 -070022#include <linux/seq_file.h>
Andrey Ryabinina79316c2015-02-13 14:39:38 -080023#include <linux/kasan.h>
Christoph Lameter81819f02007-05-06 14:49:36 -070024#include <linux/cpu.h>
25#include <linux/cpuset.h>
26#include <linux/mempolicy.h>
27#include <linux/ctype.h>
Thomas Gleixner3ac7fe52008-04-30 00:55:01 -070028#include <linux/debugobjects.h>
Christoph Lameter81819f02007-05-06 14:49:36 -070029#include <linux/kallsyms.h>
Alexander Potapenkob89fb5e2021-02-25 17:19:16 -080030#include <linux/kfence.h>
Yasunori Gotob9049e22007-10-21 16:41:37 -070031#include <linux/memory.h>
Roman Zippelf8bd2252008-05-01 04:34:31 -070032#include <linux/math64.h>
Akinobu Mita773ff602008-12-23 19:37:01 +090033#include <linux/fault-inject.h>
Pekka Enbergbfa71452011-07-07 22:47:01 +030034#include <linux/stacktrace.h>
Christoph Lameter4de900b2012-01-30 15:53:51 -060035#include <linux/prefetch.h>
Glauber Costa2633d7a2012-12-18 14:22:34 -080036#include <linux/memcontrol.h>
Kees Cook2482ddec2017-09-06 16:19:18 -070037#include <linux/random.h>
Christoph Lameter81819f02007-05-06 14:49:36 -070038
Richard Kennedy4a923792010-10-21 10:29:19 +010039#include <trace/events/kmem.h>
40
Mel Gorman072bb0a2012-07-31 16:43:58 -070041#include "internal.h"
42
/*
 * Lock order:
 *   1. slab_mutex (Global Mutex)
 *   2. node->list_lock
 *   3. slab_lock(page) (Only on some arches and for debugging)
 *
 *   slab_mutex
 *
 *   The role of the slab_mutex is to protect the list of all the slabs
 *   and to synchronize major metadata changes to slab cache structures.
 *
 *   The slab_lock is only used for debugging and on arches that do not
 *   have the ability to do a cmpxchg_double. It only protects:
 *	A. page->freelist	-> List of free objects in a page
 *	B. page->inuse		-> Number of objects in use
 *	C. page->objects	-> Number of objects in page
 *	D. page->frozen		-> frozen state
 *
 *   If a slab is frozen then it is exempt from list management. It is not
 *   on any list except per cpu partial list. The processor that froze the
 *   slab is the one who can perform list operations on the page. Other
 *   processors may put objects onto the freelist but the processor that
 *   froze the slab is the only one that can retrieve the objects from the
 *   page's freelist.
 *
 *   The list_lock protects the partial and full list on each node and
 *   the partial slab counter. If taken then no new slabs may be added to or
 *   removed from the lists, nor may the number of partial slabs be modified.
 *   (Note that the total number of slabs is an atomic value that may be
 *   modified without taking the list lock).
 *
 *   The list_lock is a centralized lock and thus we avoid taking it as
 *   much as possible. As long as SLUB does not have to handle partial
 *   slabs, operations can continue without any centralized lock. For
 *   example, allocating a long series of objects that fill up slabs does
 *   not require the list lock.
 *
 *   Interrupts are disabled during allocation and deallocation in order to
 *   make the slab allocator safe to use in the context of an irq. In addition
 *   interrupts are disabled to ensure that the processor does not change
 *   while handling per_cpu slabs, due to kernel preemption.
 *
 * SLUB assigns one slab for allocation to each processor.
 * Allocations only occur from these slabs called cpu slabs.
 *
 * Slabs with free elements are kept on a partial list and during regular
 * operations no list for full slabs is used. If an object in a full slab is
 * freed then the slab will show up again on the partial lists.
 * We track full slabs for debugging purposes though because otherwise we
 * cannot scan all objects.
 *
 * Slabs are freed when they become empty. Teardown and setup is
 * minimal so we rely on the page allocator's per cpu caches for
 * fast frees and allocs.
 *
 * page->frozen		The slab is frozen and exempt from list processing.
 * 			This means that the slab is dedicated to a purpose
 * 			such as satisfying allocations for a specific
 * 			processor. Objects may be freed in the slab while
 * 			it is frozen but slab_free will then skip the usual
 * 			list operations. It is up to the processor holding
 * 			the slab to integrate the slab into the slab lists
 * 			when the slab is no longer needed.
 *
 * 			One use of this flag is to mark slabs that are
 * 			used for allocations. Then such a slab becomes a cpu
 * 			slab. The cpu slab may be equipped with an additional
 * 			freelist that allows lockless access to
 * 			free objects in addition to the regular freelist
 * 			that requires the slab lock.
 *
 * SLAB_DEBUG_FLAGS	Slab requires special handling due to debug
 * 			options set. This moves slab handling out of
 * 			the fast path and disables lockless freelists.
 */
117
/*
 * Static key slub_debug_enabled: only defined when CONFIG_SLUB_DEBUG is set,
 * and initially true only when CONFIG_SLUB_DEBUG_ON is set as well.
 */
#ifdef CONFIG_SLUB_DEBUG
#ifndef CONFIG_SLUB_DEBUG_ON
DEFINE_STATIC_KEY_FALSE(slub_debug_enabled);
#else
DEFINE_STATIC_KEY_TRUE(slub_debug_enabled);
#endif
#endif
125
Vlastimil Babka59052e82020-08-06 23:18:55 -0700126static inline bool kmem_cache_debug(struct kmem_cache *s)
127{
128 return kmem_cache_debug_flags(s, SLAB_DEBUG_FLAGS);
Christoph Lameteraf537b02010-07-09 14:07:14 -0500129}
Christoph Lameter5577bd82007-05-16 22:10:56 -0700130
Geert Uytterhoeven117d54d2016-08-04 15:31:55 -0700131void *fixup_red_left(struct kmem_cache *s, void *p)
Joonsoo Kimd86bd1b2016-03-15 14:55:12 -0700132{
Vlastimil Babka59052e82020-08-06 23:18:55 -0700133 if (kmem_cache_debug_flags(s, SLAB_RED_ZONE))
Joonsoo Kimd86bd1b2016-03-15 14:55:12 -0700134 p += s->red_left_pad;
135
136 return p;
137}
138
Joonsoo Kim345c9052013-06-19 14:05:52 +0900139static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
140{
141#ifdef CONFIG_SLUB_CPU_PARTIAL
142 return !kmem_cache_debug(s);
143#else
144 return false;
145#endif
146}
147
/*
 * Issues still to be resolved:
 *
 * - Support PAGE_ALLOC_DEBUG. Should be easy to do.
 *
 * - Variable sizing of the per node arrays
 */
155
/* Define to exercise recovery from slab corruption at boot. */
#undef SLUB_RESILIENCY_TEST

/* Define to have failed cmpxchg attempts logged. */
#undef SLUB_DEBUG_CMPXCHG

/*
 * Lower bound on the number of partial slabs. That many are kept on the
 * partial lists even when empty; kmem_cache_shrink may reclaim them.
 */
#define MIN_PARTIAL 5

/*
 * Upper bound on the desirable number of partial slabs.
 * When more partial slabs exist, kmem_cache_shrink sorts the partial
 * list by the number of objects in use.
 */
#define MAX_PARTIAL 10

#define DEBUG_DEFAULT_FLAGS (SLAB_CONSISTENCY_CHECKS | SLAB_RED_ZONE | \
				SLAB_POISON | SLAB_STORE_USER)

/*
 * Debug flags that rule out CMPXCHG, because consistency issues could
 * arise while debug information is being checked or read.
 */
#define SLAB_NO_CMPXCHG (SLAB_CONSISTENCY_CHECKS | SLAB_STORE_USER | \
				SLAB_TRACE)


/*
 * Debug flags that need metadata stored inside the slab. They are turned
 * off when slub_debug=O is used and the metadata would raise a cache's
 * minimum order.
 */
#define DEBUG_METADATA_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER)

/* Layout of the packed order/objects word: order above OO_SHIFT, count below. */
#define OO_SHIFT	16
#define OO_MASK		((1 << OO_SHIFT) - 1)
#define MAX_OBJS_PER_PAGE	32767 /* since page.objects is u15 */

/* Internal SLUB flags */
/* Poison object */
#define __OBJECT_POISON		((slab_flags_t __force)0x80000000U)
/* Use cmpxchg_double */
#define __CMPXCHG_DOUBLE	((slab_flags_t __force)0x40000000U)
Christoph Lameter81819f02007-05-06 14:49:36 -0700202
/*
 * Tracking user of a slab.
 *
 * One record describes a single alloc or free event. get_track() indexes
 * records by enum track_item, so TRACK_ALLOC must stay 0 and TRACK_FREE 1.
 */
#define TRACK_ADDRS_COUNT 16
struct track {
	unsigned long addr;	/* Called from address */
#ifdef CONFIG_STACKTRACE
	/* Saved stack trace entries; a 0 entry ends a short trace */
	unsigned long addrs[TRACK_ADDRS_COUNT];
#endif
	int cpu;		/* Was running on cpu */
	int pid;		/* Pid context */
	unsigned long when;	/* When did the operation occur */
};

enum track_item { TRACK_ALLOC, TRACK_FREE };
218
/*
 * sysfs hooks: declared here when CONFIG_SYSFS is set, otherwise replaced
 * by stubs that report success (0) without doing anything.
 */
#ifndef CONFIG_SYSFS
static inline int sysfs_slab_add(struct kmem_cache *s)
{
	return 0;
}

static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
{
	return 0;
}
#else
static int sysfs_slab_add(struct kmem_cache *);
static int sysfs_slab_alias(struct kmem_cache *, const char *);
#endif
227
Christoph Lameter4fdccdf2011-03-22 13:35:00 -0500228static inline void stat(const struct kmem_cache *s, enum stat_item si)
Christoph Lameter8ff12cf2008-02-07 17:47:41 -0800229{
230#ifdef CONFIG_SLUB_STATS
Christoph Lameter88da03a2014-04-07 15:39:42 -0700231 /*
232 * The rmw is racy on a preemptible kernel but this is acceptable, so
233 * avoid this_cpu_add()'s irq-disable overhead.
234 */
235 raw_cpu_inc(s->cpu_slab->stat[si]);
Christoph Lameter8ff12cf2008-02-07 17:47:41 -0800236#endif
237}
238
Vlastimil Babka7e1fa932021-02-24 12:01:12 -0800239/*
240 * Tracks for which NUMA nodes we have kmem_cache_nodes allocated.
241 * Corresponds to node_state[N_NORMAL_MEMORY], but can temporarily
242 * differ during memory hotplug/hotremove operations.
243 * Protected by slab_mutex.
244 */
245static nodemask_t slab_nodes;
246
/********************************************************************
 * 			Core slab cache functions
 *******************************************************************/
250
/*
 * Returns freelist pointer (ptr). With hardening, this is obfuscated
 * with an XOR of the address where the pointer is held and a per-cache
 * random number.
 *
 * The transform is its own inverse, so the same helper both encodes a
 * pointer for storage and decodes a stored value.
 */
static inline void *freelist_ptr(const struct kmem_cache *s, void *ptr,
				 unsigned long ptr_addr)
{
#ifndef CONFIG_SLAB_FREELIST_HARDENED
	return ptr;
#else
	/*
	 * When CONFIG_KASAN_SW/HW_TAGS is enabled, ptr_addr might be tagged.
	 * Normally, this doesn't cause any issues, as both set_freepointer()
	 * and get_freepointer() are called with a pointer with the same tag.
	 * However, there are some issues with CONFIG_SLUB_DEBUG code. For
	 * example, when __free_slub() iterates over objects in a cache, it
	 * passes untagged pointers to check_object(). check_object() in turn
	 * calls get_freepointer() with an untagged pointer, which causes the
	 * freepointer to be restored incorrectly.
	 *
	 * Hence the tag is always stripped from the location before mixing.
	 */
	unsigned long location =
		(unsigned long)kasan_reset_tag((void *)ptr_addr);
	unsigned long mixed = (unsigned long)ptr ^ s->random ^ swab(location);

	return (void *)mixed;
#endif
}
276
/*
 * Read the word stored at @ptr_addr and run it through freelist_ptr(),
 * yielding the freelist pointer recorded at that location.
 */
static inline void *freelist_dereference(const struct kmem_cache *s,
					 void *ptr_addr)
{
	unsigned long stored = *(unsigned long *)ptr_addr;

	return freelist_ptr(s, (void *)stored, (unsigned long)ptr_addr);
}
284
Christoph Lameter7656c722007-05-09 02:32:40 -0700285static inline void *get_freepointer(struct kmem_cache *s, void *object)
286{
Andrey Konovalovaa1ef4d2020-12-22 12:02:17 -0800287 object = kasan_reset_tag(object);
Kees Cook2482ddec2017-09-06 16:19:18 -0700288 return freelist_dereference(s, object + s->offset);
Christoph Lameter7656c722007-05-09 02:32:40 -0700289}
290
Eric Dumazet0ad95002011-12-16 16:25:34 +0100291static void prefetch_freepointer(const struct kmem_cache *s, void *object)
292{
Vlastimil Babka0882ff92018-08-17 15:44:44 -0700293 prefetch(object + s->offset);
Eric Dumazet0ad95002011-12-16 16:25:34 +0100294}
295
Christoph Lameter1393d9a2011-05-16 15:26:08 -0500296static inline void *get_freepointer_safe(struct kmem_cache *s, void *object)
297{
Kees Cook2482ddec2017-09-06 16:19:18 -0700298 unsigned long freepointer_addr;
Christoph Lameter1393d9a2011-05-16 15:26:08 -0500299 void *p;
300
Vlastimil Babka8e57f8a2020-01-13 16:29:20 -0800301 if (!debug_pagealloc_enabled_static())
Joonsoo Kim922d5662016-03-17 14:17:53 -0700302 return get_freepointer(s, object);
303
Kees Cook2482ddec2017-09-06 16:19:18 -0700304 freepointer_addr = (unsigned long)object + s->offset;
Christoph Hellwigfe557312020-06-17 09:37:53 +0200305 copy_from_kernel_nofault(&p, (void **)freepointer_addr, sizeof(p));
Kees Cook2482ddec2017-09-06 16:19:18 -0700306 return freelist_ptr(s, p, freepointer_addr);
Christoph Lameter1393d9a2011-05-16 15:26:08 -0500307}
308
Christoph Lameter7656c722007-05-09 02:32:40 -0700309static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
310{
Kees Cook2482ddec2017-09-06 16:19:18 -0700311 unsigned long freeptr_addr = (unsigned long)object + s->offset;
312
Alexander Popovce6fa912017-09-06 16:19:22 -0700313#ifdef CONFIG_SLAB_FREELIST_HARDENED
314 BUG_ON(object == fp); /* naive detection of double free or corruption */
315#endif
316
Andrey Konovalovaa1ef4d2020-12-22 12:02:17 -0800317 freeptr_addr = (unsigned long)kasan_reset_tag((void *)freeptr_addr);
Kees Cook2482ddec2017-09-06 16:19:18 -0700318 *(void **)freeptr_addr = freelist_ptr(s, fp, freeptr_addr);
Christoph Lameter7656c722007-05-09 02:32:40 -0700319}
320
/*
 * Iterate __p over the objects of a slab: start at fixup_red_left(__s,
 * __addr) and step by __s->size while below __addr + __objects * __s->size.
 */
#define for_each_object(__p, __s, __addr, __objects)		\
	for (__p = fixup_red_left(__s, __addr);			\
	     __p < (__addr) + (__objects) * (__s)->size;	\
	     __p += (__s)->size)
Christoph Lameter7656c722007-05-09 02:32:40 -0700326
Matthew Wilcox9736d2a2018-06-07 17:09:10 -0700327static inline unsigned int order_objects(unsigned int order, unsigned int size)
Lai Jiangshanab9a0f12011-03-10 15:21:48 +0800328{
Matthew Wilcox9736d2a2018-06-07 17:09:10 -0700329 return ((unsigned int)PAGE_SIZE << order) / size;
Lai Jiangshanab9a0f12011-03-10 15:21:48 +0800330}
331
Alexey Dobriyan19af27a2018-04-05 16:21:39 -0700332static inline struct kmem_cache_order_objects oo_make(unsigned int order,
Matthew Wilcox9736d2a2018-06-07 17:09:10 -0700333 unsigned int size)
Christoph Lameter834f3d12008-04-14 19:11:31 +0300334{
335 struct kmem_cache_order_objects x = {
Matthew Wilcox9736d2a2018-06-07 17:09:10 -0700336 (order << OO_SHIFT) + order_objects(order, size)
Christoph Lameter834f3d12008-04-14 19:11:31 +0300337 };
338
339 return x;
340}
341
Alexey Dobriyan19af27a2018-04-05 16:21:39 -0700342static inline unsigned int oo_order(struct kmem_cache_order_objects x)
Christoph Lameter834f3d12008-04-14 19:11:31 +0300343{
Cyrill Gorcunov210b5c02008-10-22 23:00:38 +0400344 return x.x >> OO_SHIFT;
Christoph Lameter834f3d12008-04-14 19:11:31 +0300345}
346
Alexey Dobriyan19af27a2018-04-05 16:21:39 -0700347static inline unsigned int oo_objects(struct kmem_cache_order_objects x)
Christoph Lameter834f3d12008-04-14 19:11:31 +0300348{
Cyrill Gorcunov210b5c02008-10-22 23:00:38 +0400349 return x.x & OO_MASK;
Christoph Lameter834f3d12008-04-14 19:11:31 +0300350}
351
Christoph Lameter881db7f2011-06-01 12:25:53 -0500352/*
353 * Per slab locking using the pagelock
354 */
355static __always_inline void slab_lock(struct page *page)
356{
Kirill A. Shutemov48c935a2016-01-15 16:51:24 -0800357 VM_BUG_ON_PAGE(PageTail(page), page);
Christoph Lameter881db7f2011-06-01 12:25:53 -0500358 bit_spin_lock(PG_locked, &page->flags);
359}
360
361static __always_inline void slab_unlock(struct page *page)
362{
Kirill A. Shutemov48c935a2016-01-15 16:51:24 -0800363 VM_BUG_ON_PAGE(PageTail(page), page);
Christoph Lameter881db7f2011-06-01 12:25:53 -0500364 __bit_spin_unlock(PG_locked, &page->flags);
365}
366
Christoph Lameter1d071712011-07-14 12:49:12 -0500367/* Interrupts must be disabled (for the fallback code to work right) */
368static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
Christoph Lameterb789ef52011-06-01 12:25:49 -0500369 void *freelist_old, unsigned long counters_old,
370 void *freelist_new, unsigned long counters_new,
371 const char *n)
372{
Christoph Lameter1d071712011-07-14 12:49:12 -0500373 VM_BUG_ON(!irqs_disabled());
Heiko Carstens25654092012-01-12 17:17:33 -0800374#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
375 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
Christoph Lameterb789ef52011-06-01 12:25:49 -0500376 if (s->flags & __CMPXCHG_DOUBLE) {
Jan Beulichcdcd6292012-01-02 17:02:18 +0000377 if (cmpxchg_double(&page->freelist, &page->counters,
Dan Carpenter0aa9a132014-08-06 16:04:48 -0700378 freelist_old, counters_old,
379 freelist_new, counters_new))
Joe Perches6f6528a2015-04-14 15:44:31 -0700380 return true;
Christoph Lameterb789ef52011-06-01 12:25:49 -0500381 } else
382#endif
383 {
Christoph Lameter881db7f2011-06-01 12:25:53 -0500384 slab_lock(page);
Chen Gangd0e0ac92013-07-15 09:05:29 +0800385 if (page->freelist == freelist_old &&
386 page->counters == counters_old) {
Christoph Lameterb789ef52011-06-01 12:25:49 -0500387 page->freelist = freelist_new;
Matthew Wilcox7d27a042018-06-07 17:08:31 -0700388 page->counters = counters_new;
Christoph Lameter881db7f2011-06-01 12:25:53 -0500389 slab_unlock(page);
Joe Perches6f6528a2015-04-14 15:44:31 -0700390 return true;
Christoph Lameterb789ef52011-06-01 12:25:49 -0500391 }
Christoph Lameter881db7f2011-06-01 12:25:53 -0500392 slab_unlock(page);
Christoph Lameterb789ef52011-06-01 12:25:49 -0500393 }
394
395 cpu_relax();
396 stat(s, CMPXCHG_DOUBLE_FAIL);
397
398#ifdef SLUB_DEBUG_CMPXCHG
Fabian Frederickf9f58282014-06-04 16:06:34 -0700399 pr_info("%s %s: cmpxchg double redo ", n, s->name);
Christoph Lameterb789ef52011-06-01 12:25:49 -0500400#endif
401
Joe Perches6f6528a2015-04-14 15:44:31 -0700402 return false;
Christoph Lameterb789ef52011-06-01 12:25:49 -0500403}
404
Christoph Lameter1d071712011-07-14 12:49:12 -0500405static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
406 void *freelist_old, unsigned long counters_old,
407 void *freelist_new, unsigned long counters_new,
408 const char *n)
409{
Heiko Carstens25654092012-01-12 17:17:33 -0800410#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
411 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
Christoph Lameter1d071712011-07-14 12:49:12 -0500412 if (s->flags & __CMPXCHG_DOUBLE) {
Jan Beulichcdcd6292012-01-02 17:02:18 +0000413 if (cmpxchg_double(&page->freelist, &page->counters,
Dan Carpenter0aa9a132014-08-06 16:04:48 -0700414 freelist_old, counters_old,
415 freelist_new, counters_new))
Joe Perches6f6528a2015-04-14 15:44:31 -0700416 return true;
Christoph Lameter1d071712011-07-14 12:49:12 -0500417 } else
418#endif
419 {
420 unsigned long flags;
421
422 local_irq_save(flags);
423 slab_lock(page);
Chen Gangd0e0ac92013-07-15 09:05:29 +0800424 if (page->freelist == freelist_old &&
425 page->counters == counters_old) {
Christoph Lameter1d071712011-07-14 12:49:12 -0500426 page->freelist = freelist_new;
Matthew Wilcox7d27a042018-06-07 17:08:31 -0700427 page->counters = counters_new;
Christoph Lameter1d071712011-07-14 12:49:12 -0500428 slab_unlock(page);
429 local_irq_restore(flags);
Joe Perches6f6528a2015-04-14 15:44:31 -0700430 return true;
Christoph Lameter1d071712011-07-14 12:49:12 -0500431 }
432 slab_unlock(page);
433 local_irq_restore(flags);
434 }
435
436 cpu_relax();
437 stat(s, CMPXCHG_DOUBLE_FAIL);
438
439#ifdef SLUB_DEBUG_CMPXCHG
Fabian Frederickf9f58282014-06-04 16:06:34 -0700440 pr_info("%s %s: cmpxchg double redo ", n, s->name);
Christoph Lameter1d071712011-07-14 12:49:12 -0500441#endif
442
Joe Perches6f6528a2015-04-14 15:44:31 -0700443 return false;
Christoph Lameter1d071712011-07-14 12:49:12 -0500444}
445
Christoph Lameter41ecc552007-05-09 02:32:44 -0700446#ifdef CONFIG_SLUB_DEBUG
Yu Zhao90e9f6a2020-01-30 22:11:57 -0800447static unsigned long object_map[BITS_TO_LONGS(MAX_OBJS_PER_PAGE)];
448static DEFINE_SPINLOCK(object_map_lock);
449
Christoph Lameter41ecc552007-05-09 02:32:44 -0700450/*
Christoph Lameter5f80b132011-04-15 14:48:13 -0500451 * Determine a map of object in use on a page.
452 *
Christoph Lameter881db7f2011-06-01 12:25:53 -0500453 * Node listlock must be held to guarantee that the page does
Christoph Lameter5f80b132011-04-15 14:48:13 -0500454 * not vanish from under us.
455 */
Yu Zhao90e9f6a2020-01-30 22:11:57 -0800456static unsigned long *get_map(struct kmem_cache *s, struct page *page)
Jules Irenge31364c22020-04-06 20:08:15 -0700457 __acquires(&object_map_lock)
Christoph Lameter5f80b132011-04-15 14:48:13 -0500458{
459 void *p;
460 void *addr = page_address(page);
461
Yu Zhao90e9f6a2020-01-30 22:11:57 -0800462 VM_BUG_ON(!irqs_disabled());
463
464 spin_lock(&object_map_lock);
465
466 bitmap_zero(object_map, page->objects);
467
Christoph Lameter5f80b132011-04-15 14:48:13 -0500468 for (p = page->freelist; p; p = get_freepointer(s, p))
Roman Gushchin4138fdf2020-08-06 23:20:42 -0700469 set_bit(__obj_to_index(s, addr, p), object_map);
Yu Zhao90e9f6a2020-01-30 22:11:57 -0800470
471 return object_map;
472}
473
Jules Irenge81aba9e2020-04-06 20:08:18 -0700474static void put_map(unsigned long *map) __releases(&object_map_lock)
Yu Zhao90e9f6a2020-01-30 22:11:57 -0800475{
476 VM_BUG_ON(map != object_map);
Yu Zhao90e9f6a2020-01-30 22:11:57 -0800477 spin_unlock(&object_map_lock);
Christoph Lameter5f80b132011-04-15 14:48:13 -0500478}
479
Alexey Dobriyan870b1fb2018-04-05 16:21:43 -0700480static inline unsigned int size_from_object(struct kmem_cache *s)
Joonsoo Kimd86bd1b2016-03-15 14:55:12 -0700481{
482 if (s->flags & SLAB_RED_ZONE)
483 return s->size - s->red_left_pad;
484
485 return s->size;
486}
487
488static inline void *restore_red_left(struct kmem_cache *s, void *p)
489{
490 if (s->flags & SLAB_RED_ZONE)
491 p -= s->red_left_pad;
492
493 return p;
494}
495
Christoph Lameter41ecc552007-05-09 02:32:44 -0700496/*
497 * Debug settings:
498 */
Andrey Ryabinin89d3c872015-11-05 18:51:23 -0800499#if defined(CONFIG_SLUB_DEBUG_ON)
Alexey Dobriyand50112e2017-11-15 17:32:18 -0800500static slab_flags_t slub_debug = DEBUG_DEFAULT_FLAGS;
Christoph Lameterf0630ff2007-07-15 23:38:14 -0700501#else
Alexey Dobriyand50112e2017-11-15 17:32:18 -0800502static slab_flags_t slub_debug;
Christoph Lameterf0630ff2007-07-15 23:38:14 -0700503#endif
Christoph Lameter41ecc552007-05-09 02:32:44 -0700504
Vlastimil Babkae17f1df2020-08-06 23:18:35 -0700505static char *slub_debug_string;
David Rientjesfa5ec8a2009-07-07 00:14:14 -0700506static int disable_higher_order_debug;
Christoph Lameter41ecc552007-05-09 02:32:44 -0700507
/*
 * slub is about to manipulate internal object metadata. This memory lies
 * outside the range of the allocated object, so accessing it would normally
 * be reported by kasan as a bounds error. metadata_access_enable() is used
 * to tell kasan that these accesses are OK; pair every call with
 * metadata_access_disable().
 */
static inline void metadata_access_enable(void)
{
	kasan_disable_current();
}
518
/*
 * Counterpart of metadata_access_enable(): calls kasan_enable_current()
 * once the metadata access is finished.
 */
static inline void metadata_access_disable(void)
{
	kasan_enable_current();
}
523
524/*
Christoph Lameter81819f02007-05-06 14:49:36 -0700525 * Object debugging
526 */
Joonsoo Kimd86bd1b2016-03-15 14:55:12 -0700527
528/* Verify that a pointer has an address that is valid within a slab page */
529static inline int check_valid_pointer(struct kmem_cache *s,
530 struct page *page, void *object)
531{
532 void *base;
533
534 if (!object)
535 return 1;
536
537 base = page_address(page);
Qian Cai338cfaa2019-02-20 22:19:36 -0800538 object = kasan_reset_tag(object);
Joonsoo Kimd86bd1b2016-03-15 14:55:12 -0700539 object = restore_red_left(s, object);
540 if (object < base || object >= base + page->objects * s->size ||
541 (object - base) % s->size) {
542 return 0;
543 }
544
545 return 1;
546}
547
Daniel Thompsonaa2efd52017-01-24 15:18:02 -0800548static void print_section(char *level, char *text, u8 *addr,
549 unsigned int length)
Christoph Lameter81819f02007-05-06 14:49:36 -0700550{
Andrey Ryabinina79316c2015-02-13 14:39:38 -0800551 metadata_access_enable();
Andrey Konovalovaa1ef4d2020-12-22 12:02:17 -0800552 print_hex_dump(level, kasan_reset_tag(text), DUMP_PREFIX_ADDRESS,
553 16, 1, addr, length, 1);
Andrey Ryabinina79316c2015-02-13 14:39:38 -0800554 metadata_access_disable();
Christoph Lameter81819f02007-05-06 14:49:36 -0700555}
556
Waiman Longcbfc35a2020-05-07 18:36:06 -0700557/*
558 * See comment in calculate_sizes().
559 */
560static inline bool freeptr_outside_object(struct kmem_cache *s)
561{
562 return s->offset >= s->inuse;
563}
564
565/*
566 * Return offset of the end of info block which is inuse + free pointer if
567 * not overlapping with object.
568 */
569static inline unsigned int get_info_end(struct kmem_cache *s)
570{
571 if (freeptr_outside_object(s))
572 return s->inuse + sizeof(void *);
573 else
574 return s->inuse;
575}
576
Christoph Lameter81819f02007-05-06 14:49:36 -0700577static struct track *get_track(struct kmem_cache *s, void *object,
578 enum track_item alloc)
579{
580 struct track *p;
581
Waiman Longcbfc35a2020-05-07 18:36:06 -0700582 p = object + get_info_end(s);
Christoph Lameter81819f02007-05-06 14:49:36 -0700583
Andrey Konovalovaa1ef4d2020-12-22 12:02:17 -0800584 return kasan_reset_tag(p + alloc);
Christoph Lameter81819f02007-05-06 14:49:36 -0700585}
586
587static void set_track(struct kmem_cache *s, void *object,
Eduard - Gabriel Munteanuce71e272008-08-19 20:43:25 +0300588 enum track_item alloc, unsigned long addr)
Christoph Lameter81819f02007-05-06 14:49:36 -0700589{
Akinobu Mita1a00df42009-03-07 00:36:21 +0900590 struct track *p = get_track(s, object, alloc);
Christoph Lameter81819f02007-05-06 14:49:36 -0700591
Christoph Lameter81819f02007-05-06 14:49:36 -0700592 if (addr) {
Ben Greeard6543e32011-07-07 11:36:36 -0700593#ifdef CONFIG_STACKTRACE
Thomas Gleixner79716792019-04-25 11:45:00 +0200594 unsigned int nr_entries;
Ben Greeard6543e32011-07-07 11:36:36 -0700595
Andrey Ryabinina79316c2015-02-13 14:39:38 -0800596 metadata_access_enable();
Andrey Konovalovaa1ef4d2020-12-22 12:02:17 -0800597 nr_entries = stack_trace_save(kasan_reset_tag(p->addrs),
598 TRACK_ADDRS_COUNT, 3);
Andrey Ryabinina79316c2015-02-13 14:39:38 -0800599 metadata_access_disable();
Ben Greeard6543e32011-07-07 11:36:36 -0700600
Thomas Gleixner79716792019-04-25 11:45:00 +0200601 if (nr_entries < TRACK_ADDRS_COUNT)
602 p->addrs[nr_entries] = 0;
Ben Greeard6543e32011-07-07 11:36:36 -0700603#endif
Christoph Lameter81819f02007-05-06 14:49:36 -0700604 p->addr = addr;
605 p->cpu = smp_processor_id();
Alexey Dobriyan88e4ccf2008-06-23 02:58:37 +0400606 p->pid = current->pid;
Christoph Lameter81819f02007-05-06 14:49:36 -0700607 p->when = jiffies;
Thomas Gleixnerb8ca7ff2019-04-10 12:28:05 +0200608 } else {
Christoph Lameter81819f02007-05-06 14:49:36 -0700609 memset(p, 0, sizeof(struct track));
Thomas Gleixnerb8ca7ff2019-04-10 12:28:05 +0200610 }
Christoph Lameter81819f02007-05-06 14:49:36 -0700611}
612
Christoph Lameter81819f02007-05-06 14:49:36 -0700613static void init_tracking(struct kmem_cache *s, void *object)
614{
Christoph Lameter24922682007-07-17 04:03:18 -0700615 if (!(s->flags & SLAB_STORE_USER))
616 return;
617
Eduard - Gabriel Munteanuce71e272008-08-19 20:43:25 +0300618 set_track(s, object, TRACK_FREE, 0UL);
619 set_track(s, object, TRACK_ALLOC, 0UL);
Christoph Lameter81819f02007-05-06 14:49:36 -0700620}
621
Chintan Pandya86609d32018-04-05 16:20:15 -0700622static void print_track(const char *s, struct track *t, unsigned long pr_time)
Christoph Lameter81819f02007-05-06 14:49:36 -0700623{
624 if (!t->addr)
625 return;
626
Yafang Shao96b94ab2021-03-19 18:12:45 +0800627 pr_err("%s in %pS age=%lu cpu=%u pid=%d\n",
Chintan Pandya86609d32018-04-05 16:20:15 -0700628 s, (void *)t->addr, pr_time - t->when, t->cpu, t->pid);
Ben Greeard6543e32011-07-07 11:36:36 -0700629#ifdef CONFIG_STACKTRACE
630 {
631 int i;
632 for (i = 0; i < TRACK_ADDRS_COUNT; i++)
633 if (t->addrs[i])
Fabian Frederickf9f58282014-06-04 16:06:34 -0700634 pr_err("\t%pS\n", (void *)t->addrs[i]);
Ben Greeard6543e32011-07-07 11:36:36 -0700635 else
636 break;
637 }
638#endif
Christoph Lameter81819f02007-05-06 14:49:36 -0700639}
640
Vlastimil Babkae42f1742020-08-06 23:19:05 -0700641void print_tracking(struct kmem_cache *s, void *object)
Christoph Lameter24922682007-07-17 04:03:18 -0700642{
Chintan Pandya86609d32018-04-05 16:20:15 -0700643 unsigned long pr_time = jiffies;
Christoph Lameter24922682007-07-17 04:03:18 -0700644 if (!(s->flags & SLAB_STORE_USER))
645 return;
646
Chintan Pandya86609d32018-04-05 16:20:15 -0700647 print_track("Allocated", get_track(s, object, TRACK_ALLOC), pr_time);
648 print_track("Freed", get_track(s, object, TRACK_FREE), pr_time);
Christoph Lameter24922682007-07-17 04:03:18 -0700649}
650
651static void print_page_info(struct page *page)
652{
Yafang Shao96b94ab2021-03-19 18:12:45 +0800653 pr_err("Slab 0x%p objects=%u used=%u fp=0x%p flags=%#lx(%pGp)\n",
Yafang Shao4a8ef192021-03-19 18:12:44 +0800654 page, page->objects, page->inuse, page->freelist,
655 page->flags, &page->flags);
Christoph Lameter24922682007-07-17 04:03:18 -0700656
657}
658
659static void slab_bug(struct kmem_cache *s, char *fmt, ...)
660{
Fabian Frederickecc42fb2014-06-04 16:06:35 -0700661 struct va_format vaf;
Christoph Lameter24922682007-07-17 04:03:18 -0700662 va_list args;
Christoph Lameter24922682007-07-17 04:03:18 -0700663
664 va_start(args, fmt);
Fabian Frederickecc42fb2014-06-04 16:06:35 -0700665 vaf.fmt = fmt;
666 vaf.va = &args;
Fabian Frederickf9f58282014-06-04 16:06:34 -0700667 pr_err("=============================================================================\n");
Fabian Frederickecc42fb2014-06-04 16:06:35 -0700668 pr_err("BUG %s (%s): %pV\n", s->name, print_tainted(), &vaf);
Fabian Frederickf9f58282014-06-04 16:06:34 -0700669 pr_err("-----------------------------------------------------------------------------\n\n");
Dave Jones645df232012-09-18 15:54:12 -0400670
Rusty Russell373d4d02013-01-21 17:17:39 +1030671 add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
Fabian Frederickecc42fb2014-06-04 16:06:35 -0700672 va_end(args);
Christoph Lameter24922682007-07-17 04:03:18 -0700673}
674
675static void slab_fix(struct kmem_cache *s, char *fmt, ...)
676{
Fabian Frederickecc42fb2014-06-04 16:06:35 -0700677 struct va_format vaf;
Christoph Lameter24922682007-07-17 04:03:18 -0700678 va_list args;
Christoph Lameter24922682007-07-17 04:03:18 -0700679
680 va_start(args, fmt);
Fabian Frederickecc42fb2014-06-04 16:06:35 -0700681 vaf.fmt = fmt;
682 vaf.va = &args;
683 pr_err("FIX %s: %pV\n", s->name, &vaf);
Christoph Lameter24922682007-07-17 04:03:18 -0700684 va_end(args);
Christoph Lameter24922682007-07-17 04:03:18 -0700685}
686
Dongli Zhang52f23472020-06-01 21:45:47 -0700687static bool freelist_corrupted(struct kmem_cache *s, struct page *page,
Eugeniu Roscadc07a722020-09-04 16:35:30 -0700688 void **freelist, void *nextfree)
Dongli Zhang52f23472020-06-01 21:45:47 -0700689{
690 if ((s->flags & SLAB_CONSISTENCY_CHECKS) &&
Eugeniu Roscadc07a722020-09-04 16:35:30 -0700691 !check_valid_pointer(s, page, nextfree) && freelist) {
692 object_err(s, page, *freelist, "Freechain corrupt");
693 *freelist = NULL;
Dongli Zhang52f23472020-06-01 21:45:47 -0700694 slab_fix(s, "Isolate corrupted freechain");
695 return true;
696 }
697
698 return false;
699}
700
Christoph Lameter24922682007-07-17 04:03:18 -0700701static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
Christoph Lameter81819f02007-05-06 14:49:36 -0700702{
703 unsigned int off; /* Offset of last byte */
Christoph Lametera973e9d2008-03-01 13:40:44 -0800704 u8 *addr = page_address(page);
Christoph Lameter24922682007-07-17 04:03:18 -0700705
706 print_tracking(s, p);
707
708 print_page_info(page);
709
Yafang Shao96b94ab2021-03-19 18:12:45 +0800710 pr_err("Object 0x%p @offset=%tu fp=0x%p\n\n",
Fabian Frederickf9f58282014-06-04 16:06:34 -0700711 p, p - addr, get_freepointer(s, p));
Christoph Lameter24922682007-07-17 04:03:18 -0700712
Joonsoo Kimd86bd1b2016-03-15 14:55:12 -0700713 if (s->flags & SLAB_RED_ZONE)
Daniel Thompsonaa2efd52017-01-24 15:18:02 -0800714 print_section(KERN_ERR, "Redzone ", p - s->red_left_pad,
715 s->red_left_pad);
Joonsoo Kimd86bd1b2016-03-15 14:55:12 -0700716 else if (p > addr + 16)
Daniel Thompsonaa2efd52017-01-24 15:18:02 -0800717 print_section(KERN_ERR, "Bytes b4 ", p - 16, 16);
Christoph Lameter24922682007-07-17 04:03:18 -0700718
Daniel Thompsonaa2efd52017-01-24 15:18:02 -0800719 print_section(KERN_ERR, "Object ", p,
Alexey Dobriyan1b473f22018-04-05 16:21:17 -0700720 min_t(unsigned int, s->object_size, PAGE_SIZE));
Christoph Lameter81819f02007-05-06 14:49:36 -0700721 if (s->flags & SLAB_RED_ZONE)
Daniel Thompsonaa2efd52017-01-24 15:18:02 -0800722 print_section(KERN_ERR, "Redzone ", p + s->object_size,
Christoph Lameter3b0efdf2012-06-13 10:24:57 -0500723 s->inuse - s->object_size);
Christoph Lameter81819f02007-05-06 14:49:36 -0700724
Waiman Longcbfc35a2020-05-07 18:36:06 -0700725 off = get_info_end(s);
Christoph Lameter81819f02007-05-06 14:49:36 -0700726
Christoph Lameter24922682007-07-17 04:03:18 -0700727 if (s->flags & SLAB_STORE_USER)
Christoph Lameter81819f02007-05-06 14:49:36 -0700728 off += 2 * sizeof(struct track);
Christoph Lameter81819f02007-05-06 14:49:36 -0700729
Alexander Potapenko80a92012016-07-28 15:49:07 -0700730 off += kasan_metadata_size(s);
731
Joonsoo Kimd86bd1b2016-03-15 14:55:12 -0700732 if (off != size_from_object(s))
Christoph Lameter81819f02007-05-06 14:49:36 -0700733 /* Beginning of the filler is the free pointer */
Daniel Thompsonaa2efd52017-01-24 15:18:02 -0800734 print_section(KERN_ERR, "Padding ", p + off,
735 size_from_object(s) - off);
Christoph Lameter24922682007-07-17 04:03:18 -0700736
737 dump_stack();
Christoph Lameter81819f02007-05-06 14:49:36 -0700738}
739
Andrey Ryabinin75c66de2015-02-13 14:39:35 -0800740void object_err(struct kmem_cache *s, struct page *page,
Christoph Lameter81819f02007-05-06 14:49:36 -0700741 u8 *object, char *reason)
742{
Christoph Lameter3dc50632008-04-23 12:28:01 -0700743 slab_bug(s, "%s", reason);
Christoph Lameter24922682007-07-17 04:03:18 -0700744 print_trailer(s, page, object);
Christoph Lameter81819f02007-05-06 14:49:36 -0700745}
746
Mathieu Malaterrea38965b2018-06-07 17:05:17 -0700747static __printf(3, 4) void slab_err(struct kmem_cache *s, struct page *page,
Chen Gangd0e0ac92013-07-15 09:05:29 +0800748 const char *fmt, ...)
Christoph Lameter81819f02007-05-06 14:49:36 -0700749{
750 va_list args;
751 char buf[100];
752
Christoph Lameter24922682007-07-17 04:03:18 -0700753 va_start(args, fmt);
754 vsnprintf(buf, sizeof(buf), fmt, args);
Christoph Lameter81819f02007-05-06 14:49:36 -0700755 va_end(args);
Christoph Lameter3dc50632008-04-23 12:28:01 -0700756 slab_bug(s, "%s", buf);
Christoph Lameter24922682007-07-17 04:03:18 -0700757 print_page_info(page);
Christoph Lameter81819f02007-05-06 14:49:36 -0700758 dump_stack();
759}
760
Christoph Lameterf7cb1932010-09-29 07:15:01 -0500761static void init_object(struct kmem_cache *s, void *object, u8 val)
Christoph Lameter81819f02007-05-06 14:49:36 -0700762{
Andrey Konovalovaa1ef4d2020-12-22 12:02:17 -0800763 u8 *p = kasan_reset_tag(object);
Christoph Lameter81819f02007-05-06 14:49:36 -0700764
Joonsoo Kimd86bd1b2016-03-15 14:55:12 -0700765 if (s->flags & SLAB_RED_ZONE)
766 memset(p - s->red_left_pad, val, s->red_left_pad);
767
Christoph Lameter81819f02007-05-06 14:49:36 -0700768 if (s->flags & __OBJECT_POISON) {
Christoph Lameter3b0efdf2012-06-13 10:24:57 -0500769 memset(p, POISON_FREE, s->object_size - 1);
770 p[s->object_size - 1] = POISON_END;
Christoph Lameter81819f02007-05-06 14:49:36 -0700771 }
772
773 if (s->flags & SLAB_RED_ZONE)
Christoph Lameter3b0efdf2012-06-13 10:24:57 -0500774 memset(p + s->object_size, val, s->inuse - s->object_size);
Christoph Lameter81819f02007-05-06 14:49:36 -0700775}
776
Christoph Lameter24922682007-07-17 04:03:18 -0700777static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
778 void *from, void *to)
779{
780 slab_fix(s, "Restoring 0x%p-0x%p=0x%x\n", from, to - 1, data);
781 memset(from, data, to - from);
782}
783
784static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
785 u8 *object, char *what,
Pekka Enberg06428782008-01-07 23:20:27 -0800786 u8 *start, unsigned int value, unsigned int bytes)
Christoph Lameter24922682007-07-17 04:03:18 -0700787{
788 u8 *fault;
789 u8 *end;
Miles Chene1b70dd2019-11-30 17:49:31 -0800790 u8 *addr = page_address(page);
Christoph Lameter24922682007-07-17 04:03:18 -0700791
Andrey Ryabinina79316c2015-02-13 14:39:38 -0800792 metadata_access_enable();
Andrey Konovalovaa1ef4d2020-12-22 12:02:17 -0800793 fault = memchr_inv(kasan_reset_tag(start), value, bytes);
Andrey Ryabinina79316c2015-02-13 14:39:38 -0800794 metadata_access_disable();
Christoph Lameter24922682007-07-17 04:03:18 -0700795 if (!fault)
796 return 1;
797
798 end = start + bytes;
799 while (end > fault && end[-1] == value)
800 end--;
801
802 slab_bug(s, "%s overwritten", what);
Yafang Shao96b94ab2021-03-19 18:12:45 +0800803 pr_err("0x%p-0x%p @offset=%tu. First byte 0x%x instead of 0x%x\n",
Miles Chene1b70dd2019-11-30 17:49:31 -0800804 fault, end - 1, fault - addr,
805 fault[0], value);
Christoph Lameter24922682007-07-17 04:03:18 -0700806 print_trailer(s, page, object);
807
808 restore_bytes(s, what, value, fault, end);
809 return 0;
Christoph Lameter81819f02007-05-06 14:49:36 -0700810}
811
Christoph Lameter81819f02007-05-06 14:49:36 -0700812/*
813 * Object layout:
814 *
815 * object address
816 * Bytes of the object to be managed.
817 * If the freepointer may overlay the object then the free
Waiman Longcbfc35a2020-05-07 18:36:06 -0700818 * pointer is at the middle of the object.
Christoph Lameter672bba32007-05-09 02:32:39 -0700819 *
Christoph Lameter81819f02007-05-06 14:49:36 -0700820 * Poisoning uses 0x6b (POISON_FREE) and the last byte is
821 * 0xa5 (POISON_END)
822 *
Christoph Lameter3b0efdf2012-06-13 10:24:57 -0500823 * object + s->object_size
Christoph Lameter81819f02007-05-06 14:49:36 -0700824 * Padding to reach word boundary. This is also used for Redzoning.
Christoph Lameter672bba32007-05-09 02:32:39 -0700825 * Padding is extended by another word if Redzoning is enabled and
Christoph Lameter3b0efdf2012-06-13 10:24:57 -0500826 * object_size == inuse.
Christoph Lameter672bba32007-05-09 02:32:39 -0700827 *
Christoph Lameter81819f02007-05-06 14:49:36 -0700828 * We fill with 0xbb (RED_INACTIVE) for inactive objects and with
829 * 0xcc (RED_ACTIVE) for objects in use.
830 *
831 * object + s->inuse
Christoph Lameter672bba32007-05-09 02:32:39 -0700832 * Meta data starts here.
833 *
Christoph Lameter81819f02007-05-06 14:49:36 -0700834 * A. Free pointer (if we cannot overwrite object on free)
835 * B. Tracking data for SLAB_STORE_USER
Bhaskar Chowdhurydc842072021-04-29 22:54:51 -0700836 * C. Padding to reach required alignment boundary or at minimum
Christoph Lameter6446faa2008-02-15 23:45:26 -0800837 * one word if debugging is on to be able to detect writes
Christoph Lameter672bba32007-05-09 02:32:39 -0700838 * before the word boundary.
839 *
840 * Padding is done using 0x5a (POISON_INUSE)
Christoph Lameter81819f02007-05-06 14:49:36 -0700841 *
842 * object + s->size
Christoph Lameter672bba32007-05-09 02:32:39 -0700843 * Nothing is used beyond s->size.
Christoph Lameter81819f02007-05-06 14:49:36 -0700844 *
Christoph Lameter3b0efdf2012-06-13 10:24:57 -0500845 * If slabcaches are merged then the object_size and inuse boundaries are mostly
Christoph Lameter672bba32007-05-09 02:32:39 -0700846 * ignored. And therefore no slab options that rely on these boundaries
Christoph Lameter81819f02007-05-06 14:49:36 -0700847 * may be used with merged slabcaches.
848 */
849
Christoph Lameter81819f02007-05-06 14:49:36 -0700850static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p)
851{
Waiman Longcbfc35a2020-05-07 18:36:06 -0700852 unsigned long off = get_info_end(s); /* The end of info */
Christoph Lameter81819f02007-05-06 14:49:36 -0700853
854 if (s->flags & SLAB_STORE_USER)
855 /* We also have user information there */
856 off += 2 * sizeof(struct track);
857
Alexander Potapenko80a92012016-07-28 15:49:07 -0700858 off += kasan_metadata_size(s);
859
Joonsoo Kimd86bd1b2016-03-15 14:55:12 -0700860 if (size_from_object(s) == off)
Christoph Lameter81819f02007-05-06 14:49:36 -0700861 return 1;
862
Christoph Lameter24922682007-07-17 04:03:18 -0700863 return check_bytes_and_report(s, page, p, "Object padding",
Joonsoo Kimd86bd1b2016-03-15 14:55:12 -0700864 p + off, POISON_INUSE, size_from_object(s) - off);
Christoph Lameter81819f02007-05-06 14:49:36 -0700865}
866
Christoph Lameter39b26462008-04-14 19:11:30 +0300867/* Check the pad bytes at the end of a slab page */
Christoph Lameter81819f02007-05-06 14:49:36 -0700868static int slab_pad_check(struct kmem_cache *s, struct page *page)
869{
Christoph Lameter24922682007-07-17 04:03:18 -0700870 u8 *start;
871 u8 *fault;
872 u8 *end;
Balasubramani Vivekanandan5d682682018-01-31 16:15:43 -0800873 u8 *pad;
Christoph Lameter24922682007-07-17 04:03:18 -0700874 int length;
875 int remainder;
Christoph Lameter81819f02007-05-06 14:49:36 -0700876
877 if (!(s->flags & SLAB_POISON))
878 return 1;
879
Christoph Lametera973e9d2008-03-01 13:40:44 -0800880 start = page_address(page);
Matthew Wilcox (Oracle)a50b8542019-09-23 15:34:25 -0700881 length = page_size(page);
Christoph Lameter39b26462008-04-14 19:11:30 +0300882 end = start + length;
883 remainder = length % s->size;
Christoph Lameter81819f02007-05-06 14:49:36 -0700884 if (!remainder)
885 return 1;
886
Balasubramani Vivekanandan5d682682018-01-31 16:15:43 -0800887 pad = end - remainder;
Andrey Ryabinina79316c2015-02-13 14:39:38 -0800888 metadata_access_enable();
Andrey Konovalovaa1ef4d2020-12-22 12:02:17 -0800889 fault = memchr_inv(kasan_reset_tag(pad), POISON_INUSE, remainder);
Andrey Ryabinina79316c2015-02-13 14:39:38 -0800890 metadata_access_disable();
Christoph Lameter24922682007-07-17 04:03:18 -0700891 if (!fault)
892 return 1;
893 while (end > fault && end[-1] == POISON_INUSE)
894 end--;
895
Miles Chene1b70dd2019-11-30 17:49:31 -0800896 slab_err(s, page, "Padding overwritten. 0x%p-0x%p @offset=%tu",
897 fault, end - 1, fault - start);
Balasubramani Vivekanandan5d682682018-01-31 16:15:43 -0800898 print_section(KERN_ERR, "Padding ", pad, remainder);
Christoph Lameter24922682007-07-17 04:03:18 -0700899
Balasubramani Vivekanandan5d682682018-01-31 16:15:43 -0800900 restore_bytes(s, "slab padding", POISON_INUSE, fault, end);
Christoph Lameter24922682007-07-17 04:03:18 -0700901 return 0;
Christoph Lameter81819f02007-05-06 14:49:36 -0700902}
903
904static int check_object(struct kmem_cache *s, struct page *page,
Christoph Lameterf7cb1932010-09-29 07:15:01 -0500905 void *object, u8 val)
Christoph Lameter81819f02007-05-06 14:49:36 -0700906{
907 u8 *p = object;
Christoph Lameter3b0efdf2012-06-13 10:24:57 -0500908 u8 *endobject = object + s->object_size;
Christoph Lameter81819f02007-05-06 14:49:36 -0700909
910 if (s->flags & SLAB_RED_ZONE) {
Christoph Lameter24922682007-07-17 04:03:18 -0700911 if (!check_bytes_and_report(s, page, object, "Redzone",
Joonsoo Kimd86bd1b2016-03-15 14:55:12 -0700912 object - s->red_left_pad, val, s->red_left_pad))
913 return 0;
914
915 if (!check_bytes_and_report(s, page, object, "Redzone",
Christoph Lameter3b0efdf2012-06-13 10:24:57 -0500916 endobject, val, s->inuse - s->object_size))
Christoph Lameter81819f02007-05-06 14:49:36 -0700917 return 0;
Christoph Lameter81819f02007-05-06 14:49:36 -0700918 } else {
Christoph Lameter3b0efdf2012-06-13 10:24:57 -0500919 if ((s->flags & SLAB_POISON) && s->object_size < s->inuse) {
Ingo Molnar3adbefe2008-02-05 17:57:39 -0800920 check_bytes_and_report(s, page, p, "Alignment padding",
Chen Gangd0e0ac92013-07-15 09:05:29 +0800921 endobject, POISON_INUSE,
922 s->inuse - s->object_size);
Ingo Molnar3adbefe2008-02-05 17:57:39 -0800923 }
Christoph Lameter81819f02007-05-06 14:49:36 -0700924 }
925
926 if (s->flags & SLAB_POISON) {
Christoph Lameterf7cb1932010-09-29 07:15:01 -0500927 if (val != SLUB_RED_ACTIVE && (s->flags & __OBJECT_POISON) &&
Christoph Lameter24922682007-07-17 04:03:18 -0700928 (!check_bytes_and_report(s, page, p, "Poison", p,
Christoph Lameter3b0efdf2012-06-13 10:24:57 -0500929 POISON_FREE, s->object_size - 1) ||
Christoph Lameter24922682007-07-17 04:03:18 -0700930 !check_bytes_and_report(s, page, p, "Poison",
Christoph Lameter3b0efdf2012-06-13 10:24:57 -0500931 p + s->object_size - 1, POISON_END, 1)))
Christoph Lameter81819f02007-05-06 14:49:36 -0700932 return 0;
Christoph Lameter81819f02007-05-06 14:49:36 -0700933 /*
934 * check_pad_bytes cleans up on its own.
935 */
936 check_pad_bytes(s, page, p);
937 }
938
Waiman Longcbfc35a2020-05-07 18:36:06 -0700939 if (!freeptr_outside_object(s) && val == SLUB_RED_ACTIVE)
Christoph Lameter81819f02007-05-06 14:49:36 -0700940 /*
941 * Object and freepointer overlap. Cannot check
942 * freepointer while object is allocated.
943 */
944 return 1;
945
946 /* Check free pointer validity */
947 if (!check_valid_pointer(s, page, get_freepointer(s, p))) {
948 object_err(s, page, p, "Freepointer corrupt");
949 /*
Nick Andrew9f6c708e2008-12-05 14:08:08 +1100950 * No choice but to zap it and thus lose the remainder
Christoph Lameter81819f02007-05-06 14:49:36 -0700951 * of the free objects in this slab. May cause
Christoph Lameter672bba32007-05-09 02:32:39 -0700952 * another error because the object count is now wrong.
Christoph Lameter81819f02007-05-06 14:49:36 -0700953 */
Christoph Lametera973e9d2008-03-01 13:40:44 -0800954 set_freepointer(s, p, NULL);
Christoph Lameter81819f02007-05-06 14:49:36 -0700955 return 0;
956 }
957 return 1;
958}
959
960static int check_slab(struct kmem_cache *s, struct page *page)
961{
Christoph Lameter39b26462008-04-14 19:11:30 +0300962 int maxobj;
963
Christoph Lameter81819f02007-05-06 14:49:36 -0700964 VM_BUG_ON(!irqs_disabled());
965
966 if (!PageSlab(page)) {
Christoph Lameter24922682007-07-17 04:03:18 -0700967 slab_err(s, page, "Not a valid slab page");
Christoph Lameter81819f02007-05-06 14:49:36 -0700968 return 0;
969 }
Christoph Lameter39b26462008-04-14 19:11:30 +0300970
Matthew Wilcox9736d2a2018-06-07 17:09:10 -0700971 maxobj = order_objects(compound_order(page), s->size);
Christoph Lameter39b26462008-04-14 19:11:30 +0300972 if (page->objects > maxobj) {
973 slab_err(s, page, "objects %u > max %u",
Andrey Ryabininf6edde92014-12-10 15:42:22 -0800974 page->objects, maxobj);
Christoph Lameter39b26462008-04-14 19:11:30 +0300975 return 0;
976 }
977 if (page->inuse > page->objects) {
Christoph Lameter24922682007-07-17 04:03:18 -0700978 slab_err(s, page, "inuse %u > max %u",
Andrey Ryabininf6edde92014-12-10 15:42:22 -0800979 page->inuse, page->objects);
Christoph Lameter81819f02007-05-06 14:49:36 -0700980 return 0;
981 }
982 /* Slab_pad_check fixes things up after itself */
983 slab_pad_check(s, page);
984 return 1;
985}
986
987/*
Christoph Lameter672bba32007-05-09 02:32:39 -0700988 * Determine if a certain object on a page is on the freelist. Must hold the
989 * slab lock to guarantee that the chains are in a consistent state.
Christoph Lameter81819f02007-05-06 14:49:36 -0700990 */
991static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
992{
993 int nr = 0;
Christoph Lameter881db7f2011-06-01 12:25:53 -0500994 void *fp;
Christoph Lameter81819f02007-05-06 14:49:36 -0700995 void *object = NULL;
Andrey Ryabininf6edde92014-12-10 15:42:22 -0800996 int max_objects;
Christoph Lameter81819f02007-05-06 14:49:36 -0700997
Christoph Lameter881db7f2011-06-01 12:25:53 -0500998 fp = page->freelist;
Christoph Lameter39b26462008-04-14 19:11:30 +0300999 while (fp && nr <= page->objects) {
Christoph Lameter81819f02007-05-06 14:49:36 -07001000 if (fp == search)
1001 return 1;
1002 if (!check_valid_pointer(s, page, fp)) {
1003 if (object) {
1004 object_err(s, page, object,
1005 "Freechain corrupt");
Christoph Lametera973e9d2008-03-01 13:40:44 -08001006 set_freepointer(s, object, NULL);
Christoph Lameter81819f02007-05-06 14:49:36 -07001007 } else {
Christoph Lameter24922682007-07-17 04:03:18 -07001008 slab_err(s, page, "Freepointer corrupt");
Christoph Lametera973e9d2008-03-01 13:40:44 -08001009 page->freelist = NULL;
Christoph Lameter39b26462008-04-14 19:11:30 +03001010 page->inuse = page->objects;
Christoph Lameter24922682007-07-17 04:03:18 -07001011 slab_fix(s, "Freelist cleared");
Christoph Lameter81819f02007-05-06 14:49:36 -07001012 return 0;
1013 }
1014 break;
1015 }
1016 object = fp;
1017 fp = get_freepointer(s, object);
1018 nr++;
1019 }
1020
Matthew Wilcox9736d2a2018-06-07 17:09:10 -07001021 max_objects = order_objects(compound_order(page), s->size);
Cyrill Gorcunov210b5c02008-10-22 23:00:38 +04001022 if (max_objects > MAX_OBJS_PER_PAGE)
1023 max_objects = MAX_OBJS_PER_PAGE;
Christoph Lameter224a88b2008-04-14 19:11:31 +03001024
1025 if (page->objects != max_objects) {
Joe Perches756a0252016-03-17 14:19:47 -07001026 slab_err(s, page, "Wrong number of objects. Found %d but should be %d",
1027 page->objects, max_objects);
Christoph Lameter224a88b2008-04-14 19:11:31 +03001028 page->objects = max_objects;
1029 slab_fix(s, "Number of objects adjusted.");
1030 }
Christoph Lameter39b26462008-04-14 19:11:30 +03001031 if (page->inuse != page->objects - nr) {
Joe Perches756a0252016-03-17 14:19:47 -07001032 slab_err(s, page, "Wrong object count. Counter is %d but counted were %d",
1033 page->inuse, page->objects - nr);
Christoph Lameter39b26462008-04-14 19:11:30 +03001034 page->inuse = page->objects - nr;
Christoph Lameter24922682007-07-17 04:03:18 -07001035 slab_fix(s, "Object count adjusted.");
Christoph Lameter81819f02007-05-06 14:49:36 -07001036 }
1037 return search == NULL;
1038}
1039
Christoph Lameter0121c6192008-04-29 16:11:12 -07001040static void trace(struct kmem_cache *s, struct page *page, void *object,
1041 int alloc)
Christoph Lameter3ec09742007-05-16 22:11:00 -07001042{
1043 if (s->flags & SLAB_TRACE) {
Fabian Frederickf9f58282014-06-04 16:06:34 -07001044 pr_info("TRACE %s %s 0x%p inuse=%d fp=0x%p\n",
Christoph Lameter3ec09742007-05-16 22:11:00 -07001045 s->name,
1046 alloc ? "alloc" : "free",
1047 object, page->inuse,
1048 page->freelist);
1049
1050 if (!alloc)
Daniel Thompsonaa2efd52017-01-24 15:18:02 -08001051 print_section(KERN_INFO, "Object ", (void *)object,
Chen Gangd0e0ac92013-07-15 09:05:29 +08001052 s->object_size);
Christoph Lameter3ec09742007-05-16 22:11:00 -07001053
1054 dump_stack();
1055 }
1056}
1057
Christoph Lameter643b1132007-05-06 14:49:42 -07001058/*
Christoph Lameter672bba32007-05-09 02:32:39 -07001059 * Tracking of fully allocated slabs for debugging purposes.
Christoph Lameter643b1132007-05-06 14:49:42 -07001060 */
Christoph Lameter5cc6eee2011-06-01 12:25:50 -05001061static void add_full(struct kmem_cache *s,
1062 struct kmem_cache_node *n, struct page *page)
Christoph Lameter643b1132007-05-06 14:49:42 -07001063{
Christoph Lameter643b1132007-05-06 14:49:42 -07001064 if (!(s->flags & SLAB_STORE_USER))
1065 return;
1066
David Rientjes255d0882014-02-10 14:25:39 -08001067 lockdep_assert_held(&n->list_lock);
Tobin C. Harding916ac052019-05-13 17:16:12 -07001068 list_add(&page->slab_list, &n->full);
Christoph Lameter5cc6eee2011-06-01 12:25:50 -05001069}
Christoph Lameter643b1132007-05-06 14:49:42 -07001070
Peter Zijlstrac65c1872014-01-10 13:23:49 +01001071static void remove_full(struct kmem_cache *s, struct kmem_cache_node *n, struct page *page)
Christoph Lameter5cc6eee2011-06-01 12:25:50 -05001072{
1073 if (!(s->flags & SLAB_STORE_USER))
1074 return;
1075
David Rientjes255d0882014-02-10 14:25:39 -08001076 lockdep_assert_held(&n->list_lock);
Tobin C. Harding916ac052019-05-13 17:16:12 -07001077 list_del(&page->slab_list);
Christoph Lameter643b1132007-05-06 14:49:42 -07001078}
1079
Christoph Lameter0f389ec2008-04-14 18:53:02 +03001080/* Tracking of the number of slabs for debugging purposes */
1081static inline unsigned long slabs_node(struct kmem_cache *s, int node)
1082{
1083 struct kmem_cache_node *n = get_node(s, node);
1084
1085 return atomic_long_read(&n->nr_slabs);
1086}
1087
Alexander Beregalov26c02cf2009-06-11 14:08:48 +04001088static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
1089{
1090 return atomic_long_read(&n->nr_slabs);
1091}
1092
Christoph Lameter205ab992008-04-14 19:11:40 +03001093static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)
Christoph Lameter0f389ec2008-04-14 18:53:02 +03001094{
1095 struct kmem_cache_node *n = get_node(s, node);
1096
1097 /*
1098 * May be called early in order to allocate a slab for the
1099 * kmem_cache_node structure. Solve the chicken-egg
1100 * dilemma by deferring the increment of the count during
1101 * bootstrap (see early_kmem_cache_node_alloc).
1102 */
Joonsoo Kim338b2642013-01-21 17:01:27 +09001103 if (likely(n)) {
Christoph Lameter0f389ec2008-04-14 18:53:02 +03001104 atomic_long_inc(&n->nr_slabs);
Christoph Lameter205ab992008-04-14 19:11:40 +03001105 atomic_long_add(objects, &n->total_objects);
1106 }
Christoph Lameter0f389ec2008-04-14 18:53:02 +03001107}
Christoph Lameter205ab992008-04-14 19:11:40 +03001108static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects)
Christoph Lameter0f389ec2008-04-14 18:53:02 +03001109{
1110 struct kmem_cache_node *n = get_node(s, node);
1111
1112 atomic_long_dec(&n->nr_slabs);
Christoph Lameter205ab992008-04-14 19:11:40 +03001113 atomic_long_sub(objects, &n->total_objects);
Christoph Lameter0f389ec2008-04-14 18:53:02 +03001114}
1115
1116/* Object debug checks for alloc/free paths */
Christoph Lameter3ec09742007-05-16 22:11:00 -07001117static void setup_object_debug(struct kmem_cache *s, struct page *page,
1118 void *object)
1119{
Vlastimil Babka8fc8d662020-08-06 23:18:58 -07001120 if (!kmem_cache_debug_flags(s, SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON))
Christoph Lameter3ec09742007-05-16 22:11:00 -07001121 return;
1122
Christoph Lameterf7cb1932010-09-29 07:15:01 -05001123 init_object(s, object, SLUB_RED_INACTIVE);
Christoph Lameter3ec09742007-05-16 22:11:00 -07001124 init_tracking(s, object);
1125}
1126
Matthew Wilcox (Oracle)a50b8542019-09-23 15:34:25 -07001127static
1128void setup_page_debug(struct kmem_cache *s, struct page *page, void *addr)
Andrey Konovalova7101222019-02-20 22:19:23 -08001129{
Vlastimil Babka8fc8d662020-08-06 23:18:58 -07001130 if (!kmem_cache_debug_flags(s, SLAB_POISON))
Andrey Konovalova7101222019-02-20 22:19:23 -08001131 return;
1132
1133 metadata_access_enable();
Andrey Konovalovaa1ef4d2020-12-22 12:02:17 -08001134 memset(kasan_reset_tag(addr), POISON_INUSE, page_size(page));
Andrey Konovalova7101222019-02-20 22:19:23 -08001135 metadata_access_disable();
1136}
1137
Laura Abbottbecfda62016-03-15 14:55:06 -07001138static inline int alloc_consistency_checks(struct kmem_cache *s,
Qian Cai278d7752019-03-05 15:42:10 -08001139 struct page *page, void *object)
Christoph Lameter81819f02007-05-06 14:49:36 -07001140{
1141 if (!check_slab(s, page))
Laura Abbottbecfda62016-03-15 14:55:06 -07001142 return 0;
Christoph Lameter81819f02007-05-06 14:49:36 -07001143
Christoph Lameter81819f02007-05-06 14:49:36 -07001144 if (!check_valid_pointer(s, page, object)) {
1145 object_err(s, page, object, "Freelist Pointer check fails");
Laura Abbottbecfda62016-03-15 14:55:06 -07001146 return 0;
Christoph Lameter81819f02007-05-06 14:49:36 -07001147 }
1148
Christoph Lameterf7cb1932010-09-29 07:15:01 -05001149 if (!check_object(s, page, object, SLUB_RED_INACTIVE))
Laura Abbottbecfda62016-03-15 14:55:06 -07001150 return 0;
1151
1152 return 1;
1153}
1154
1155static noinline int alloc_debug_processing(struct kmem_cache *s,
1156 struct page *page,
1157 void *object, unsigned long addr)
1158{
1159 if (s->flags & SLAB_CONSISTENCY_CHECKS) {
Qian Cai278d7752019-03-05 15:42:10 -08001160 if (!alloc_consistency_checks(s, page, object))
Laura Abbottbecfda62016-03-15 14:55:06 -07001161 goto bad;
1162 }
Christoph Lameter81819f02007-05-06 14:49:36 -07001163
Christoph Lameter3ec09742007-05-16 22:11:00 -07001164 /* Success perform special debug activities for allocs */
1165 if (s->flags & SLAB_STORE_USER)
1166 set_track(s, object, TRACK_ALLOC, addr);
1167 trace(s, page, object, 1);
Christoph Lameterf7cb1932010-09-29 07:15:01 -05001168 init_object(s, object, SLUB_RED_ACTIVE);
Christoph Lameter81819f02007-05-06 14:49:36 -07001169 return 1;
Christoph Lameter3ec09742007-05-16 22:11:00 -07001170
Christoph Lameter81819f02007-05-06 14:49:36 -07001171bad:
1172 if (PageSlab(page)) {
1173 /*
1174 * If this is a slab page then lets do the best we can
1175 * to avoid issues in the future. Marking all objects
Christoph Lameter672bba32007-05-09 02:32:39 -07001176 * as used avoids touching the remaining objects.
Christoph Lameter81819f02007-05-06 14:49:36 -07001177 */
Christoph Lameter24922682007-07-17 04:03:18 -07001178 slab_fix(s, "Marking all objects used");
Christoph Lameter39b26462008-04-14 19:11:30 +03001179 page->inuse = page->objects;
Christoph Lametera973e9d2008-03-01 13:40:44 -08001180 page->freelist = NULL;
Christoph Lameter81819f02007-05-06 14:49:36 -07001181 }
1182 return 0;
1183}
1184
Laura Abbottbecfda62016-03-15 14:55:06 -07001185static inline int free_consistency_checks(struct kmem_cache *s,
1186 struct page *page, void *object, unsigned long addr)
1187{
1188 if (!check_valid_pointer(s, page, object)) {
1189 slab_err(s, page, "Invalid object pointer 0x%p", object);
1190 return 0;
1191 }
1192
1193 if (on_freelist(s, page, object)) {
1194 object_err(s, page, object, "Object already free");
1195 return 0;
1196 }
1197
1198 if (!check_object(s, page, object, SLUB_RED_ACTIVE))
1199 return 0;
1200
1201 if (unlikely(s != page->slab_cache)) {
1202 if (!PageSlab(page)) {
Joe Perches756a0252016-03-17 14:19:47 -07001203 slab_err(s, page, "Attempt to free object(0x%p) outside of slab",
1204 object);
Laura Abbottbecfda62016-03-15 14:55:06 -07001205 } else if (!page->slab_cache) {
1206 pr_err("SLUB <none>: no slab for object 0x%p.\n",
1207 object);
1208 dump_stack();
1209 } else
1210 object_err(s, page, object,
1211 "page slab pointer corrupt.");
1212 return 0;
1213 }
1214 return 1;
1215}
1216
Jesper Dangaard Brouer81084652015-11-20 15:57:46 -08001217/* Supports checking bulk free of a constructed freelist */
Laura Abbott282acb42016-03-15 14:54:59 -07001218static noinline int free_debug_processing(
Jesper Dangaard Brouer81084652015-11-20 15:57:46 -08001219 struct kmem_cache *s, struct page *page,
1220 void *head, void *tail, int bulk_cnt,
Laura Abbott282acb42016-03-15 14:54:59 -07001221 unsigned long addr)
Christoph Lameter81819f02007-05-06 14:49:36 -07001222{
Christoph Lameter19c7ff92012-05-30 12:54:46 -05001223 struct kmem_cache_node *n = get_node(s, page_to_nid(page));
Jesper Dangaard Brouer81084652015-11-20 15:57:46 -08001224 void *object = head;
1225 int cnt = 0;
Kees Cook3f649ab2020-06-03 13:09:38 -07001226 unsigned long flags;
Laura Abbott804aa132016-03-15 14:55:02 -07001227 int ret = 0;
Christoph Lameter5c2e4bb2011-06-01 12:25:54 -05001228
Laura Abbott282acb42016-03-15 14:54:59 -07001229 spin_lock_irqsave(&n->list_lock, flags);
Christoph Lameter881db7f2011-06-01 12:25:53 -05001230 slab_lock(page);
1231
Laura Abbottbecfda62016-03-15 14:55:06 -07001232 if (s->flags & SLAB_CONSISTENCY_CHECKS) {
1233 if (!check_slab(s, page))
1234 goto out;
1235 }
Christoph Lameter81819f02007-05-06 14:49:36 -07001236
Jesper Dangaard Brouer81084652015-11-20 15:57:46 -08001237next_object:
1238 cnt++;
1239
Laura Abbottbecfda62016-03-15 14:55:06 -07001240 if (s->flags & SLAB_CONSISTENCY_CHECKS) {
1241 if (!free_consistency_checks(s, page, object, addr))
1242 goto out;
Christoph Lameter81819f02007-05-06 14:49:36 -07001243 }
Christoph Lameter3ec09742007-05-16 22:11:00 -07001244
Christoph Lameter3ec09742007-05-16 22:11:00 -07001245 if (s->flags & SLAB_STORE_USER)
1246 set_track(s, object, TRACK_FREE, addr);
1247 trace(s, page, object, 0);
Jesper Dangaard Brouer81084652015-11-20 15:57:46 -08001248 /* Freepointer not overwritten by init_object(), SLAB_POISON moved it */
Christoph Lameterf7cb1932010-09-29 07:15:01 -05001249 init_object(s, object, SLUB_RED_INACTIVE);
Jesper Dangaard Brouer81084652015-11-20 15:57:46 -08001250
1251 /* Reached end of constructed freelist yet? */
1252 if (object != tail) {
1253 object = get_freepointer(s, object);
1254 goto next_object;
1255 }
Laura Abbott804aa132016-03-15 14:55:02 -07001256 ret = 1;
1257
Christoph Lameter5c2e4bb2011-06-01 12:25:54 -05001258out:
Jesper Dangaard Brouer81084652015-11-20 15:57:46 -08001259 if (cnt != bulk_cnt)
1260 slab_err(s, page, "Bulk freelist count(%d) invalid(%d)\n",
1261 bulk_cnt, cnt);
1262
Christoph Lameter881db7f2011-06-01 12:25:53 -05001263 slab_unlock(page);
Laura Abbott282acb42016-03-15 14:54:59 -07001264 spin_unlock_irqrestore(&n->list_lock, flags);
Laura Abbott804aa132016-03-15 14:55:02 -07001265 if (!ret)
1266 slab_fix(s, "Object at 0x%p not freed", object);
1267 return ret;
Christoph Lameter81819f02007-05-06 14:49:36 -07001268}
1269
Vlastimil Babkae17f1df2020-08-06 23:18:35 -07001270/*
1271 * Parse a block of slub_debug options. Blocks are delimited by ';'
1272 *
1273 * @str: start of block
1274 * @flags: returns parsed flags, or DEBUG_DEFAULT_FLAGS if none specified
1275 * @slabs: return start of list of slabs, or NULL when there's no list
1276 * @init: assume this is initial parsing and not per-kmem-create parsing
1277 *
1278 * returns the start of next block if there's any, or NULL
1279 */
1280static char *
1281parse_slub_debug_flags(char *str, slab_flags_t *flags, char **slabs, bool init)
1282{
1283 bool higher_order_disable = false;
1284
1285 /* Skip any completely empty blocks */
1286 while (*str && *str == ';')
1287 str++;
1288
1289 if (*str == ',') {
1290 /*
1291 * No options but restriction on slabs. This means full
1292 * debugging for slabs matching a pattern.
1293 */
1294 *flags = DEBUG_DEFAULT_FLAGS;
1295 goto check_slabs;
1296 }
1297 *flags = 0;
1298
1299 /* Determine which debug features should be switched on */
1300 for (; *str && *str != ',' && *str != ';'; str++) {
1301 switch (tolower(*str)) {
1302 case '-':
1303 *flags = 0;
1304 break;
1305 case 'f':
1306 *flags |= SLAB_CONSISTENCY_CHECKS;
1307 break;
1308 case 'z':
1309 *flags |= SLAB_RED_ZONE;
1310 break;
1311 case 'p':
1312 *flags |= SLAB_POISON;
1313 break;
1314 case 'u':
1315 *flags |= SLAB_STORE_USER;
1316 break;
1317 case 't':
1318 *flags |= SLAB_TRACE;
1319 break;
1320 case 'a':
1321 *flags |= SLAB_FAILSLAB;
1322 break;
1323 case 'o':
1324 /*
1325 * Avoid enabling debugging on caches if its minimum
1326 * order would increase as a result.
1327 */
1328 higher_order_disable = true;
1329 break;
1330 default:
1331 if (init)
1332 pr_err("slub_debug option '%c' unknown. skipped\n", *str);
1333 }
1334 }
1335check_slabs:
1336 if (*str == ',')
1337 *slabs = ++str;
1338 else
1339 *slabs = NULL;
1340
1341 /* Skip over the slab list */
1342 while (*str && *str != ';')
1343 str++;
1344
1345 /* Skip any completely empty blocks */
1346 while (*str && *str == ';')
1347 str++;
1348
1349 if (init && higher_order_disable)
1350 disable_higher_order_debug = 1;
1351
1352 if (*str)
1353 return str;
1354 else
1355 return NULL;
1356}
1357
Christoph Lameter41ecc552007-05-09 02:32:44 -07001358static int __init setup_slub_debug(char *str)
1359{
Vlastimil Babkae17f1df2020-08-06 23:18:35 -07001360 slab_flags_t flags;
1361 char *saved_str;
1362 char *slab_list;
1363 bool global_slub_debug_changed = false;
1364 bool slab_list_specified = false;
1365
Christoph Lameterf0630ff2007-07-15 23:38:14 -07001366 slub_debug = DEBUG_DEFAULT_FLAGS;
1367 if (*str++ != '=' || !*str)
1368 /*
1369 * No options specified. Switch on full debugging.
1370 */
1371 goto out;
Christoph Lameter41ecc552007-05-09 02:32:44 -07001372
Vlastimil Babkae17f1df2020-08-06 23:18:35 -07001373 saved_str = str;
1374 while (str) {
1375 str = parse_slub_debug_flags(str, &flags, &slab_list, true);
Christoph Lameterf0630ff2007-07-15 23:38:14 -07001376
Vlastimil Babkae17f1df2020-08-06 23:18:35 -07001377 if (!slab_list) {
1378 slub_debug = flags;
1379 global_slub_debug_changed = true;
1380 } else {
1381 slab_list_specified = true;
Christoph Lameterf0630ff2007-07-15 23:38:14 -07001382 }
1383 }
1384
Vlastimil Babkae17f1df2020-08-06 23:18:35 -07001385 /*
1386 * For backwards compatibility, a single list of flags with list of
1387 * slabs means debugging is only enabled for those slabs, so the global
1388 * slub_debug should be 0. We can extended that to multiple lists as
1389 * long as there is no option specifying flags without a slab list.
1390 */
1391 if (slab_list_specified) {
1392 if (!global_slub_debug_changed)
1393 slub_debug = 0;
1394 slub_debug_string = saved_str;
1395 }
Christoph Lameterf0630ff2007-07-15 23:38:14 -07001396out:
Vlastimil Babkaca0cab62020-08-06 23:18:51 -07001397 if (slub_debug != 0 || slub_debug_string)
1398 static_branch_enable(&slub_debug_enabled);
Alexander Potapenko64713842019-07-11 20:59:19 -07001399 if ((static_branch_unlikely(&init_on_alloc) ||
1400 static_branch_unlikely(&init_on_free)) &&
1401 (slub_debug & SLAB_POISON))
1402 pr_info("mem auto-init: SLAB_POISON will take precedence over init_on_alloc/init_on_free\n");
Christoph Lameter41ecc552007-05-09 02:32:44 -07001403 return 1;
1404}
1405
1406__setup("slub_debug", setup_slub_debug);
1407
Aaron Tomlinc5fd3ca2018-10-26 15:03:15 -07001408/*
1409 * kmem_cache_flags - apply debugging options to the cache
1410 * @object_size: the size of an object without meta data
1411 * @flags: flags to set
1412 * @name: name of the cache
Aaron Tomlinc5fd3ca2018-10-26 15:03:15 -07001413 *
1414 * Debug option(s) are applied to @flags. In addition to the debug
1415 * option(s), if a slab name (or multiple) is specified i.e.
1416 * slub_debug=<Debug-Options>,<slab name1>,<slab name2> ...
1417 * then only the select slabs will receive the debug option(s).
1418 */
Alexey Dobriyan0293d1f2018-04-05 16:21:24 -07001419slab_flags_t kmem_cache_flags(unsigned int object_size,
Nikolay Borisov37540002021-02-24 12:00:58 -08001420 slab_flags_t flags, const char *name)
Christoph Lameter41ecc552007-05-09 02:32:44 -07001421{
Aaron Tomlinc5fd3ca2018-10-26 15:03:15 -07001422 char *iter;
1423 size_t len;
Vlastimil Babkae17f1df2020-08-06 23:18:35 -07001424 char *next_block;
1425 slab_flags_t block_flags;
Johannes Bergca220592021-02-24 12:01:04 -08001426 slab_flags_t slub_debug_local = slub_debug;
1427
1428 /*
1429 * If the slab cache is for debugging (e.g. kmemleak) then
1430 * don't store user (stack trace) information by default,
1431 * but let the user enable it via the command line below.
1432 */
1433 if (flags & SLAB_NOLEAKTRACE)
1434 slub_debug_local &= ~SLAB_STORE_USER;
Aaron Tomlinc5fd3ca2018-10-26 15:03:15 -07001435
Aaron Tomlinc5fd3ca2018-10-26 15:03:15 -07001436 len = strlen(name);
Vlastimil Babkae17f1df2020-08-06 23:18:35 -07001437 next_block = slub_debug_string;
1438 /* Go through all blocks of debug options, see if any matches our slab's name */
1439 while (next_block) {
1440 next_block = parse_slub_debug_flags(next_block, &block_flags, &iter, false);
1441 if (!iter)
1442 continue;
1443 /* Found a block that has a slab list, search it */
1444 while (*iter) {
1445 char *end, *glob;
1446 size_t cmplen;
Aaron Tomlinc5fd3ca2018-10-26 15:03:15 -07001447
Vlastimil Babkae17f1df2020-08-06 23:18:35 -07001448 end = strchrnul(iter, ',');
1449 if (next_block && next_block < end)
1450 end = next_block - 1;
Aaron Tomlinc5fd3ca2018-10-26 15:03:15 -07001451
Vlastimil Babkae17f1df2020-08-06 23:18:35 -07001452 glob = strnchr(iter, end - iter, '*');
1453 if (glob)
1454 cmplen = glob - iter;
1455 else
1456 cmplen = max_t(size_t, len, (end - iter));
Aaron Tomlinc5fd3ca2018-10-26 15:03:15 -07001457
Vlastimil Babkae17f1df2020-08-06 23:18:35 -07001458 if (!strncmp(name, iter, cmplen)) {
1459 flags |= block_flags;
1460 return flags;
1461 }
1462
1463 if (!*end || *end == ';')
1464 break;
1465 iter = end + 1;
Aaron Tomlinc5fd3ca2018-10-26 15:03:15 -07001466 }
Aaron Tomlinc5fd3ca2018-10-26 15:03:15 -07001467 }
Christoph Lameterba0268a2007-09-11 15:24:11 -07001468
Johannes Bergca220592021-02-24 12:01:04 -08001469 return flags | slub_debug_local;
Christoph Lameter41ecc552007-05-09 02:32:44 -07001470}
Jesper Dangaard Brouerb4a64712015-11-20 15:57:41 -08001471#else /* !CONFIG_SLUB_DEBUG */
Christoph Lameter3ec09742007-05-16 22:11:00 -07001472static inline void setup_object_debug(struct kmem_cache *s,
1473 struct page *page, void *object) {}
Matthew Wilcox (Oracle)a50b8542019-09-23 15:34:25 -07001474static inline
1475void setup_page_debug(struct kmem_cache *s, struct page *page, void *addr) {}
Christoph Lameter41ecc552007-05-09 02:32:44 -07001476
Christoph Lameter3ec09742007-05-16 22:11:00 -07001477static inline int alloc_debug_processing(struct kmem_cache *s,
Eduard - Gabriel Munteanuce71e272008-08-19 20:43:25 +03001478 struct page *page, void *object, unsigned long addr) { return 0; }
Christoph Lameter41ecc552007-05-09 02:32:44 -07001479
Laura Abbott282acb42016-03-15 14:54:59 -07001480static inline int free_debug_processing(
Jesper Dangaard Brouer81084652015-11-20 15:57:46 -08001481 struct kmem_cache *s, struct page *page,
1482 void *head, void *tail, int bulk_cnt,
Laura Abbott282acb42016-03-15 14:54:59 -07001483 unsigned long addr) { return 0; }
Christoph Lameter41ecc552007-05-09 02:32:44 -07001484
Christoph Lameter41ecc552007-05-09 02:32:44 -07001485static inline int slab_pad_check(struct kmem_cache *s, struct page *page)
1486 { return 1; }
1487static inline int check_object(struct kmem_cache *s, struct page *page,
Christoph Lameterf7cb1932010-09-29 07:15:01 -05001488 void *object, u8 val) { return 1; }
Christoph Lameter5cc6eee2011-06-01 12:25:50 -05001489static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n,
1490 struct page *page) {}
Peter Zijlstrac65c1872014-01-10 13:23:49 +01001491static inline void remove_full(struct kmem_cache *s, struct kmem_cache_node *n,
1492 struct page *page) {}
Alexey Dobriyan0293d1f2018-04-05 16:21:24 -07001493slab_flags_t kmem_cache_flags(unsigned int object_size,
Nikolay Borisov37540002021-02-24 12:00:58 -08001494 slab_flags_t flags, const char *name)
Christoph Lameterba0268a2007-09-11 15:24:11 -07001495{
1496 return flags;
1497}
Christoph Lameter41ecc552007-05-09 02:32:44 -07001498#define slub_debug 0
Christoph Lameter0f389ec2008-04-14 18:53:02 +03001499
Ingo Molnarfdaa45e2009-09-15 11:00:26 +02001500#define disable_higher_order_debug 0
1501
Christoph Lameter0f389ec2008-04-14 18:53:02 +03001502static inline unsigned long slabs_node(struct kmem_cache *s, int node)
1503 { return 0; }
Alexander Beregalov26c02cf2009-06-11 14:08:48 +04001504static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
1505 { return 0; }
Christoph Lameter205ab992008-04-14 19:11:40 +03001506static inline void inc_slabs_node(struct kmem_cache *s, int node,
1507 int objects) {}
1508static inline void dec_slabs_node(struct kmem_cache *s, int node,
1509 int objects) {}
Christoph Lameter7d550c52010-08-25 14:07:16 -05001510
Dongli Zhang52f23472020-06-01 21:45:47 -07001511static bool freelist_corrupted(struct kmem_cache *s, struct page *page,
Eugeniu Roscadc07a722020-09-04 16:35:30 -07001512 void **freelist, void *nextfree)
Dongli Zhang52f23472020-06-01 21:45:47 -07001513{
1514 return false;
1515}
Andrey Ryabinin02e72cc2014-08-06 16:04:18 -07001516#endif /* CONFIG_SLUB_DEBUG */
1517
1518/*
1519 * Hooks for other subsystems that check memory allocations. In a typical
1520 * production configuration these hooks all should produce no code at all.
1521 */
Andrey Konovalov01165232018-12-28 00:29:37 -08001522static inline void *kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags)
Roman Bobnievd56791b2013-10-08 15:58:57 -07001523{
Andrey Konovalov53128242019-02-20 22:19:11 -08001524 ptr = kasan_kmalloc_large(ptr, size, flags);
Andrey Konovalova2f77572019-02-20 22:19:16 -08001525 /* As ptr might get tagged, call kmemleak hook after KASAN. */
Roman Bobnievd56791b2013-10-08 15:58:57 -07001526 kmemleak_alloc(ptr, size, 1, flags);
Andrey Konovalov53128242019-02-20 22:19:11 -08001527 return ptr;
Roman Bobnievd56791b2013-10-08 15:58:57 -07001528}
1529
Dmitry Vyukovee3ce772018-02-06 15:36:27 -08001530static __always_inline void kfree_hook(void *x)
Roman Bobnievd56791b2013-10-08 15:58:57 -07001531{
1532 kmemleak_free(x);
Andrey Konovalov027b37b2021-02-24 12:05:46 -08001533 kasan_kfree_large(x);
Roman Bobnievd56791b2013-10-08 15:58:57 -07001534}
1535
Andrey Konovalovd57a9642021-04-29 23:00:09 -07001536static __always_inline bool slab_free_hook(struct kmem_cache *s,
1537 void *x, bool init)
Roman Bobnievd56791b2013-10-08 15:58:57 -07001538{
1539 kmemleak_free_recursive(x, s->flags);
Christoph Lameter7d550c52010-08-25 14:07:16 -05001540
Andrey Ryabinin02e72cc2014-08-06 16:04:18 -07001541 /*
1542 * Trouble is that we may no longer disable interrupts in the fast path
1543 * So in order to make the debug calls that expect irqs to be
1544 * disabled we need to disable interrupts temporarily.
1545 */
Levin, Alexander (Sasha Levin)4675ff02017-11-15 17:36:02 -08001546#ifdef CONFIG_LOCKDEP
Andrey Ryabinin02e72cc2014-08-06 16:04:18 -07001547 {
1548 unsigned long flags;
1549
1550 local_irq_save(flags);
Andrey Ryabinin02e72cc2014-08-06 16:04:18 -07001551 debug_check_no_locks_freed(x, s->object_size);
1552 local_irq_restore(flags);
1553 }
1554#endif
1555 if (!(s->flags & SLAB_DEBUG_OBJECTS))
1556 debug_check_no_obj_freed(x, s->object_size);
Andrey Ryabinin0316bec2015-02-13 14:39:42 -08001557
Marco Elvercfbe1632020-08-06 23:19:12 -07001558 /* Use KCSAN to help debug racy use-after-free. */
1559 if (!(s->flags & SLAB_TYPESAFE_BY_RCU))
1560 __kcsan_check_access(x, s->object_size,
1561 KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ASSERT);
1562
Andrey Konovalovd57a9642021-04-29 23:00:09 -07001563 /*
1564 * As memory initialization might be integrated into KASAN,
1565 * kasan_slab_free and initialization memset's must be
1566 * kept together to avoid discrepancies in behavior.
1567 *
1568 * The initialization memset's clear the object and the metadata,
1569 * but don't touch the SLAB redzone.
1570 */
1571 if (init) {
1572 int rsize;
1573
1574 if (!kasan_has_integrated_init())
1575 memset(kasan_reset_tag(x), 0, s->object_size);
1576 rsize = (s->flags & SLAB_RED_ZONE) ? s->red_left_pad : 0;
1577 memset((char *)kasan_reset_tag(x) + s->inuse, 0,
1578 s->size - s->inuse - rsize);
1579 }
1580 /* KASAN might put x into memory quarantine, delaying its reuse. */
1581 return kasan_slab_free(s, x, init);
Andrey Ryabinin02e72cc2014-08-06 16:04:18 -07001582}
Christoph Lameter205ab992008-04-14 19:11:40 +03001583
/*
 * Run slab_free_hook() on every object of a detached freelist and rebuild the
 * list from the objects that may be reused right away.
 *
 * @head: in: first object of the list; out: first object of the rebuilt list,
 *        or NULL if no object is left
 * @tail: in: last object of the list, or NULL for a single object at *head;
 *        out: last object of the rebuilt list, or NULL if it holds at most
 *        one object
 *
 * A KFENCE object is only passed through slab_free_hook() (without init) and
 * the list is left as it is.
 *
 * Returns true if there is something left to free.
 */
static inline bool slab_free_freelist_hook(struct kmem_cache *s,
					   void **head, void **tail)
{
	void *cursor = *head;
	void *last = *tail;
	void *obj;

	/* A NULL tail means the list consists of the head object only */
	if (!last)
		last = *head;

	if (is_kfence_address(cursor)) {
		slab_free_hook(s, cursor, false);
		return true;
	}

	/* Head and tail of the reconstructed freelist */
	*head = NULL;
	*tail = NULL;

	do {
		obj = cursor;
		/* Fetch the successor before the hook may wipe the object */
		cursor = get_freepointer(s, obj);

		/* Objects whose reuse has to be delayed stay off the new list */
		if (slab_free_hook(s, obj, slab_want_init_on_free(s)))
			continue;

		/* Move object to the front of the new freelist */
		set_freepointer(s, obj, *head);
		*head = obj;
		if (!*tail)
			*tail = obj;
	} while (obj != last);

	if (*head == *tail)
		*tail = NULL;

	return *head != NULL;
}
1620
Andrey Konovalov4d176712018-12-28 00:30:23 -08001621static void *setup_object(struct kmem_cache *s, struct page *page,
Thomas Gleixner588f8ba2015-09-04 15:45:48 -07001622 void *object)
1623{
1624 setup_object_debug(s, page, object);
Andrey Konovalov4d176712018-12-28 00:30:23 -08001625 object = kasan_init_slab_obj(s, object);
Thomas Gleixner588f8ba2015-09-04 15:45:48 -07001626 if (unlikely(s->ctor)) {
1627 kasan_unpoison_object_data(s, object);
1628 s->ctor(object);
1629 kasan_poison_object_data(s, object);
1630 }
Andrey Konovalov4d176712018-12-28 00:30:23 -08001631 return object;
Thomas Gleixner588f8ba2015-09-04 15:45:48 -07001632}
1633
Christoph Lameter81819f02007-05-06 14:49:36 -07001634/*
1635 * Slab allocation and freeing
1636 */
Vladimir Davydov5dfb4172014-06-04 16:06:38 -07001637static inline struct page *alloc_slab_page(struct kmem_cache *s,
1638 gfp_t flags, int node, struct kmem_cache_order_objects oo)
Christoph Lameter65c33762008-04-14 19:11:40 +03001639{
Vladimir Davydov5dfb4172014-06-04 16:06:38 -07001640 struct page *page;
Alexey Dobriyan19af27a2018-04-05 16:21:39 -07001641 unsigned int order = oo_order(oo);
Christoph Lameter65c33762008-04-14 19:11:40 +03001642
Christoph Lameter2154a332010-07-09 14:07:10 -05001643 if (node == NUMA_NO_NODE)
Vladimir Davydov5dfb4172014-06-04 16:06:38 -07001644 page = alloc_pages(flags, order);
Christoph Lameter65c33762008-04-14 19:11:40 +03001645 else
Vlastimil Babka96db8002015-09-08 15:03:50 -07001646 page = __alloc_pages_node(node, flags, order);
Vladimir Davydov5dfb4172014-06-04 16:06:38 -07001647
Vladimir Davydov5dfb4172014-06-04 16:06:38 -07001648 return page;
Christoph Lameter65c33762008-04-14 19:11:40 +03001649}
1650
Thomas Garnier210e7a42016-07-26 15:21:59 -07001651#ifdef CONFIG_SLAB_FREELIST_RANDOM
1652/* Pre-initialize the random sequence cache */
1653static int init_cache_random_seq(struct kmem_cache *s)
1654{
Alexey Dobriyan19af27a2018-04-05 16:21:39 -07001655 unsigned int count = oo_objects(s->oo);
Thomas Garnier210e7a42016-07-26 15:21:59 -07001656 int err;
Thomas Garnier210e7a42016-07-26 15:21:59 -07001657
Sean Reesa8100072017-02-08 14:30:59 -08001658 /* Bailout if already initialised */
1659 if (s->random_seq)
1660 return 0;
1661
Thomas Garnier210e7a42016-07-26 15:21:59 -07001662 err = cache_random_seq_create(s, count, GFP_KERNEL);
1663 if (err) {
1664 pr_err("SLUB: Unable to initialize free list for %s\n",
1665 s->name);
1666 return err;
1667 }
1668
1669 /* Transform to an offset on the set of pages */
1670 if (s->random_seq) {
Alexey Dobriyan19af27a2018-04-05 16:21:39 -07001671 unsigned int i;
1672
Thomas Garnier210e7a42016-07-26 15:21:59 -07001673 for (i = 0; i < count; i++)
1674 s->random_seq[i] *= s->size;
1675 }
1676 return 0;
1677}
1678
1679/* Initialize each random sequence freelist per cache */
1680static void __init init_freelist_randomization(void)
1681{
1682 struct kmem_cache *s;
1683
1684 mutex_lock(&slab_mutex);
1685
1686 list_for_each_entry(s, &slab_caches, list)
1687 init_cache_random_seq(s);
1688
1689 mutex_unlock(&slab_mutex);
1690}
1691
1692/* Get the next entry on the pre-computed freelist randomized */
1693static void *next_freelist_entry(struct kmem_cache *s, struct page *page,
1694 unsigned long *pos, void *start,
1695 unsigned long page_limit,
1696 unsigned long freelist_count)
1697{
1698 unsigned int idx;
1699
1700 /*
1701 * If the target page allocation failed, the number of objects on the
1702 * page might be smaller than the usual size defined by the cache.
1703 */
1704 do {
1705 idx = s->random_seq[*pos];
1706 *pos += 1;
1707 if (*pos >= freelist_count)
1708 *pos = 0;
1709 } while (unlikely(idx >= page_limit));
1710
1711 return (char *)start + idx;
1712}
1713
1714/* Shuffle the single linked freelist based on a random pre-computed sequence */
1715static bool shuffle_freelist(struct kmem_cache *s, struct page *page)
1716{
1717 void *start;
1718 void *cur;
1719 void *next;
1720 unsigned long idx, pos, page_limit, freelist_count;
1721
1722 if (page->objects < 2 || !s->random_seq)
1723 return false;
1724
1725 freelist_count = oo_objects(s->oo);
1726 pos = get_random_int() % freelist_count;
1727
1728 page_limit = page->objects * s->size;
1729 start = fixup_red_left(s, page_address(page));
1730
1731 /* First entry is used as the base of the freelist */
1732 cur = next_freelist_entry(s, page, &pos, start, page_limit,
1733 freelist_count);
Andrey Konovalov4d176712018-12-28 00:30:23 -08001734 cur = setup_object(s, page, cur);
Thomas Garnier210e7a42016-07-26 15:21:59 -07001735 page->freelist = cur;
1736
1737 for (idx = 1; idx < page->objects; idx++) {
Thomas Garnier210e7a42016-07-26 15:21:59 -07001738 next = next_freelist_entry(s, page, &pos, start, page_limit,
1739 freelist_count);
Andrey Konovalov4d176712018-12-28 00:30:23 -08001740 next = setup_object(s, page, next);
Thomas Garnier210e7a42016-07-26 15:21:59 -07001741 set_freepointer(s, cur, next);
1742 cur = next;
1743 }
Thomas Garnier210e7a42016-07-26 15:21:59 -07001744 set_freepointer(s, cur, NULL);
1745
1746 return true;
1747}
1748#else
/*
 * Freelist randomization is compiled out: there is no sequence to set up and
 * freelists are never shuffled.
 */
static inline int init_cache_random_seq(struct kmem_cache *s)
{
	return 0;
}

static inline void init_freelist_randomization(void)
{
}

/* Always reports that no shuffling took place */
static inline bool shuffle_freelist(struct kmem_cache *s, struct page *page)
{
	return false;
}
1758#endif /* CONFIG_SLAB_FREELIST_RANDOM */
1759
Christoph Lameter81819f02007-05-06 14:49:36 -07001760static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
1761{
Pekka Enberg06428782008-01-07 23:20:27 -08001762 struct page *page;
Christoph Lameter834f3d12008-04-14 19:11:31 +03001763 struct kmem_cache_order_objects oo = s->oo;
Pekka Enbergba522702009-06-24 21:59:51 +03001764 gfp_t alloc_gfp;
Andrey Konovalov4d176712018-12-28 00:30:23 -08001765 void *start, *p, *next;
Matthew Wilcox (Oracle)a50b8542019-09-23 15:34:25 -07001766 int idx;
Thomas Garnier210e7a42016-07-26 15:21:59 -07001767 bool shuffle;
Christoph Lameter81819f02007-05-06 14:49:36 -07001768
Christoph Lameter7e0528d2011-06-01 12:25:44 -05001769 flags &= gfp_allowed_mask;
1770
Mel Gormand0164ad2015-11-06 16:28:21 -08001771 if (gfpflags_allow_blocking(flags))
Christoph Lameter7e0528d2011-06-01 12:25:44 -05001772 local_irq_enable();
1773
Christoph Lameterb7a49f02008-02-14 14:21:32 -08001774 flags |= s->allocflags;
Mel Gormane12ba742007-10-16 01:25:52 -07001775
Pekka Enbergba522702009-06-24 21:59:51 +03001776 /*
1777 * Let the initial higher-order allocation fail under memory pressure
1778 * so we fall-back to the minimum order allocation.
1779 */
1780 alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL;
Mel Gormand0164ad2015-11-06 16:28:21 -08001781 if ((alloc_gfp & __GFP_DIRECT_RECLAIM) && oo_order(oo) > oo_order(s->min))
Mel Gorman444eb2a42016-03-17 14:19:23 -07001782 alloc_gfp = (alloc_gfp | __GFP_NOMEMALLOC) & ~(__GFP_RECLAIM|__GFP_NOFAIL);
Pekka Enbergba522702009-06-24 21:59:51 +03001783
Vladimir Davydov5dfb4172014-06-04 16:06:38 -07001784 page = alloc_slab_page(s, alloc_gfp, node, oo);
Christoph Lameter65c33762008-04-14 19:11:40 +03001785 if (unlikely(!page)) {
1786 oo = s->min;
Joonsoo Kim80c3a992014-03-12 17:26:20 +09001787 alloc_gfp = flags;
Christoph Lameter65c33762008-04-14 19:11:40 +03001788 /*
1789 * Allocation may have failed due to fragmentation.
1790 * Try a lower order alloc if possible
1791 */
Vladimir Davydov5dfb4172014-06-04 16:06:38 -07001792 page = alloc_slab_page(s, alloc_gfp, node, oo);
Thomas Gleixner588f8ba2015-09-04 15:45:48 -07001793 if (unlikely(!page))
1794 goto out;
1795 stat(s, ORDER_FALLBACK);
Christoph Lameter65c33762008-04-14 19:11:40 +03001796 }
Vegard Nossum5a896d92008-04-04 00:54:48 +02001797
Christoph Lameter834f3d12008-04-14 19:11:31 +03001798 page->objects = oo_objects(oo);
Christoph Lameter81819f02007-05-06 14:49:36 -07001799
Roman Gushchin2e9bd482021-02-24 12:03:11 -08001800 account_slab_page(page, oo_order(oo), s, flags);
Roman Gushchin1f3147b2020-12-29 15:15:07 -08001801
Glauber Costa1b4f59e32012-10-22 18:05:36 +04001802 page->slab_cache = s;
Joonsoo Kimc03f94c2012-05-18 00:47:47 +09001803 __SetPageSlab(page);
Michal Hocko2f064f32015-08-21 14:11:51 -07001804 if (page_is_pfmemalloc(page))
Mel Gorman072bb0a2012-07-31 16:43:58 -07001805 SetPageSlabPfmemalloc(page);
Christoph Lameter81819f02007-05-06 14:49:36 -07001806
Andrey Konovalova7101222019-02-20 22:19:23 -08001807 kasan_poison_slab(page);
1808
Christoph Lameter81819f02007-05-06 14:49:36 -07001809 start = page_address(page);
Christoph Lameter81819f02007-05-06 14:49:36 -07001810
Matthew Wilcox (Oracle)a50b8542019-09-23 15:34:25 -07001811 setup_page_debug(s, page, start);
Andrey Ryabinin0316bec2015-02-13 14:39:42 -08001812
Thomas Garnier210e7a42016-07-26 15:21:59 -07001813 shuffle = shuffle_freelist(s, page);
1814
1815 if (!shuffle) {
Andrey Konovalov4d176712018-12-28 00:30:23 -08001816 start = fixup_red_left(s, start);
1817 start = setup_object(s, page, start);
1818 page->freelist = start;
Andrey Konovalov18e50662019-02-20 22:19:28 -08001819 for (idx = 0, p = start; idx < page->objects - 1; idx++) {
1820 next = p + s->size;
1821 next = setup_object(s, page, next);
1822 set_freepointer(s, p, next);
1823 p = next;
1824 }
1825 set_freepointer(s, p, NULL);
Christoph Lameter81819f02007-05-06 14:49:36 -07001826 }
Christoph Lameter81819f02007-05-06 14:49:36 -07001827
Christoph Lametere6e82ea2011-08-09 16:12:24 -05001828 page->inuse = page->objects;
Christoph Lameter8cb0a502011-06-01 12:25:46 -05001829 page->frozen = 1;
Thomas Gleixner588f8ba2015-09-04 15:45:48 -07001830
Christoph Lameter81819f02007-05-06 14:49:36 -07001831out:
Mel Gormand0164ad2015-11-06 16:28:21 -08001832 if (gfpflags_allow_blocking(flags))
Thomas Gleixner588f8ba2015-09-04 15:45:48 -07001833 local_irq_disable();
1834 if (!page)
1835 return NULL;
1836
Thomas Gleixner588f8ba2015-09-04 15:45:48 -07001837 inc_slabs_node(s, page_to_nid(page), page->objects);
1838
Christoph Lameter81819f02007-05-06 14:49:36 -07001839 return page;
1840}
1841
Thomas Gleixner588f8ba2015-09-04 15:45:48 -07001842static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
1843{
Long Li44405092020-08-06 23:18:28 -07001844 if (unlikely(flags & GFP_SLAB_BUG_MASK))
1845 flags = kmalloc_fix_flags(flags);
Thomas Gleixner588f8ba2015-09-04 15:45:48 -07001846
1847 return allocate_slab(s,
1848 flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node);
1849}
1850
Christoph Lameter81819f02007-05-06 14:49:36 -07001851static void __free_slab(struct kmem_cache *s, struct page *page)
1852{
Christoph Lameter834f3d12008-04-14 19:11:31 +03001853 int order = compound_order(page);
1854 int pages = 1 << order;
Christoph Lameter81819f02007-05-06 14:49:36 -07001855
Vlastimil Babka8fc8d662020-08-06 23:18:58 -07001856 if (kmem_cache_debug_flags(s, SLAB_CONSISTENCY_CHECKS)) {
Christoph Lameter81819f02007-05-06 14:49:36 -07001857 void *p;
1858
1859 slab_pad_check(s, page);
Christoph Lameter224a88b2008-04-14 19:11:31 +03001860 for_each_object(p, s, page_address(page),
1861 page->objects)
Christoph Lameterf7cb1932010-09-29 07:15:01 -05001862 check_object(s, page, p, SLUB_RED_INACTIVE);
Christoph Lameter81819f02007-05-06 14:49:36 -07001863 }
1864
Mel Gorman072bb0a2012-07-31 16:43:58 -07001865 __ClearPageSlabPfmemalloc(page);
Christoph Lameter49bd5222008-04-14 18:52:18 +03001866 __ClearPageSlab(page);
Vlastimil Babka0c06dd72020-12-14 19:04:29 -08001867 /* In union with page->mapping where page allocator expects NULL */
1868 page->slab_cache = NULL;
Nick Piggin1eb5ac62009-05-05 19:13:44 +10001869 if (current->reclaim_state)
1870 current->reclaim_state->reclaimed_slab += pages;
Roman Gushchin74d555b2020-08-06 23:21:44 -07001871 unaccount_slab_page(page, order, s);
Vladimir Davydov27ee57c2016-03-17 14:17:35 -07001872 __free_pages(page, order);
Christoph Lameter81819f02007-05-06 14:49:36 -07001873}
1874
1875static void rcu_free_slab(struct rcu_head *h)
1876{
Matthew Wilcoxbf68c212018-06-07 17:09:05 -07001877 struct page *page = container_of(h, struct page, rcu_head);
Lai Jiangshanda9a6382011-03-10 15:22:00 +08001878
Glauber Costa1b4f59e32012-10-22 18:05:36 +04001879 __free_slab(page->slab_cache, page);
Christoph Lameter81819f02007-05-06 14:49:36 -07001880}
1881
1882static void free_slab(struct kmem_cache *s, struct page *page)
1883{
Paul E. McKenney5f0d5a32017-01-18 02:53:44 -08001884 if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU)) {
Matthew Wilcoxbf68c212018-06-07 17:09:05 -07001885 call_rcu(&page->rcu_head, rcu_free_slab);
Christoph Lameter81819f02007-05-06 14:49:36 -07001886 } else
1887 __free_slab(s, page);
1888}
1889
1890static void discard_slab(struct kmem_cache *s, struct page *page)
1891{
Christoph Lameter205ab992008-04-14 19:11:40 +03001892 dec_slabs_node(s, page_to_nid(page), page->objects);
Christoph Lameter81819f02007-05-06 14:49:36 -07001893 free_slab(s, page);
1894}
1895
1896/*
Christoph Lameter5cc6eee2011-06-01 12:25:50 -05001897 * Management of partially allocated slabs.
Christoph Lameter81819f02007-05-06 14:49:36 -07001898 */
Steven Rostedt1e4dd942014-02-10 14:25:46 -08001899static inline void
1900__add_partial(struct kmem_cache_node *n, struct page *page, int tail)
Christoph Lameter81819f02007-05-06 14:49:36 -07001901{
Christoph Lametere95eed52007-05-06 14:49:44 -07001902 n->nr_partial++;
Shaohua Li136333d2011-08-24 08:57:52 +08001903 if (tail == DEACTIVATE_TO_TAIL)
Tobin C. Harding916ac052019-05-13 17:16:12 -07001904 list_add_tail(&page->slab_list, &n->partial);
Christoph Lameter7c2e1322008-01-07 23:20:27 -08001905 else
Tobin C. Harding916ac052019-05-13 17:16:12 -07001906 list_add(&page->slab_list, &n->partial);
Christoph Lameter81819f02007-05-06 14:49:36 -07001907}
1908
Steven Rostedt1e4dd942014-02-10 14:25:46 -08001909static inline void add_partial(struct kmem_cache_node *n,
1910 struct page *page, int tail)
1911{
1912 lockdep_assert_held(&n->list_lock);
1913 __add_partial(n, page, tail);
1914}
1915
Christoph Lameter5cc6eee2011-06-01 12:25:50 -05001916static inline void remove_partial(struct kmem_cache_node *n,
Christoph Lameter62e346a2010-09-28 08:10:28 -05001917 struct page *page)
1918{
Peter Zijlstrac65c1872014-01-10 13:23:49 +01001919 lockdep_assert_held(&n->list_lock);
Tobin C. Harding916ac052019-05-13 17:16:12 -07001920 list_del(&page->slab_list);
Dmitry Safonov52b4b952016-02-17 13:11:37 -08001921 n->nr_partial--;
Christoph Lameter62e346a2010-09-28 08:10:28 -05001922}
1923
Christoph Lameter81819f02007-05-06 14:49:36 -07001924/*
Christoph Lameter7ced3712012-05-09 10:09:53 -05001925 * Remove slab from the partial list, freeze it and
1926 * return the pointer to the freelist.
Christoph Lameter81819f02007-05-06 14:49:36 -07001927 *
Christoph Lameter497b66f2011-08-09 16:12:26 -05001928 * Returns a list of objects or NULL if it fails.
Christoph Lameter81819f02007-05-06 14:49:36 -07001929 */
Christoph Lameter497b66f2011-08-09 16:12:26 -05001930static inline void *acquire_slab(struct kmem_cache *s,
Christoph Lameteracd19fd2011-08-09 16:12:25 -05001931 struct kmem_cache_node *n, struct page *page,
Joonsoo Kim633b0762013-01-21 17:01:25 +09001932 int mode, int *objects)
Christoph Lameter81819f02007-05-06 14:49:36 -07001933{
Christoph Lameter2cfb7452011-06-01 12:25:52 -05001934 void *freelist;
1935 unsigned long counters;
1936 struct page new;
1937
Peter Zijlstrac65c1872014-01-10 13:23:49 +01001938 lockdep_assert_held(&n->list_lock);
1939
Christoph Lameter2cfb7452011-06-01 12:25:52 -05001940 /*
1941 * Zap the freelist and set the frozen bit.
1942 * The old freelist is the list of objects for the
1943 * per cpu allocation list.
1944 */
Christoph Lameter7ced3712012-05-09 10:09:53 -05001945 freelist = page->freelist;
1946 counters = page->counters;
1947 new.counters = counters;
Joonsoo Kim633b0762013-01-21 17:01:25 +09001948 *objects = new.objects - new.inuse;
Pekka Enberg23910c52012-06-04 10:14:58 +03001949 if (mode) {
Christoph Lameter7ced3712012-05-09 10:09:53 -05001950 new.inuse = page->objects;
Pekka Enberg23910c52012-06-04 10:14:58 +03001951 new.freelist = NULL;
1952 } else {
1953 new.freelist = freelist;
1954 }
Christoph Lameter2cfb7452011-06-01 12:25:52 -05001955
Dave Hansena0132ac2014-01-29 14:05:50 -08001956 VM_BUG_ON(new.frozen);
Christoph Lameter7ced3712012-05-09 10:09:53 -05001957 new.frozen = 1;
Christoph Lameter2cfb7452011-06-01 12:25:52 -05001958
Christoph Lameter7ced3712012-05-09 10:09:53 -05001959 if (!__cmpxchg_double_slab(s, page,
Christoph Lameter2cfb7452011-06-01 12:25:52 -05001960 freelist, counters,
Joonsoo Kim02d76332012-05-17 00:13:02 +09001961 new.freelist, new.counters,
Christoph Lameter7ced3712012-05-09 10:09:53 -05001962 "acquire_slab"))
Christoph Lameter7ced3712012-05-09 10:09:53 -05001963 return NULL;
Christoph Lameter2cfb7452011-06-01 12:25:52 -05001964
1965 remove_partial(n, page);
Christoph Lameter7ced3712012-05-09 10:09:53 -05001966 WARN_ON(!freelist);
Christoph Lameter49e22582011-08-09 16:12:27 -05001967 return freelist;
Christoph Lameter81819f02007-05-06 14:49:36 -07001968}
1969
Joonsoo Kim633b0762013-01-21 17:01:25 +09001970static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain);
Joonsoo Kim8ba00bb2012-09-17 14:09:09 -07001971static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags);
Christoph Lameter49e22582011-08-09 16:12:27 -05001972
Christoph Lameter81819f02007-05-06 14:49:36 -07001973/*
Christoph Lameter672bba32007-05-09 02:32:39 -07001974 * Try to allocate a partial slab from a specific node.
Christoph Lameter81819f02007-05-06 14:49:36 -07001975 */
Joonsoo Kim8ba00bb2012-09-17 14:09:09 -07001976static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
1977 struct kmem_cache_cpu *c, gfp_t flags)
Christoph Lameter81819f02007-05-06 14:49:36 -07001978{
Christoph Lameter49e22582011-08-09 16:12:27 -05001979 struct page *page, *page2;
1980 void *object = NULL;
Alexey Dobriyane5d99982018-04-05 16:21:10 -07001981 unsigned int available = 0;
Joonsoo Kim633b0762013-01-21 17:01:25 +09001982 int objects;
Christoph Lameter81819f02007-05-06 14:49:36 -07001983
1984 /*
1985 * Racy check. If we mistakenly see no partial slabs then we
1986 * just allocate an empty slab. If we mistakenly try to get a
Chen Tao70b6d252020-10-15 20:10:01 -07001987 * partial slab and there is none available then get_partial()
Christoph Lameter672bba32007-05-09 02:32:39 -07001988 * will return NULL.
Christoph Lameter81819f02007-05-06 14:49:36 -07001989 */
1990 if (!n || !n->nr_partial)
1991 return NULL;
1992
1993 spin_lock(&n->list_lock);
Tobin C. Harding916ac052019-05-13 17:16:12 -07001994 list_for_each_entry_safe(page, page2, &n->partial, slab_list) {
Joonsoo Kim8ba00bb2012-09-17 14:09:09 -07001995 void *t;
Christoph Lameter49e22582011-08-09 16:12:27 -05001996
Joonsoo Kim8ba00bb2012-09-17 14:09:09 -07001997 if (!pfmemalloc_match(page, flags))
1998 continue;
1999
Joonsoo Kim633b0762013-01-21 17:01:25 +09002000 t = acquire_slab(s, n, page, object == NULL, &objects);
Christoph Lameter49e22582011-08-09 16:12:27 -05002001 if (!t)
Linus Torvalds9b1ea292021-03-10 10:18:04 -08002002 break;
Christoph Lameter49e22582011-08-09 16:12:27 -05002003
Joonsoo Kim633b0762013-01-21 17:01:25 +09002004 available += objects;
Alex,Shi12d79632011-09-07 10:26:36 +08002005 if (!object) {
Christoph Lameter49e22582011-08-09 16:12:27 -05002006 c->page = page;
Christoph Lameter49e22582011-08-09 16:12:27 -05002007 stat(s, ALLOC_FROM_PARTIAL);
Christoph Lameter49e22582011-08-09 16:12:27 -05002008 object = t;
Christoph Lameter49e22582011-08-09 16:12:27 -05002009 } else {
Joonsoo Kim633b0762013-01-21 17:01:25 +09002010 put_cpu_partial(s, page, 0);
Alex Shi8028dce2012-02-03 23:34:56 +08002011 stat(s, CPU_PARTIAL_NODE);
Christoph Lameter49e22582011-08-09 16:12:27 -05002012 }
Joonsoo Kim345c9052013-06-19 14:05:52 +09002013 if (!kmem_cache_has_cpu_partial(s)
Wei Yange6d0e1d2017-07-06 15:36:34 -07002014 || available > slub_cpu_partial(s) / 2)
Christoph Lameter49e22582011-08-09 16:12:27 -05002015 break;
2016
Christoph Lameter497b66f2011-08-09 16:12:26 -05002017 }
Christoph Lameter81819f02007-05-06 14:49:36 -07002018 spin_unlock(&n->list_lock);
Christoph Lameter497b66f2011-08-09 16:12:26 -05002019 return object;
Christoph Lameter81819f02007-05-06 14:49:36 -07002020}
2021
2022/*
Christoph Lameter672bba32007-05-09 02:32:39 -07002023 * Get a page from somewhere. Search in increasing NUMA distances.
Christoph Lameter81819f02007-05-06 14:49:36 -07002024 */
Joonsoo Kimde3ec032012-01-27 00:12:23 -08002025static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
Christoph Lameteracd19fd2011-08-09 16:12:25 -05002026 struct kmem_cache_cpu *c)
Christoph Lameter81819f02007-05-06 14:49:36 -07002027{
2028#ifdef CONFIG_NUMA
2029 struct zonelist *zonelist;
Mel Gormandd1a2392008-04-28 02:12:17 -07002030 struct zoneref *z;
Mel Gorman54a6eb52008-04-28 02:12:16 -07002031 struct zone *zone;
Joonsoo Kim97a225e2020-06-03 15:59:01 -07002032 enum zone_type highest_zoneidx = gfp_zone(flags);
Christoph Lameter497b66f2011-08-09 16:12:26 -05002033 void *object;
Mel Gormancc9a6c82012-03-21 16:34:11 -07002034 unsigned int cpuset_mems_cookie;
Christoph Lameter81819f02007-05-06 14:49:36 -07002035
2036 /*
Christoph Lameter672bba32007-05-09 02:32:39 -07002037 * The defrag ratio allows a configuration of the tradeoffs between
2038 * inter node defragmentation and node local allocations. A lower
2039 * defrag_ratio increases the tendency to do local allocations
2040 * instead of attempting to obtain partial slabs from other nodes.
Christoph Lameter81819f02007-05-06 14:49:36 -07002041 *
Christoph Lameter672bba32007-05-09 02:32:39 -07002042 * If the defrag_ratio is set to 0 then kmalloc() always
2043 * returns node local objects. If the ratio is higher then kmalloc()
2044 * may return off node objects because partial slabs are obtained
2045 * from other nodes and filled up.
Christoph Lameter81819f02007-05-06 14:49:36 -07002046 *
Li Peng43efd3e2016-05-19 17:10:43 -07002047 * If /sys/kernel/slab/xx/remote_node_defrag_ratio is set to 100
2048 * (which makes defrag_ratio = 1000) then every (well almost)
2049 * allocation will first attempt to defrag slab caches on other nodes.
2050 * This means scanning over all nodes to look for partial slabs which
2051 * may be expensive if we do it every time we are trying to find a slab
Christoph Lameter672bba32007-05-09 02:32:39 -07002052 * with available objects.
Christoph Lameter81819f02007-05-06 14:49:36 -07002053 */
Christoph Lameter98246012008-01-07 23:20:26 -08002054 if (!s->remote_node_defrag_ratio ||
2055 get_cycles() % 1024 > s->remote_node_defrag_ratio)
Christoph Lameter81819f02007-05-06 14:49:36 -07002056 return NULL;
2057
Mel Gormancc9a6c82012-03-21 16:34:11 -07002058 do {
Mel Gormand26914d2014-04-03 14:47:24 -07002059 cpuset_mems_cookie = read_mems_allowed_begin();
David Rientjes2a389612014-04-07 15:37:29 -07002060 zonelist = node_zonelist(mempolicy_slab_node(), flags);
Joonsoo Kim97a225e2020-06-03 15:59:01 -07002061 for_each_zone_zonelist(zone, z, zonelist, highest_zoneidx) {
Mel Gormancc9a6c82012-03-21 16:34:11 -07002062 struct kmem_cache_node *n;
Christoph Lameter81819f02007-05-06 14:49:36 -07002063
Mel Gormancc9a6c82012-03-21 16:34:11 -07002064 n = get_node(s, zone_to_nid(zone));
Christoph Lameter81819f02007-05-06 14:49:36 -07002065
Vladimir Davydovdee2f8a2014-12-12 16:58:28 -08002066 if (n && cpuset_zone_allowed(zone, flags) &&
Mel Gormancc9a6c82012-03-21 16:34:11 -07002067 n->nr_partial > s->min_partial) {
Joonsoo Kim8ba00bb2012-09-17 14:09:09 -07002068 object = get_partial_node(s, n, c, flags);
Mel Gormancc9a6c82012-03-21 16:34:11 -07002069 if (object) {
2070 /*
Mel Gormand26914d2014-04-03 14:47:24 -07002071 * Don't check read_mems_allowed_retry()
2072 * here - if mems_allowed was updated in
2073 * parallel, that was a harmless race
2074 * between allocation and the cpuset
2075 * update
Mel Gormancc9a6c82012-03-21 16:34:11 -07002076 */
Mel Gormancc9a6c82012-03-21 16:34:11 -07002077 return object;
2078 }
Miao Xiec0ff7452010-05-24 14:32:08 -07002079 }
Christoph Lameter81819f02007-05-06 14:49:36 -07002080 }
Mel Gormand26914d2014-04-03 14:47:24 -07002081 } while (read_mems_allowed_retry(cpuset_mems_cookie));
Tobin C. Harding6dfd1b62019-05-13 17:16:09 -07002082#endif /* CONFIG_NUMA */
Christoph Lameter81819f02007-05-06 14:49:36 -07002083 return NULL;
2084}
2085
2086/*
2087 * Get a partial page, lock it and return it.
2088 */
Christoph Lameter497b66f2011-08-09 16:12:26 -05002089static void *get_partial(struct kmem_cache *s, gfp_t flags, int node,
Christoph Lameteracd19fd2011-08-09 16:12:25 -05002090 struct kmem_cache_cpu *c)
Christoph Lameter81819f02007-05-06 14:49:36 -07002091{
Christoph Lameter497b66f2011-08-09 16:12:26 -05002092 void *object;
Joonsoo Kima561ce02014-10-09 15:26:15 -07002093 int searchnode = node;
2094
2095 if (node == NUMA_NO_NODE)
2096 searchnode = numa_mem_id();
Christoph Lameter81819f02007-05-06 14:49:36 -07002097
Joonsoo Kim8ba00bb2012-09-17 14:09:09 -07002098 object = get_partial_node(s, get_node(s, searchnode), c, flags);
Christoph Lameter497b66f2011-08-09 16:12:26 -05002099 if (object || node != NUMA_NO_NODE)
2100 return object;
Christoph Lameter81819f02007-05-06 14:49:36 -07002101
Christoph Lameteracd19fd2011-08-09 16:12:25 -05002102 return get_any_partial(s, flags, c);
Christoph Lameter81819f02007-05-06 14:49:36 -07002103}
2104
#ifndef CONFIG_PREEMPTION
/*
 * No preemption supported, therefore there is also no need to tell
 * different cpus apart.
 */
#define TID_STEP 1
#else
/*
 * Calculate the next globally unique transaction for disambiguation
 * during cmpxchg. The transactions start with the cpu number and are then
 * incremented by CONFIG_NR_CPUS (rounded up to a power of two).
 */
#define TID_STEP roundup_pow_of_two(CONFIG_NR_CPUS)
#endif

/* Advance a transaction id by one step (TID_STEP). */
static inline unsigned long next_tid(unsigned long tid)
{
	return TID_STEP + tid;
}
2124
#ifdef SLUB_DEBUG_CMPXCHG
/* Remainder of @tid modulo TID_STEP: the cpu part of a transaction id. */
static inline unsigned int tid_to_cpu(unsigned long tid)
{
	unsigned long cpu = tid % TID_STEP;

	return cpu;
}

/* @tid divided by TID_STEP: the event-count part of a transaction id. */
static inline unsigned long tid_to_event(unsigned long tid)
{
	unsigned long event = tid / TID_STEP;

	return event;
}
#endif	/* SLUB_DEBUG_CMPXCHG */
Christoph Lameter8a5ec0b2011-02-25 11:38:54 -06002136
/* Initial transaction id of a cpu: simply the cpu number itself. */
static inline unsigned int init_tid(int cpu)
{
	unsigned int tid = cpu;

	return tid;
}
2141
2142static inline void note_cmpxchg_failure(const char *n,
2143 const struct kmem_cache *s, unsigned long tid)
2144{
2145#ifdef SLUB_DEBUG_CMPXCHG
2146 unsigned long actual_tid = __this_cpu_read(s->cpu_slab->tid);
2147
Fabian Frederickf9f58282014-06-04 16:06:34 -07002148 pr_info("%s %s: cmpxchg redo ", n, s->name);
Christoph Lameter8a5ec0b2011-02-25 11:38:54 -06002149
Thomas Gleixner923717c2019-10-15 21:18:12 +02002150#ifdef CONFIG_PREEMPTION
Christoph Lameter8a5ec0b2011-02-25 11:38:54 -06002151 if (tid_to_cpu(tid) != tid_to_cpu(actual_tid))
Fabian Frederickf9f58282014-06-04 16:06:34 -07002152 pr_warn("due to cpu change %d -> %d\n",
Christoph Lameter8a5ec0b2011-02-25 11:38:54 -06002153 tid_to_cpu(tid), tid_to_cpu(actual_tid));
2154 else
2155#endif
2156 if (tid_to_event(tid) != tid_to_event(actual_tid))
Fabian Frederickf9f58282014-06-04 16:06:34 -07002157 pr_warn("due to cpu running other code. Event %ld->%ld\n",
Christoph Lameter8a5ec0b2011-02-25 11:38:54 -06002158 tid_to_event(tid), tid_to_event(actual_tid));
2159 else
Fabian Frederickf9f58282014-06-04 16:06:34 -07002160 pr_warn("for unknown reason: actual=%lx was=%lx target=%lx\n",
Christoph Lameter8a5ec0b2011-02-25 11:38:54 -06002161 actual_tid, tid, next_tid(tid));
2162#endif
Christoph Lameter4fdccdf2011-03-22 13:35:00 -05002163 stat(s, CMPXCHG_DOUBLE_CPU_FAIL);
Christoph Lameter8a5ec0b2011-02-25 11:38:54 -06002164}
2165
Fengguang Wu788e1aa2012-09-28 16:34:05 +08002166static void init_kmem_cache_cpus(struct kmem_cache *s)
Christoph Lameter8a5ec0b2011-02-25 11:38:54 -06002167{
Christoph Lameter8a5ec0b2011-02-25 11:38:54 -06002168 int cpu;
2169
2170 for_each_possible_cpu(cpu)
2171 per_cpu_ptr(s->cpu_slab, cpu)->tid = init_tid(cpu);
Christoph Lameter8a5ec0b2011-02-25 11:38:54 -06002172}
Christoph Lameter2cfb7452011-06-01 12:25:52 -05002173
2174/*
2175 * Remove the cpu slab
2176 */
Chen Gangd0e0ac92013-07-15 09:05:29 +08002177static void deactivate_slab(struct kmem_cache *s, struct page *page,
Wei Yangd4ff6d32017-07-06 15:36:25 -07002178 void *freelist, struct kmem_cache_cpu *c)
Christoph Lameter81819f02007-05-06 14:49:36 -07002179{
Christoph Lameter2cfb7452011-06-01 12:25:52 -05002180 enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE };
Christoph Lameter2cfb7452011-06-01 12:25:52 -05002181 struct kmem_cache_node *n = get_node(s, page_to_nid(page));
Vlastimil Babkad930ff02021-02-24 12:01:19 -08002182 int lock = 0, free_delta = 0;
Christoph Lameter2cfb7452011-06-01 12:25:52 -05002183 enum slab_modes l = M_NONE, m = M_NONE;
Vlastimil Babkad930ff02021-02-24 12:01:19 -08002184 void *nextfree, *freelist_iter, *freelist_tail;
Shaohua Li136333d2011-08-24 08:57:52 +08002185 int tail = DEACTIVATE_TO_HEAD;
Christoph Lameter2cfb7452011-06-01 12:25:52 -05002186 struct page new;
2187 struct page old;
Christoph Lameter8ff12cf2008-02-07 17:47:41 -08002188
Christoph Lameter2cfb7452011-06-01 12:25:52 -05002189 if (page->freelist) {
Christoph Lameter84e554e62009-12-18 16:26:23 -06002190 stat(s, DEACTIVATE_REMOTE_FREES);
Shaohua Li136333d2011-08-24 08:57:52 +08002191 tail = DEACTIVATE_TO_TAIL;
Christoph Lameter894b8782007-05-10 03:15:16 -07002192 }
Christoph Lameter2cfb7452011-06-01 12:25:52 -05002193
Christoph Lameter2cfb7452011-06-01 12:25:52 -05002194 /*
Vlastimil Babkad930ff02021-02-24 12:01:19 -08002195 * Stage one: Count the objects on cpu's freelist as free_delta and
2196 * remember the last object in freelist_tail for later splicing.
Christoph Lameter2cfb7452011-06-01 12:25:52 -05002197 */
Vlastimil Babkad930ff02021-02-24 12:01:19 -08002198 freelist_tail = NULL;
2199 freelist_iter = freelist;
2200 while (freelist_iter) {
2201 nextfree = get_freepointer(s, freelist_iter);
Christoph Lameter2cfb7452011-06-01 12:25:52 -05002202
Dongli Zhang52f23472020-06-01 21:45:47 -07002203 /*
2204 * If 'nextfree' is invalid, it is possible that the object at
Vlastimil Babkad930ff02021-02-24 12:01:19 -08002205 * 'freelist_iter' is already corrupted. So isolate all objects
2206 * starting at 'freelist_iter' by skipping them.
Dongli Zhang52f23472020-06-01 21:45:47 -07002207 */
Vlastimil Babkad930ff02021-02-24 12:01:19 -08002208 if (freelist_corrupted(s, page, &freelist_iter, nextfree))
Dongli Zhang52f23472020-06-01 21:45:47 -07002209 break;
2210
Vlastimil Babkad930ff02021-02-24 12:01:19 -08002211 freelist_tail = freelist_iter;
2212 free_delta++;
Christoph Lameter2cfb7452011-06-01 12:25:52 -05002213
Vlastimil Babkad930ff02021-02-24 12:01:19 -08002214 freelist_iter = nextfree;
Christoph Lameter2cfb7452011-06-01 12:25:52 -05002215 }
2216
2217 /*
Vlastimil Babkad930ff02021-02-24 12:01:19 -08002218 * Stage two: Unfreeze the page while splicing the per-cpu
2219 * freelist to the head of page's freelist.
2220 *
2221 * Ensure that the page is unfrozen while the list presence
2222 * reflects the actual number of objects during unfreeze.
Christoph Lameter2cfb7452011-06-01 12:25:52 -05002223 *
2224 * We setup the list membership and then perform a cmpxchg
2225 * with the count. If there is a mismatch then the page
2226 * is not unfrozen but the page is on the wrong list.
2227 *
2228 * Then we restart the process which may have to remove
2229 * the page from the list that we just put it on again
2230 * because the number of objects in the slab may have
2231 * changed.
2232 */
2233redo:
2234
Vlastimil Babkad930ff02021-02-24 12:01:19 -08002235 old.freelist = READ_ONCE(page->freelist);
2236 old.counters = READ_ONCE(page->counters);
Dave Hansena0132ac2014-01-29 14:05:50 -08002237 VM_BUG_ON(!old.frozen);
Christoph Lameter2cfb7452011-06-01 12:25:52 -05002238
2239 /* Determine target state of the slab */
2240 new.counters = old.counters;
Vlastimil Babkad930ff02021-02-24 12:01:19 -08002241 if (freelist_tail) {
2242 new.inuse -= free_delta;
2243 set_freepointer(s, freelist_tail, old.freelist);
Christoph Lameter2cfb7452011-06-01 12:25:52 -05002244 new.freelist = freelist;
2245 } else
2246 new.freelist = old.freelist;
2247
2248 new.frozen = 0;
2249
Joonsoo Kim8a5b20a2014-07-02 15:22:35 -07002250 if (!new.inuse && n->nr_partial >= s->min_partial)
Christoph Lameter2cfb7452011-06-01 12:25:52 -05002251 m = M_FREE;
2252 else if (new.freelist) {
2253 m = M_PARTIAL;
2254 if (!lock) {
2255 lock = 1;
2256 /*
Wei Yang8bb4e7a2019-03-05 15:46:22 -08002257 * Taking the spinlock removes the possibility
Christoph Lameter2cfb7452011-06-01 12:25:52 -05002258 * that acquire_slab() will see a slab page that
2259 * is frozen
2260 */
2261 spin_lock(&n->list_lock);
2262 }
2263 } else {
2264 m = M_FULL;
Vlastimil Babka965c4842020-12-14 19:04:36 -08002265 if (kmem_cache_debug_flags(s, SLAB_STORE_USER) && !lock) {
Christoph Lameter2cfb7452011-06-01 12:25:52 -05002266 lock = 1;
2267 /*
2268 * This also ensures that the scanning of full
2269 * slabs from diagnostic functions will not see
2270 * any frozen slabs.
2271 */
2272 spin_lock(&n->list_lock);
2273 }
2274 }
2275
2276 if (l != m) {
Christoph Lameter2cfb7452011-06-01 12:25:52 -05002277 if (l == M_PARTIAL)
Christoph Lameter2cfb7452011-06-01 12:25:52 -05002278 remove_partial(n, page);
Christoph Lameter2cfb7452011-06-01 12:25:52 -05002279 else if (l == M_FULL)
Peter Zijlstrac65c1872014-01-10 13:23:49 +01002280 remove_full(s, n, page);
Christoph Lameter2cfb7452011-06-01 12:25:52 -05002281
Wei Yang88349a22018-12-28 00:33:13 -08002282 if (m == M_PARTIAL)
Christoph Lameter2cfb7452011-06-01 12:25:52 -05002283 add_partial(n, page, tail);
Wei Yang88349a22018-12-28 00:33:13 -08002284 else if (m == M_FULL)
Christoph Lameter2cfb7452011-06-01 12:25:52 -05002285 add_full(s, n, page);
Christoph Lameter2cfb7452011-06-01 12:25:52 -05002286 }
2287
2288 l = m;
Christoph Lameter1d071712011-07-14 12:49:12 -05002289 if (!__cmpxchg_double_slab(s, page,
Christoph Lameter2cfb7452011-06-01 12:25:52 -05002290 old.freelist, old.counters,
2291 new.freelist, new.counters,
2292 "unfreezing slab"))
2293 goto redo;
2294
Christoph Lameter2cfb7452011-06-01 12:25:52 -05002295 if (lock)
2296 spin_unlock(&n->list_lock);
2297
Wei Yang88349a22018-12-28 00:33:13 -08002298 if (m == M_PARTIAL)
2299 stat(s, tail);
2300 else if (m == M_FULL)
2301 stat(s, DEACTIVATE_FULL);
2302 else if (m == M_FREE) {
Christoph Lameter2cfb7452011-06-01 12:25:52 -05002303 stat(s, DEACTIVATE_EMPTY);
2304 discard_slab(s, page);
2305 stat(s, FREE_SLAB);
2306 }
Wei Yangd4ff6d32017-07-06 15:36:25 -07002307
2308 c->page = NULL;
2309 c->freelist = NULL;
Christoph Lameter81819f02007-05-06 14:49:36 -07002310}
2311
/*
 * Unfreeze all the cpu partial slabs of @c.
 *
 * Every page is popped off the per cpu partial list, unfrozen, and then
 * either added to the tail of its node's partial list or, when it has no
 * objects in use and the node already holds at least min_partial partial
 * slabs, collected and discarded after the node lock has been dropped.
 *
 * This function must be called with interrupts disabled
 * for the cpu using c (or some other guarantee must be there
 * to guarantee no concurrent accesses).
 *
 * Does nothing without CONFIG_SLUB_CPU_PARTIAL.
 */
static void unfreeze_partials(struct kmem_cache *s,
		struct kmem_cache_cpu *c)
{
#ifdef CONFIG_SLUB_CPU_PARTIAL
	struct kmem_cache_node *n = NULL;
	struct page *discard_page = NULL;
	struct page *page;

	while ((page = slub_percpu_partial(c))) {
		struct kmem_cache_node *page_node;
		struct page new;
		struct page old;

		slub_set_percpu_partial(c, page);

		/* Switch node locks only when the node actually changes. */
		page_node = get_node(s, page_to_nid(page));
		if (n != page_node) {
			if (n)
				spin_unlock(&n->list_lock);

			n = page_node;
			spin_lock(&n->list_lock);
		}

		/* Clear the frozen bit, retrying until the cmpxchg sticks. */
		do {
			old.freelist = page->freelist;
			old.counters = page->counters;
			VM_BUG_ON(!old.frozen);

			new.freelist = old.freelist;
			new.counters = old.counters;

			new.frozen = 0;
		} while (!__cmpxchg_double_slab(s, page,
				old.freelist, old.counters,
				new.freelist, new.counters,
				"unfreezing slab"));

		if (likely(new.inuse || n->nr_partial < s->min_partial)) {
			add_partial(n, page, DEACTIVATE_TO_TAIL);
			stat(s, FREE_ADD_PARTIAL);
		} else {
			/* Empty and surplus: chain it up for discarding. */
			page->next = discard_page;
			discard_page = page;
		}
	}

	if (n)
		spin_unlock(&n->list_lock);

	while ((page = discard_page) != NULL) {
		discard_page = page->next;

		stat(s, DEACTIVATE_EMPTY);
		discard_slab(s, page);
		stat(s, FREE_SLAB);
	}
#endif	/* CONFIG_SLUB_CPU_PARTIAL */
}
2379
/*
 * Put a page that was just frozen (in __slab_free|get_partial_node) onto
 * the per cpu partial list of the current cpu.
 *
 * If @drain is set and the list already accounts for more than
 * slub_cpu_partial(s) free objects, the existing pages are first moved to
 * the per node partial lists and the per cpu list starts over with @page.
 * If slub_cpu_partial(s) is zero, the list is unfrozen again right after
 * @page has been published.
 *
 * Does nothing without CONFIG_SLUB_CPU_PARTIAL.
 */
static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
{
#ifdef CONFIG_SLUB_CPU_PARTIAL
	struct page *oldpage;
	int pages;
	int pobjects;

	preempt_disable();
	do {
		oldpage = this_cpu_read(s->cpu_slab->partial);

		/* The list head carries the running totals for the list. */
		pobjects = oldpage ? oldpage->pobjects : 0;
		pages = oldpage ? oldpage->pages : 0;

		if (oldpage && drain && pobjects > slub_cpu_partial(s)) {
			unsigned long flags;

			/*
			 * partial array is full. Move the existing
			 * set to the per node partial list.
			 */
			local_irq_save(flags);
			unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
			local_irq_restore(flags);
			oldpage = NULL;
			pobjects = 0;
			pages = 0;
			stat(s, CPU_PARTIAL_DRAIN);
		}

		pages++;
		pobjects += page->objects - page->inuse;

		page->pages = pages;
		page->pobjects = pobjects;
		page->next = oldpage;

	} while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page)
								!= oldpage);

	if (unlikely(!slub_cpu_partial(s))) {
		unsigned long flags;

		local_irq_save(flags);
		unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
		local_irq_restore(flags);
	}
	preempt_enable();
#endif	/* CONFIG_SLUB_CPU_PARTIAL */
}
2438
Christoph Lameterdfb4f092007-10-16 01:26:05 -07002439static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
Christoph Lameter81819f02007-05-06 14:49:36 -07002440{
Christoph Lameter84e554e62009-12-18 16:26:23 -06002441 stat(s, CPUSLAB_FLUSH);
Wei Yangd4ff6d32017-07-06 15:36:25 -07002442 deactivate_slab(s, c->page, c->freelist, c);
Christoph Lameterc17dda42012-05-09 10:09:57 -05002443
2444 c->tid = next_tid(c->tid);
Christoph Lameter81819f02007-05-06 14:49:36 -07002445}
2446
2447/*
2448 * Flush cpu slab.
Christoph Lameter6446faa2008-02-15 23:45:26 -08002449 *
Christoph Lameter81819f02007-05-06 14:49:36 -07002450 * Called from IPI handler with interrupts disabled.
2451 */
Christoph Lameter0c710012007-07-17 04:03:24 -07002452static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
Christoph Lameter81819f02007-05-06 14:49:36 -07002453{
Christoph Lameter9dfc6e62009-12-18 16:26:20 -06002454 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
Christoph Lameter81819f02007-05-06 14:49:36 -07002455
Wei Yang1265ef22018-12-28 00:33:06 -08002456 if (c->page)
2457 flush_slab(s, c);
Christoph Lameter49e22582011-08-09 16:12:27 -05002458
Wei Yang1265ef22018-12-28 00:33:06 -08002459 unfreeze_partials(s, c);
Christoph Lameter81819f02007-05-06 14:49:36 -07002460}
2461
/*
 * Per cpu callback used by flush_all(): @d is the kmem_cache whose slabs
 * are flushed on the cpu executing the callback.
 */
static void flush_cpu_slab(void *d)
{
	struct kmem_cache *cache = d;
	int cpu = smp_processor_id();

	__flush_cpu_slab(cache, cpu);
}
2468
Gilad Ben-Yossefa8364d52012-03-28 14:42:44 -07002469static bool has_cpu_slab(int cpu, void *info)
2470{
2471 struct kmem_cache *s = info;
2472 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
2473
Wei Yanga93cf072017-07-06 15:36:31 -07002474 return c->page || slub_percpu_partial(c);
Gilad Ben-Yossefa8364d52012-03-28 14:42:44 -07002475}
2476
Christoph Lameter81819f02007-05-06 14:49:36 -07002477static void flush_all(struct kmem_cache *s)
2478{
Sebastian Andrzej Siewiorcb923152020-01-17 10:01:37 +01002479 on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1);
Christoph Lameter81819f02007-05-06 14:49:36 -07002480}
2481
2482/*
Sebastian Andrzej Siewiora96a87b2016-08-18 14:57:19 +02002483 * Use the cpu notifier to insure that the cpu slabs are flushed when
2484 * necessary.
2485 */
2486static int slub_cpu_dead(unsigned int cpu)
2487{
2488 struct kmem_cache *s;
2489 unsigned long flags;
2490
2491 mutex_lock(&slab_mutex);
2492 list_for_each_entry(s, &slab_caches, list) {
2493 local_irq_save(flags);
2494 __flush_cpu_slab(s, cpu);
2495 local_irq_restore(flags);
2496 }
2497 mutex_unlock(&slab_mutex);
2498 return 0;
2499}
2500
/*
 * Check if the objects in a per cpu structure fit numa
 * locality expectations.
 *
 * Returns 1 if @page is acceptable for a request targeting @node, i.e.
 * no particular node was requested (NUMA_NO_NODE) or the page lives on
 * that node. Without CONFIG_NUMA every page matches.
 */
static inline int node_match(struct page *page, int node)
{
#ifdef CONFIG_NUMA
	return node == NUMA_NO_NODE || page_to_nid(page) == node;
#else
	return 1;
#endif
}
2513
#ifdef CONFIG_SLUB_DEBUG
/* Number of objects in @page that are not in use. */
static int count_free(struct page *page)
{
	int nr_free = page->objects - page->inuse;

	return nr_free;
}

/* Current value of the total object counter of node @n. */
static inline unsigned long node_nr_objs(struct kmem_cache_node *n)
{
	return atomic_long_read(&n->total_objects);
}
#endif	/* CONFIG_SLUB_DEBUG */
2525
#if defined(CONFIG_SLUB_DEBUG) || defined(CONFIG_SYSFS)
/*
 * Sum @get_count() over every page on the partial list of node @n.
 * The list is walked with n->list_lock held and interrupts disabled.
 */
static unsigned long count_partial(struct kmem_cache_node *n,
					int (*get_count)(struct page *))
{
	unsigned long irqflags;
	unsigned long total = 0;
	struct page *page;

	spin_lock_irqsave(&n->list_lock, irqflags);
	list_for_each_entry(page, &n->partial, slab_list) {
		total += get_count(page);
	}
	spin_unlock_irqrestore(&n->list_lock, irqflags);

	return total;
}
#endif	/* CONFIG_SLUB_DEBUG || CONFIG_SYSFS */
Alexander Beregalov26c02cf2009-06-11 14:08:48 +04002541
Pekka Enberg781b2ba2009-06-10 18:50:32 +03002542static noinline void
2543slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
2544{
David Rientjes9a02d692014-06-04 16:06:36 -07002545#ifdef CONFIG_SLUB_DEBUG
2546 static DEFINE_RATELIMIT_STATE(slub_oom_rs, DEFAULT_RATELIMIT_INTERVAL,
2547 DEFAULT_RATELIMIT_BURST);
Pekka Enberg781b2ba2009-06-10 18:50:32 +03002548 int node;
Christoph Lameterfa45dc22014-08-06 16:04:09 -07002549 struct kmem_cache_node *n;
Pekka Enberg781b2ba2009-06-10 18:50:32 +03002550
David Rientjes9a02d692014-06-04 16:06:36 -07002551 if ((gfpflags & __GFP_NOWARN) || !__ratelimit(&slub_oom_rs))
2552 return;
2553
Vlastimil Babka5b3810e2016-03-15 14:56:33 -07002554 pr_warn("SLUB: Unable to allocate memory on node %d, gfp=%#x(%pGg)\n",
2555 nid, gfpflags, &gfpflags);
Alexey Dobriyan19af27a2018-04-05 16:21:39 -07002556 pr_warn(" cache: %s, object size: %u, buffer size: %u, default order: %u, min order: %u\n",
Fabian Frederickf9f58282014-06-04 16:06:34 -07002557 s->name, s->object_size, s->size, oo_order(s->oo),
2558 oo_order(s->min));
Pekka Enberg781b2ba2009-06-10 18:50:32 +03002559
Christoph Lameter3b0efdf2012-06-13 10:24:57 -05002560 if (oo_order(s->min) > get_order(s->object_size))
Fabian Frederickf9f58282014-06-04 16:06:34 -07002561 pr_warn(" %s debugging increased min order, use slub_debug=O to disable.\n",
2562 s->name);
David Rientjesfa5ec8a2009-07-07 00:14:14 -07002563
Christoph Lameterfa45dc22014-08-06 16:04:09 -07002564 for_each_kmem_cache_node(s, node, n) {
Pekka Enberg781b2ba2009-06-10 18:50:32 +03002565 unsigned long nr_slabs;
2566 unsigned long nr_objs;
2567 unsigned long nr_free;
2568
Alexander Beregalov26c02cf2009-06-11 14:08:48 +04002569 nr_free = count_partial(n, count_free);
2570 nr_slabs = node_nr_slabs(n);
2571 nr_objs = node_nr_objs(n);
Pekka Enberg781b2ba2009-06-10 18:50:32 +03002572
Fabian Frederickf9f58282014-06-04 16:06:34 -07002573 pr_warn(" node %d: slabs: %ld, objs: %ld, free: %ld\n",
Pekka Enberg781b2ba2009-06-10 18:50:32 +03002574 node, nr_slabs, nr_objs, nr_free);
2575 }
David Rientjes9a02d692014-06-04 16:06:36 -07002576#endif
Pekka Enberg781b2ba2009-06-10 18:50:32 +03002577}
2578
Christoph Lameter497b66f2011-08-09 16:12:26 -05002579static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
2580 int node, struct kmem_cache_cpu **pc)
2581{
Christoph Lameter6faa6832012-05-09 10:09:51 -05002582 void *freelist;
Christoph Lameter188fd062012-05-09 10:09:55 -05002583 struct kmem_cache_cpu *c = *pc;
2584 struct page *page;
Christoph Lameter497b66f2011-08-09 16:12:26 -05002585
Matthew Wilcox128227e2018-06-07 17:05:13 -07002586 WARN_ON_ONCE(s->ctor && (flags & __GFP_ZERO));
2587
Christoph Lameter188fd062012-05-09 10:09:55 -05002588 freelist = get_partial(s, flags, node, c);
2589
2590 if (freelist)
2591 return freelist;
2592
2593 page = new_slab(s, flags, node);
Christoph Lameter497b66f2011-08-09 16:12:26 -05002594 if (page) {
Christoph Lameter7c8e0182014-06-04 16:07:56 -07002595 c = raw_cpu_ptr(s->cpu_slab);
Christoph Lameter497b66f2011-08-09 16:12:26 -05002596 if (c->page)
2597 flush_slab(s, c);
2598
2599 /*
2600 * No other reference to the page yet so we can
2601 * muck around with it freely without cmpxchg
2602 */
Christoph Lameter6faa6832012-05-09 10:09:51 -05002603 freelist = page->freelist;
Christoph Lameter497b66f2011-08-09 16:12:26 -05002604 page->freelist = NULL;
2605
2606 stat(s, ALLOC_SLAB);
Christoph Lameter497b66f2011-08-09 16:12:26 -05002607 c->page = page;
2608 *pc = c;
Peng Wangedde82b2019-03-05 15:42:00 -08002609 }
Christoph Lameter497b66f2011-08-09 16:12:26 -05002610
Christoph Lameter6faa6832012-05-09 10:09:51 -05002611 return freelist;
Christoph Lameter497b66f2011-08-09 16:12:26 -05002612}
2613
Mel Gorman072bb0a2012-07-31 16:43:58 -07002614static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags)
2615{
2616 if (unlikely(PageSlabPfmemalloc(page)))
2617 return gfp_pfmemalloc_allowed(gfpflags);
2618
2619 return true;
2620}
2621
Christoph Lameterdfb4f092007-10-16 01:26:05 -07002622/*
Chen Gangd0e0ac92013-07-15 09:05:29 +08002623 * Check the page->freelist of a page and either transfer the freelist to the
2624 * per cpu freelist or deactivate the page.
Christoph Lameter213eeb92011-11-11 14:07:14 -06002625 *
2626 * The page is still frozen if the return value is not NULL.
2627 *
2628 * If this function returns NULL then the page has been unfrozen.
Joonsoo Kimd24ac772012-05-18 22:01:17 +09002629 *
2630 * This function must be called with interrupt disabled.
Christoph Lameter213eeb92011-11-11 14:07:14 -06002631 */
2632static inline void *get_freelist(struct kmem_cache *s, struct page *page)
2633{
2634 struct page new;
2635 unsigned long counters;
2636 void *freelist;
2637
2638 do {
2639 freelist = page->freelist;
2640 counters = page->counters;
Christoph Lameter6faa6832012-05-09 10:09:51 -05002641
Christoph Lameter213eeb92011-11-11 14:07:14 -06002642 new.counters = counters;
Dave Hansena0132ac2014-01-29 14:05:50 -08002643 VM_BUG_ON(!new.frozen);
Christoph Lameter213eeb92011-11-11 14:07:14 -06002644
2645 new.inuse = page->objects;
2646 new.frozen = freelist != NULL;
2647
Joonsoo Kimd24ac772012-05-18 22:01:17 +09002648 } while (!__cmpxchg_double_slab(s, page,
Christoph Lameter213eeb92011-11-11 14:07:14 -06002649 freelist, counters,
2650 NULL, new.counters,
2651 "get_freelist"));
2652
2653 return freelist;
2654}
2655
2656/*
Christoph Lameter894b8782007-05-10 03:15:16 -07002657 * Slow path. The lockless freelist is empty or we need to perform
2658 * debugging duties.
Christoph Lameter81819f02007-05-06 14:49:36 -07002659 *
Christoph Lameter894b8782007-05-10 03:15:16 -07002660 * Processing is still very fast if new objects have been freed to the
2661 * regular freelist. In that case we simply take over the regular freelist
2662 * as the lockless freelist and zap the regular freelist.
Christoph Lameter81819f02007-05-06 14:49:36 -07002663 *
Christoph Lameter894b8782007-05-10 03:15:16 -07002664 * If that is not working then we fall back to the partial lists. We take the
2665 * first element of the freelist as the object to allocate now and move the
2666 * rest of the freelist to the lockless freelist.
2667 *
2668 * And if we were unable to get a new slab from the partial slab lists then
Christoph Lameter6446faa2008-02-15 23:45:26 -08002669 * we need to allocate a new slab. This is the slowest path since it involves
2670 * a call to the page allocator and the setup of a new slab.
Christoph Lametera380a3c2015-11-20 15:57:35 -08002671 *
2672 * Version of __slab_alloc to use when we know that interrupts are
2673 * already disabled (which is the case for bulk allocation).
Christoph Lameter81819f02007-05-06 14:49:36 -07002674 */
Christoph Lametera380a3c2015-11-20 15:57:35 -08002675static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
Eduard - Gabriel Munteanuce71e272008-08-19 20:43:25 +03002676 unsigned long addr, struct kmem_cache_cpu *c)
Christoph Lameter81819f02007-05-06 14:49:36 -07002677{
Christoph Lameter6faa6832012-05-09 10:09:51 -05002678 void *freelist;
Christoph Lameterf6e7def2012-05-09 10:09:58 -05002679 struct page *page;
Christoph Lameter81819f02007-05-06 14:49:36 -07002680
Abel Wu9f986d92020-10-13 16:48:43 -07002681 stat(s, ALLOC_SLOWPATH);
2682
Christoph Lameterf6e7def2012-05-09 10:09:58 -05002683 page = c->page;
Vlastimil Babka0715e6c2020-03-21 18:22:37 -07002684 if (!page) {
2685 /*
2686 * if the node is not online or has no normal memory, just
2687 * ignore the node constraint
2688 */
2689 if (unlikely(node != NUMA_NO_NODE &&
Vlastimil Babka7e1fa932021-02-24 12:01:12 -08002690 !node_isset(node, slab_nodes)))
Vlastimil Babka0715e6c2020-03-21 18:22:37 -07002691 node = NUMA_NO_NODE;
Christoph Lameter81819f02007-05-06 14:49:36 -07002692 goto new_slab;
Vlastimil Babka0715e6c2020-03-21 18:22:37 -07002693 }
Christoph Lameter49e22582011-08-09 16:12:27 -05002694redo:
Christoph Lameter6faa6832012-05-09 10:09:51 -05002695
Christoph Lameter57d437d2012-05-09 10:09:59 -05002696 if (unlikely(!node_match(page, node))) {
Vlastimil Babka0715e6c2020-03-21 18:22:37 -07002697 /*
2698 * same as above but node_match() being false already
2699 * implies node != NUMA_NO_NODE
2700 */
Vlastimil Babka7e1fa932021-02-24 12:01:12 -08002701 if (!node_isset(node, slab_nodes)) {
Vlastimil Babka0715e6c2020-03-21 18:22:37 -07002702 node = NUMA_NO_NODE;
2703 goto redo;
2704 } else {
Joonsoo Kima561ce02014-10-09 15:26:15 -07002705 stat(s, ALLOC_NODE_MISMATCH);
Wei Yangd4ff6d32017-07-06 15:36:25 -07002706 deactivate_slab(s, page, c->freelist, c);
Joonsoo Kima561ce02014-10-09 15:26:15 -07002707 goto new_slab;
2708 }
Christoph Lameterfc59c052011-06-01 12:25:56 -05002709 }
Christoph Lameter6446faa2008-02-15 23:45:26 -08002710
Mel Gorman072bb0a2012-07-31 16:43:58 -07002711 /*
2712 * By rights, we should be searching for a slab page that was
2713 * PFMEMALLOC but right now, we are losing the pfmemalloc
2714 * information when the page leaves the per-cpu allocator
2715 */
2716 if (unlikely(!pfmemalloc_match(page, gfpflags))) {
Wei Yangd4ff6d32017-07-06 15:36:25 -07002717 deactivate_slab(s, page, c->freelist, c);
Mel Gorman072bb0a2012-07-31 16:43:58 -07002718 goto new_slab;
2719 }
2720
Eric Dumazet73736e02011-12-13 04:57:06 +01002721 /* must check again c->freelist in case of cpu migration or IRQ */
Christoph Lameter6faa6832012-05-09 10:09:51 -05002722 freelist = c->freelist;
2723 if (freelist)
Eric Dumazet73736e02011-12-13 04:57:06 +01002724 goto load_freelist;
2725
Christoph Lameterf6e7def2012-05-09 10:09:58 -05002726 freelist = get_freelist(s, page);
Christoph Lameter6446faa2008-02-15 23:45:26 -08002727
Christoph Lameter6faa6832012-05-09 10:09:51 -05002728 if (!freelist) {
Christoph Lameter03e404a2011-06-01 12:25:58 -05002729 c->page = NULL;
2730 stat(s, DEACTIVATE_BYPASS);
Christoph Lameterfc59c052011-06-01 12:25:56 -05002731 goto new_slab;
Christoph Lameter03e404a2011-06-01 12:25:58 -05002732 }
Christoph Lameterdfb4f092007-10-16 01:26:05 -07002733
Christoph Lameter81819f02007-05-06 14:49:36 -07002734 stat(s, ALLOC_REFILL);
Christoph Lameter8ff12cf2008-02-07 17:47:41 -08002735
Christoph Lameter894b8782007-05-10 03:15:16 -07002736load_freelist:
Christoph Lameter507effe2012-05-09 10:09:52 -05002737 /*
2738 * freelist is pointing to the list of objects to be used.
2739 * page is pointing to the page from which the objects are obtained.
2740 * That page must be frozen for per cpu allocations to work.
2741 */
Dave Hansena0132ac2014-01-29 14:05:50 -08002742 VM_BUG_ON(!c->page->frozen);
Christoph Lameter6faa6832012-05-09 10:09:51 -05002743 c->freelist = get_freepointer(s, freelist);
Christoph Lameter8a5ec0b2011-02-25 11:38:54 -06002744 c->tid = next_tid(c->tid);
Christoph Lameter6faa6832012-05-09 10:09:51 -05002745 return freelist;
Christoph Lameter81819f02007-05-06 14:49:36 -07002746
Christoph Lameter81819f02007-05-06 14:49:36 -07002747new_slab:
Christoph Lameter2cfb7452011-06-01 12:25:52 -05002748
Wei Yanga93cf072017-07-06 15:36:31 -07002749 if (slub_percpu_partial(c)) {
2750 page = c->page = slub_percpu_partial(c);
2751 slub_set_percpu_partial(c, page);
Christoph Lameter49e22582011-08-09 16:12:27 -05002752 stat(s, CPU_PARTIAL_ALLOC);
Christoph Lameter49e22582011-08-09 16:12:27 -05002753 goto redo;
Christoph Lameter81819f02007-05-06 14:49:36 -07002754 }
2755
Christoph Lameter188fd062012-05-09 10:09:55 -05002756 freelist = new_slab_objects(s, gfpflags, node, &c);
Christoph Lameterb811c202007-10-16 23:25:51 -07002757
Christoph Lameterf46974362012-05-09 10:09:54 -05002758 if (unlikely(!freelist)) {
David Rientjes9a02d692014-06-04 16:06:36 -07002759 slab_out_of_memory(s, gfpflags, node);
Christoph Lameterf46974362012-05-09 10:09:54 -05002760 return NULL;
Christoph Lameter81819f02007-05-06 14:49:36 -07002761 }
Christoph Lameter894b8782007-05-10 03:15:16 -07002762
Christoph Lameterf6e7def2012-05-09 10:09:58 -05002763 page = c->page;
Christoph Lameter5091b742012-07-31 16:44:00 -07002764 if (likely(!kmem_cache_debug(s) && pfmemalloc_match(page, gfpflags)))
Christoph Lameter81819f02007-05-06 14:49:36 -07002765 goto load_freelist;
Christoph Lameter894b8782007-05-10 03:15:16 -07002766
Christoph Lameter497b66f2011-08-09 16:12:26 -05002767 /* Only entered in the debug case */
Chen Gangd0e0ac92013-07-15 09:05:29 +08002768 if (kmem_cache_debug(s) &&
2769 !alloc_debug_processing(s, page, freelist, addr))
Christoph Lameter497b66f2011-08-09 16:12:26 -05002770 goto new_slab; /* Slab failed checks. Next slab needed */
Christoph Lameter2cfb7452011-06-01 12:25:52 -05002771
Wei Yangd4ff6d32017-07-06 15:36:25 -07002772 deactivate_slab(s, page, get_freepointer(s, freelist), c);
Christoph Lameter6faa6832012-05-09 10:09:51 -05002773 return freelist;
Christoph Lameter894b8782007-05-10 03:15:16 -07002774}
2775
2776/*
Christoph Lametera380a3c2015-11-20 15:57:35 -08002777 * Another one that disabled interrupt and compensates for possible
2778 * cpu changes by refetching the per cpu area pointer.
2779 */
2780static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
2781 unsigned long addr, struct kmem_cache_cpu *c)
2782{
2783 void *p;
2784 unsigned long flags;
2785
2786 local_irq_save(flags);
Thomas Gleixner923717c2019-10-15 21:18:12 +02002787#ifdef CONFIG_PREEMPTION
Christoph Lametera380a3c2015-11-20 15:57:35 -08002788 /*
2789 * We may have been preempted and rescheduled on a different
2790 * cpu before disabling interrupts. Need to reload cpu area
2791 * pointer.
2792 */
2793 c = this_cpu_ptr(s->cpu_slab);
2794#endif
2795
2796 p = ___slab_alloc(s, gfpflags, node, addr, c);
2797 local_irq_restore(flags);
2798 return p;
2799}
2800
2801/*
Alexander Potapenko0f181f92019-10-14 14:11:57 -07002802 * If the object has been wiped upon free, make sure it's fully initialized by
2803 * zeroing out freelist pointer.
2804 */
2805static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s,
2806 void *obj)
2807{
2808 if (unlikely(slab_want_init_on_free(s)) && obj)
Andrey Konovalovce5716c2021-01-23 21:01:38 -08002809 memset((void *)((char *)kasan_reset_tag(obj) + s->offset),
2810 0, sizeof(void *));
Alexander Potapenko0f181f92019-10-14 14:11:57 -07002811}
2812
2813/*
Christoph Lameter894b8782007-05-10 03:15:16 -07002814 * Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc)
2815 * have the fastpath folded into their functions. So no function call
2816 * overhead for requests that can be satisfied on the fastpath.
2817 *
2818 * The fastpath works by first checking if the lockless freelist can be used.
2819 * If not then __slab_alloc is called for slow processing.
2820 *
2821 * Otherwise we can simply pick the next object from the lockless free list.
2822 */
Ezequiel Garcia2b847c32012-09-08 17:47:58 -03002823static __always_inline void *slab_alloc_node(struct kmem_cache *s,
Alexander Potapenkob89fb5e2021-02-25 17:19:16 -08002824 gfp_t gfpflags, int node, unsigned long addr, size_t orig_size)
Christoph Lameter894b8782007-05-10 03:15:16 -07002825{
Jesper Dangaard Brouer03ec0ed2015-11-20 15:57:52 -08002826 void *object;
Christoph Lameterdfb4f092007-10-16 01:26:05 -07002827 struct kmem_cache_cpu *c;
Christoph Lameter57d437d2012-05-09 10:09:59 -05002828 struct page *page;
Christoph Lameter8a5ec0b2011-02-25 11:38:54 -06002829 unsigned long tid;
Roman Gushchin964d4bd2020-08-06 23:20:56 -07002830 struct obj_cgroup *objcg = NULL;
Andrey Konovalovda844b72021-04-29 23:00:06 -07002831 bool init = false;
Christoph Lameter1f842602008-01-07 23:20:30 -08002832
Roman Gushchin964d4bd2020-08-06 23:20:56 -07002833 s = slab_pre_alloc_hook(s, &objcg, 1, gfpflags);
Vladimir Davydov8135be52014-12-12 16:56:38 -08002834 if (!s)
Akinobu Mita773ff602008-12-23 19:37:01 +09002835 return NULL;
Alexander Potapenkob89fb5e2021-02-25 17:19:16 -08002836
2837 object = kfence_alloc(s, orig_size, gfpflags);
2838 if (unlikely(object))
2839 goto out;
2840
Christoph Lameter8a5ec0b2011-02-25 11:38:54 -06002841redo:
Christoph Lameter8a5ec0b2011-02-25 11:38:54 -06002842 /*
2843 * Must read kmem_cache cpu data via this cpu ptr. Preemption is
2844 * enabled. We may switch back and forth between cpus while
2845 * reading from one cpu area. That does not matter as long
2846 * as we end up on the original cpu again when doing the cmpxchg.
Christoph Lameter7cccd80b2013-01-23 21:45:48 +00002847 *
Joonsoo Kim9aabf812015-02-10 14:09:32 -08002848 * We should guarantee that tid and kmem_cache are retrieved on
Thomas Gleixner923717c2019-10-15 21:18:12 +02002849 * the same cpu. It could be different if CONFIG_PREEMPTION so we need
Joonsoo Kim9aabf812015-02-10 14:09:32 -08002850 * to check if it is matched or not.
Christoph Lameter8a5ec0b2011-02-25 11:38:54 -06002851 */
Joonsoo Kim9aabf812015-02-10 14:09:32 -08002852 do {
2853 tid = this_cpu_read(s->cpu_slab->tid);
2854 c = raw_cpu_ptr(s->cpu_slab);
Thomas Gleixner923717c2019-10-15 21:18:12 +02002855 } while (IS_ENABLED(CONFIG_PREEMPTION) &&
Mark Rutland859b7a02015-03-25 15:55:23 -07002856 unlikely(tid != READ_ONCE(c->tid)));
Joonsoo Kim9aabf812015-02-10 14:09:32 -08002857
2858 /*
2859 * Irqless object alloc/free algorithm used here depends on sequence
2860 * of fetching cpu_slab's data. tid should be fetched before anything
2861 * on c to guarantee that object and page associated with previous tid
2862 * won't be used with current tid. If we fetch tid first, object and
2863 * page could be one associated with next tid and our alloc/free
2864 * request will be failed. In this case, we will retry. So, no problem.
2865 */
2866 barrier();
Christoph Lameter8a5ec0b2011-02-25 11:38:54 -06002867
Christoph Lameter8a5ec0b2011-02-25 11:38:54 -06002868 /*
2869 * The transaction ids are globally unique per cpu and per operation on
2870 * a per cpu queue. Thus they can be guarantee that the cmpxchg_double
2871 * occurs on the right processor and that there was no operation on the
2872 * linked list in between.
2873 */
Christoph Lameter8a5ec0b2011-02-25 11:38:54 -06002874
Christoph Lameter9dfc6e62009-12-18 16:26:20 -06002875 object = c->freelist;
Christoph Lameter57d437d2012-05-09 10:09:59 -05002876 page = c->page;
Laurent Dufour22e46632020-11-13 22:51:53 -08002877 if (unlikely(!object || !page || !node_match(page, node))) {
Christoph Lameterdfb4f092007-10-16 01:26:05 -07002878 object = __slab_alloc(s, gfpflags, node, addr, c);
Dave Hansen8eae1492014-06-04 16:06:37 -07002879 } else {
Eric Dumazet0ad95002011-12-16 16:25:34 +01002880 void *next_object = get_freepointer_safe(s, object);
2881
Christoph Lameter8a5ec0b2011-02-25 11:38:54 -06002882 /*
Lucas De Marchi25985ed2011-03-30 22:57:33 -03002883 * The cmpxchg will only match if there was no additional
Christoph Lameter8a5ec0b2011-02-25 11:38:54 -06002884 * operation and if we are on the right processor.
2885 *
Chen Gangd0e0ac92013-07-15 09:05:29 +08002886 * The cmpxchg does the following atomically (without lock
2887 * semantics!)
Christoph Lameter8a5ec0b2011-02-25 11:38:54 -06002888 * 1. Relocate first pointer to the current per cpu area.
2889 * 2. Verify that tid and freelist have not been changed
2890 * 3. If they were not changed replace tid and freelist
2891 *
Chen Gangd0e0ac92013-07-15 09:05:29 +08002892 * Since this is without lock semantics the protection is only
2893 * against code executing on this cpu *not* from access by
2894 * other cpus.
Christoph Lameter8a5ec0b2011-02-25 11:38:54 -06002895 */
Christoph Lameter933393f2011-12-22 11:58:51 -06002896 if (unlikely(!this_cpu_cmpxchg_double(
Christoph Lameter8a5ec0b2011-02-25 11:38:54 -06002897 s->cpu_slab->freelist, s->cpu_slab->tid,
2898 object, tid,
Eric Dumazet0ad95002011-12-16 16:25:34 +01002899 next_object, next_tid(tid)))) {
Christoph Lameter8a5ec0b2011-02-25 11:38:54 -06002900
2901 note_cmpxchg_failure("slab_alloc", s, tid);
2902 goto redo;
2903 }
Eric Dumazet0ad95002011-12-16 16:25:34 +01002904 prefetch_freepointer(s, next_object);
Christoph Lameter84e554e62009-12-18 16:26:23 -06002905 stat(s, ALLOC_FASTPATH);
Christoph Lameter894b8782007-05-10 03:15:16 -07002906 }
Alexander Potapenko0f181f92019-10-14 14:11:57 -07002907
Andrey Konovalovce5716c2021-01-23 21:01:38 -08002908 maybe_wipe_obj_freeptr(s, object);
Andrey Konovalovda844b72021-04-29 23:00:06 -07002909 init = slab_want_init_on_alloc(gfpflags, s);
Christoph Lameterd07dbea2007-07-17 04:03:23 -07002910
Alexander Potapenkob89fb5e2021-02-25 17:19:16 -08002911out:
Andrey Konovalovda844b72021-04-29 23:00:06 -07002912 slab_post_alloc_hook(s, objcg, gfpflags, 1, &object, init);
Vegard Nossum5a896d92008-04-04 00:54:48 +02002913
Christoph Lameter894b8782007-05-10 03:15:16 -07002914 return object;
Christoph Lameter81819f02007-05-06 14:49:36 -07002915}
2916
Ezequiel Garcia2b847c32012-09-08 17:47:58 -03002917static __always_inline void *slab_alloc(struct kmem_cache *s,
Alexander Potapenkob89fb5e2021-02-25 17:19:16 -08002918 gfp_t gfpflags, unsigned long addr, size_t orig_size)
Ezequiel Garcia2b847c32012-09-08 17:47:58 -03002919{
Alexander Potapenkob89fb5e2021-02-25 17:19:16 -08002920 return slab_alloc_node(s, gfpflags, NUMA_NO_NODE, addr, orig_size);
Ezequiel Garcia2b847c32012-09-08 17:47:58 -03002921}
2922
Christoph Lameter81819f02007-05-06 14:49:36 -07002923void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
2924{
Alexander Potapenkob89fb5e2021-02-25 17:19:16 -08002925 void *ret = slab_alloc(s, gfpflags, _RET_IP_, s->object_size);
Eduard - Gabriel Munteanu5b882be2008-08-19 20:43:26 +03002926
Chen Gangd0e0ac92013-07-15 09:05:29 +08002927 trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size,
2928 s->size, gfpflags);
Eduard - Gabriel Munteanu5b882be2008-08-19 20:43:26 +03002929
2930 return ret;
Christoph Lameter81819f02007-05-06 14:49:36 -07002931}
2932EXPORT_SYMBOL(kmem_cache_alloc);
2933
Li Zefan0f24f122009-12-11 15:45:30 +08002934#ifdef CONFIG_TRACING
Richard Kennedy4a923792010-10-21 10:29:19 +01002935void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
Eduard - Gabriel Munteanu5b882be2008-08-19 20:43:26 +03002936{
Alexander Potapenkob89fb5e2021-02-25 17:19:16 -08002937 void *ret = slab_alloc(s, gfpflags, _RET_IP_, size);
Richard Kennedy4a923792010-10-21 10:29:19 +01002938 trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags);
Andrey Konovalov01165232018-12-28 00:29:37 -08002939 ret = kasan_kmalloc(s, ret, size, gfpflags);
Richard Kennedy4a923792010-10-21 10:29:19 +01002940 return ret;
Eduard - Gabriel Munteanu5b882be2008-08-19 20:43:26 +03002941}
Richard Kennedy4a923792010-10-21 10:29:19 +01002942EXPORT_SYMBOL(kmem_cache_alloc_trace);
Eduard - Gabriel Munteanu5b882be2008-08-19 20:43:26 +03002943#endif
2944
Christoph Lameter81819f02007-05-06 14:49:36 -07002945#ifdef CONFIG_NUMA
2946void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
2947{
Alexander Potapenkob89fb5e2021-02-25 17:19:16 -08002948 void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_, s->object_size);
Eduard - Gabriel Munteanu5b882be2008-08-19 20:43:26 +03002949
Eduard - Gabriel Munteanuca2b84cb2009-03-23 15:12:24 +02002950 trace_kmem_cache_alloc_node(_RET_IP_, ret,
Christoph Lameter3b0efdf2012-06-13 10:24:57 -05002951 s->object_size, s->size, gfpflags, node);
Eduard - Gabriel Munteanu5b882be2008-08-19 20:43:26 +03002952
2953 return ret;
Christoph Lameter81819f02007-05-06 14:49:36 -07002954}
2955EXPORT_SYMBOL(kmem_cache_alloc_node);
Christoph Lameter81819f02007-05-06 14:49:36 -07002956
Li Zefan0f24f122009-12-11 15:45:30 +08002957#ifdef CONFIG_TRACING
Richard Kennedy4a923792010-10-21 10:29:19 +01002958void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
Eduard - Gabriel Munteanu5b882be2008-08-19 20:43:26 +03002959 gfp_t gfpflags,
Richard Kennedy4a923792010-10-21 10:29:19 +01002960 int node, size_t size)
Eduard - Gabriel Munteanu5b882be2008-08-19 20:43:26 +03002961{
Alexander Potapenkob89fb5e2021-02-25 17:19:16 -08002962 void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_, size);
Richard Kennedy4a923792010-10-21 10:29:19 +01002963
2964 trace_kmalloc_node(_RET_IP_, ret,
2965 size, s->size, gfpflags, node);
Andrey Ryabinin0316bec2015-02-13 14:39:42 -08002966
Andrey Konovalov01165232018-12-28 00:29:37 -08002967 ret = kasan_kmalloc(s, ret, size, gfpflags);
Richard Kennedy4a923792010-10-21 10:29:19 +01002968 return ret;
Eduard - Gabriel Munteanu5b882be2008-08-19 20:43:26 +03002969}
Richard Kennedy4a923792010-10-21 10:29:19 +01002970EXPORT_SYMBOL(kmem_cache_alloc_node_trace);
Eduard - Gabriel Munteanu5b882be2008-08-19 20:43:26 +03002971#endif
Tobin C. Harding6dfd1b62019-05-13 17:16:09 -07002972#endif /* CONFIG_NUMA */
Eduard - Gabriel Munteanu5b882be2008-08-19 20:43:26 +03002973
Christoph Lameter81819f02007-05-06 14:49:36 -07002974/*
Kim Phillips94e4d712015-02-10 14:09:37 -08002975 * Slow path handling. This may still be called frequently since objects
Christoph Lameter894b8782007-05-10 03:15:16 -07002976 * have a longer lifetime than the cpu slabs in most processing loads.
Christoph Lameter81819f02007-05-06 14:49:36 -07002977 *
Christoph Lameter894b8782007-05-10 03:15:16 -07002978 * So we still attempt to reduce cache line usage. Just take the slab
2979 * lock and free the item. If there is no additional partial page
2980 * handling required then we can return immediately.
Christoph Lameter81819f02007-05-06 14:49:36 -07002981 */
Christoph Lameter894b8782007-05-10 03:15:16 -07002982static void __slab_free(struct kmem_cache *s, struct page *page,
Jesper Dangaard Brouer81084652015-11-20 15:57:46 -08002983 void *head, void *tail, int cnt,
2984 unsigned long addr)
2985
Christoph Lameter81819f02007-05-06 14:49:36 -07002986{
2987 void *prior;
Christoph Lameter2cfb7452011-06-01 12:25:52 -05002988 int was_frozen;
Christoph Lameter2cfb7452011-06-01 12:25:52 -05002989 struct page new;
2990 unsigned long counters;
2991 struct kmem_cache_node *n = NULL;
Kees Cook3f649ab2020-06-03 13:09:38 -07002992 unsigned long flags;
Christoph Lameter81819f02007-05-06 14:49:36 -07002993
Christoph Lameter8a5ec0b2011-02-25 11:38:54 -06002994 stat(s, FREE_SLOWPATH);
Christoph Lameter81819f02007-05-06 14:49:36 -07002995
Alexander Potapenkob89fb5e2021-02-25 17:19:16 -08002996 if (kfence_free(head))
2997 return;
2998
Christoph Lameter19c7ff92012-05-30 12:54:46 -05002999 if (kmem_cache_debug(s) &&
Laura Abbott282acb42016-03-15 14:54:59 -07003000 !free_debug_processing(s, page, head, tail, cnt, addr))
Christoph Lameter80f08c12011-06-01 12:25:55 -05003001 return;
Christoph Lameter6446faa2008-02-15 23:45:26 -08003002
Christoph Lameter2cfb7452011-06-01 12:25:52 -05003003 do {
Joonsoo Kim837d6782012-08-16 00:02:40 +09003004 if (unlikely(n)) {
3005 spin_unlock_irqrestore(&n->list_lock, flags);
3006 n = NULL;
3007 }
Christoph Lameter2cfb7452011-06-01 12:25:52 -05003008 prior = page->freelist;
3009 counters = page->counters;
Jesper Dangaard Brouer81084652015-11-20 15:57:46 -08003010 set_freepointer(s, tail, prior);
Christoph Lameter2cfb7452011-06-01 12:25:52 -05003011 new.counters = counters;
3012 was_frozen = new.frozen;
Jesper Dangaard Brouer81084652015-11-20 15:57:46 -08003013 new.inuse -= cnt;
Joonsoo Kim837d6782012-08-16 00:02:40 +09003014 if ((!new.inuse || !prior) && !was_frozen) {
Christoph Lameter49e22582011-08-09 16:12:27 -05003015
Peter Zijlstrac65c1872014-01-10 13:23:49 +01003016 if (kmem_cache_has_cpu_partial(s) && !prior) {
Christoph Lameter49e22582011-08-09 16:12:27 -05003017
3018 /*
Chen Gangd0e0ac92013-07-15 09:05:29 +08003019 * Slab was on no list before and will be
3020 * partially empty
3021 * We can defer the list move and instead
3022 * freeze it.
Christoph Lameter49e22582011-08-09 16:12:27 -05003023 */
3024 new.frozen = 1;
3025
Peter Zijlstrac65c1872014-01-10 13:23:49 +01003026 } else { /* Needs to be taken off a list */
Christoph Lameter49e22582011-08-09 16:12:27 -05003027
LQYMGTb455def2014-12-10 15:42:13 -08003028 n = get_node(s, page_to_nid(page));
Christoph Lameter49e22582011-08-09 16:12:27 -05003029 /*
3030 * Speculatively acquire the list_lock.
3031 * If the cmpxchg does not succeed then we may
3032 * drop the list_lock without any processing.
3033 *
3034 * Otherwise the list_lock will synchronize with
3035 * other processors updating the list of slabs.
3036 */
3037 spin_lock_irqsave(&n->list_lock, flags);
3038
3039 }
Christoph Lameter2cfb7452011-06-01 12:25:52 -05003040 }
Christoph Lameter81819f02007-05-06 14:49:36 -07003041
Christoph Lameter2cfb7452011-06-01 12:25:52 -05003042 } while (!cmpxchg_double_slab(s, page,
3043 prior, counters,
Jesper Dangaard Brouer81084652015-11-20 15:57:46 -08003044 head, new.counters,
Christoph Lameter2cfb7452011-06-01 12:25:52 -05003045 "__slab_free"));
Christoph Lameter81819f02007-05-06 14:49:36 -07003046
Christoph Lameter2cfb7452011-06-01 12:25:52 -05003047 if (likely(!n)) {
Christoph Lameter49e22582011-08-09 16:12:27 -05003048
Abel Wuc270cf32020-10-13 16:48:40 -07003049 if (likely(was_frozen)) {
3050 /*
3051 * The list lock was not taken therefore no list
3052 * activity can be necessary.
3053 */
3054 stat(s, FREE_FROZEN);
3055 } else if (new.frozen) {
3056 /*
3057 * If we just froze the page then put it onto the
3058 * per cpu partial list.
3059 */
Christoph Lameter49e22582011-08-09 16:12:27 -05003060 put_cpu_partial(s, page, 1);
Alex Shi8028dce2012-02-03 23:34:56 +08003061 stat(s, CPU_PARTIAL_FREE);
3062 }
Abel Wuc270cf32020-10-13 16:48:40 -07003063
LQYMGTb455def2014-12-10 15:42:13 -08003064 return;
3065 }
Christoph Lameter81819f02007-05-06 14:49:36 -07003066
Joonsoo Kim8a5b20a2014-07-02 15:22:35 -07003067 if (unlikely(!new.inuse && n->nr_partial >= s->min_partial))
Joonsoo Kim837d6782012-08-16 00:02:40 +09003068 goto slab_empty;
Christoph Lameter81819f02007-05-06 14:49:36 -07003069
Joonsoo Kim837d6782012-08-16 00:02:40 +09003070 /*
3071 * Objects left in the slab. If it was not on the partial list before
3072 * then add it.
3073 */
Joonsoo Kim345c9052013-06-19 14:05:52 +09003074 if (!kmem_cache_has_cpu_partial(s) && unlikely(!prior)) {
Liu Xianga4d3f892019-05-13 17:16:22 -07003075 remove_full(s, n, page);
Joonsoo Kim837d6782012-08-16 00:02:40 +09003076 add_partial(n, page, DEACTIVATE_TO_TAIL);
3077 stat(s, FREE_ADD_PARTIAL);
Christoph Lameter81819f02007-05-06 14:49:36 -07003078 }
Christoph Lameter80f08c12011-06-01 12:25:55 -05003079 spin_unlock_irqrestore(&n->list_lock, flags);
Christoph Lameter81819f02007-05-06 14:49:36 -07003080 return;
3081
3082slab_empty:
Christoph Lametera973e9d2008-03-01 13:40:44 -08003083 if (prior) {
Christoph Lameter81819f02007-05-06 14:49:36 -07003084 /*
Christoph Lameter6fbabb22011-08-08 11:16:56 -05003085 * Slab on the partial list.
Christoph Lameter81819f02007-05-06 14:49:36 -07003086 */
Christoph Lameter5cc6eee2011-06-01 12:25:50 -05003087 remove_partial(n, page);
Christoph Lameter84e554e62009-12-18 16:26:23 -06003088 stat(s, FREE_REMOVE_PARTIAL);
Peter Zijlstrac65c1872014-01-10 13:23:49 +01003089 } else {
Christoph Lameter6fbabb22011-08-08 11:16:56 -05003090 /* Slab must be on the full list */
Peter Zijlstrac65c1872014-01-10 13:23:49 +01003091 remove_full(s, n, page);
3092 }
Christoph Lameter2cfb7452011-06-01 12:25:52 -05003093
Christoph Lameter80f08c12011-06-01 12:25:55 -05003094 spin_unlock_irqrestore(&n->list_lock, flags);
Christoph Lameter84e554e62009-12-18 16:26:23 -06003095 stat(s, FREE_SLAB);
Christoph Lameter81819f02007-05-06 14:49:36 -07003096 discard_slab(s, page);
Christoph Lameter81819f02007-05-06 14:49:36 -07003097}
3098
Christoph Lameter894b8782007-05-10 03:15:16 -07003099/*
3100 * Fastpath with forced inlining to produce a kfree and kmem_cache_free that
3101 * can perform fastpath freeing without additional function calls.
3102 *
3103 * The fastpath is only possible if we are freeing to the current cpu slab
3104 * of this processor. This typically the case if we have just allocated
3105 * the item before.
3106 *
3107 * If fastpath is not possible then fall back to __slab_free where we deal
3108 * with all sorts of special processing.
Jesper Dangaard Brouer81084652015-11-20 15:57:46 -08003109 *
3110 * Bulk free of a freelist with several objects (all pointing to the
3111 * same page) possible by specifying head and tail ptr, plus objects
3112 * count (cnt). Bulk free indicated by tail pointer being set.
Christoph Lameter894b8782007-05-10 03:15:16 -07003113 */
Alexander Potapenko80a92012016-07-28 15:49:07 -07003114static __always_inline void do_slab_free(struct kmem_cache *s,
3115 struct page *page, void *head, void *tail,
3116 int cnt, unsigned long addr)
Christoph Lameter894b8782007-05-10 03:15:16 -07003117{
Jesper Dangaard Brouer81084652015-11-20 15:57:46 -08003118 void *tail_obj = tail ? : head;
Christoph Lameterdfb4f092007-10-16 01:26:05 -07003119 struct kmem_cache_cpu *c;
Christoph Lameter8a5ec0b2011-02-25 11:38:54 -06003120 unsigned long tid;
Roman Gushchin964d4bd2020-08-06 23:20:56 -07003121
Bharata B Raod1b2cf62020-10-13 16:53:09 -07003122 memcg_slab_free_hook(s, &head, 1);
Christoph Lametera24c5a02011-03-15 12:45:21 -05003123redo:
Christoph Lameter8a5ec0b2011-02-25 11:38:54 -06003124 /*
3125 * Determine the currently cpus per cpu slab.
3126 * The cpu may change afterward. However that does not matter since
3127 * data is retrieved via this pointer. If we are on the same cpu
Jesper Dangaard Brouer2ae44002015-09-04 15:45:31 -07003128 * during the cmpxchg then the free will succeed.
Christoph Lameter8a5ec0b2011-02-25 11:38:54 -06003129 */
Joonsoo Kim9aabf812015-02-10 14:09:32 -08003130 do {
3131 tid = this_cpu_read(s->cpu_slab->tid);
3132 c = raw_cpu_ptr(s->cpu_slab);
Thomas Gleixner923717c2019-10-15 21:18:12 +02003133 } while (IS_ENABLED(CONFIG_PREEMPTION) &&
Mark Rutland859b7a02015-03-25 15:55:23 -07003134 unlikely(tid != READ_ONCE(c->tid)));
Christoph Lameterc016b0b2010-08-20 12:37:16 -05003135
Joonsoo Kim9aabf812015-02-10 14:09:32 -08003136 /* Same with comment on barrier() in slab_alloc_node() */
3137 barrier();
Christoph Lameterc016b0b2010-08-20 12:37:16 -05003138
Christoph Lameter442b06b2011-05-17 16:29:31 -05003139 if (likely(page == c->page)) {
Linus Torvalds50761902020-03-17 11:04:09 -07003140 void **freelist = READ_ONCE(c->freelist);
3141
3142 set_freepointer(s, tail_obj, freelist);
Christoph Lameter8a5ec0b2011-02-25 11:38:54 -06003143
Christoph Lameter933393f2011-12-22 11:58:51 -06003144 if (unlikely(!this_cpu_cmpxchg_double(
Christoph Lameter8a5ec0b2011-02-25 11:38:54 -06003145 s->cpu_slab->freelist, s->cpu_slab->tid,
Linus Torvalds50761902020-03-17 11:04:09 -07003146 freelist, tid,
Jesper Dangaard Brouer81084652015-11-20 15:57:46 -08003147 head, next_tid(tid)))) {
Christoph Lameter8a5ec0b2011-02-25 11:38:54 -06003148
3149 note_cmpxchg_failure("slab_free", s, tid);
3150 goto redo;
3151 }
Christoph Lameter84e554e62009-12-18 16:26:23 -06003152 stat(s, FREE_FASTPATH);
Christoph Lameter894b8782007-05-10 03:15:16 -07003153 } else
Jesper Dangaard Brouer81084652015-11-20 15:57:46 -08003154 __slab_free(s, page, head, tail_obj, cnt, addr);
Christoph Lameter894b8782007-05-10 03:15:16 -07003155
Christoph Lameter894b8782007-05-10 03:15:16 -07003156}
3157
Alexander Potapenko80a92012016-07-28 15:49:07 -07003158static __always_inline void slab_free(struct kmem_cache *s, struct page *page,
3159 void *head, void *tail, int cnt,
3160 unsigned long addr)
3161{
Alexander Potapenko80a92012016-07-28 15:49:07 -07003162 /*
Andrey Konovalovc3895392018-04-10 16:30:31 -07003163 * With KASAN enabled slab_free_freelist_hook modifies the freelist
3164 * to remove objects, whose reuse must be delayed.
Alexander Potapenko80a92012016-07-28 15:49:07 -07003165 */
Andrey Konovalovc3895392018-04-10 16:30:31 -07003166 if (slab_free_freelist_hook(s, &head, &tail))
3167 do_slab_free(s, page, head, tail, cnt, addr);
Alexander Potapenko80a92012016-07-28 15:49:07 -07003168}
3169
#ifdef CONFIG_KASAN_GENERIC
/*
 * Free the single object @x of @cache by calling do_slab_free() directly,
 * i.e. without going through slab_free() and its freelist hook.
 */
void ___cache_free(struct kmem_cache *cache, void *x, unsigned long addr)
{
	struct page *page = virt_to_head_page(x);

	do_slab_free(cache, page, x, NULL, 1, addr);
}
#endif
3176
Christoph Lameter81819f02007-05-06 14:49:36 -07003177void kmem_cache_free(struct kmem_cache *s, void *x)
3178{
Glauber Costab9ce5ef2012-12-18 14:22:46 -08003179 s = cache_from_obj(s, x);
3180 if (!s)
Christoph Lameter79576102012-09-04 23:06:14 +00003181 return;
Jesper Dangaard Brouer81084652015-11-20 15:57:46 -08003182 slab_free(s, virt_to_head_page(x), x, NULL, 1, _RET_IP_);
Jacob Wen3544de8e2021-02-24 12:00:55 -08003183 trace_kmem_cache_free(_RET_IP_, x, s->name);
Christoph Lameter81819f02007-05-06 14:49:36 -07003184}
3185EXPORT_SYMBOL(kmem_cache_free);
3186
/*
 * Result of one build_detached_freelist() pass: a private chain of objects
 * that all sit on the same slab page and can be freed with one slab_free().
 */
struct detached_freelist {
	struct page *page;	/* page shared by all chained objects; NULL if no chain was built */
	void *tail;		/* first object picked up, its free pointer is NULL */
	void *freelist;		/* most recently linked object, i.e. head of the chain */
	int cnt;		/* number of objects on the chain */
	struct kmem_cache *s;	/* cache the chained objects belong to */
};
3194
3195/*
3196 * This function progressively scans the array with free objects (with
3197 * a limited look ahead) and extract objects belonging to the same
3198 * page. It builds a detached freelist directly within the given
3199 * page/objects. This can happen without any need for
3200 * synchronization, because the objects are owned by running process.
3201 * The freelist is build up as a single linked list in the objects.
3202 * The idea is, that this detached freelist can then be bulk
3203 * transferred to the real freelist(s), but only requiring a single
3204 * synchronization primitive. Look ahead in the array is limited due
3205 * to performance reasons.
3206 */
Jesper Dangaard Brouer376bf122016-03-15 14:53:32 -07003207static inline
3208int build_detached_freelist(struct kmem_cache *s, size_t size,
3209 void **p, struct detached_freelist *df)
Jesper Dangaard Brouerd0ecd892015-11-20 15:57:49 -08003210{
3211 size_t first_skipped_index = 0;
3212 int lookahead = 3;
3213 void *object;
Jesper Dangaard Brouerca257192016-03-15 14:54:00 -07003214 struct page *page;
Jesper Dangaard Brouerd0ecd892015-11-20 15:57:49 -08003215
3216 /* Always re-init detached_freelist */
3217 df->page = NULL;
3218
3219 do {
3220 object = p[--size];
Jesper Dangaard Brouerca257192016-03-15 14:54:00 -07003221 /* Do we need !ZERO_OR_NULL_PTR(object) here? (for kfree) */
Jesper Dangaard Brouerd0ecd892015-11-20 15:57:49 -08003222 } while (!object && size);
3223
3224 if (!object)
3225 return 0;
3226
Jesper Dangaard Brouerca257192016-03-15 14:54:00 -07003227 page = virt_to_head_page(object);
3228 if (!s) {
3229 /* Handle kalloc'ed objects */
3230 if (unlikely(!PageSlab(page))) {
3231 BUG_ON(!PageCompound(page));
3232 kfree_hook(object);
Vladimir Davydov49491482016-07-26 15:24:24 -07003233 __free_pages(page, compound_order(page));
Jesper Dangaard Brouerca257192016-03-15 14:54:00 -07003234 p[size] = NULL; /* mark object processed */
3235 return size;
3236 }
3237 /* Derive kmem_cache from object */
3238 df->s = page->slab_cache;
3239 } else {
3240 df->s = cache_from_obj(s, object); /* Support for memcg */
3241 }
Jesper Dangaard Brouer376bf122016-03-15 14:53:32 -07003242
Alexander Potapenkob89fb5e2021-02-25 17:19:16 -08003243 if (is_kfence_address(object)) {
Andrey Konovalovd57a9642021-04-29 23:00:09 -07003244 slab_free_hook(df->s, object, false);
Alexander Potapenkob89fb5e2021-02-25 17:19:16 -08003245 __kfence_free(object);
3246 p[size] = NULL; /* mark object processed */
3247 return size;
3248 }
3249
Jesper Dangaard Brouerd0ecd892015-11-20 15:57:49 -08003250 /* Start new detached freelist */
Jesper Dangaard Brouerca257192016-03-15 14:54:00 -07003251 df->page = page;
Jesper Dangaard Brouer376bf122016-03-15 14:53:32 -07003252 set_freepointer(df->s, object, NULL);
Jesper Dangaard Brouerd0ecd892015-11-20 15:57:49 -08003253 df->tail = object;
3254 df->freelist = object;
3255 p[size] = NULL; /* mark object processed */
3256 df->cnt = 1;
3257
3258 while (size) {
3259 object = p[--size];
3260 if (!object)
3261 continue; /* Skip processed objects */
3262
3263 /* df->page is always set at this point */
3264 if (df->page == virt_to_head_page(object)) {
3265 /* Opportunity build freelist */
Jesper Dangaard Brouer376bf122016-03-15 14:53:32 -07003266 set_freepointer(df->s, object, df->freelist);
Jesper Dangaard Brouerd0ecd892015-11-20 15:57:49 -08003267 df->freelist = object;
3268 df->cnt++;
3269 p[size] = NULL; /* mark object processed */
3270
3271 continue;
3272 }
3273
3274 /* Limit look ahead search */
3275 if (!--lookahead)
3276 break;
3277
3278 if (!first_skipped_index)
3279 first_skipped_index = size + 1;
3280 }
3281
3282 return first_skipped_index;
3283}
3284
Jesper Dangaard Brouer994eb762015-09-04 15:45:37 -07003285/* Note that interrupts must be enabled when calling this function. */
Jesper Dangaard Brouer376bf122016-03-15 14:53:32 -07003286void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
Christoph Lameter484748f2015-09-04 15:45:34 -07003287{
Jesper Dangaard Brouerd0ecd892015-11-20 15:57:49 -08003288 if (WARN_ON(!size))
3289 return;
Jesper Dangaard Brouerfbd02632015-09-04 15:45:43 -07003290
Bharata B Raod1b2cf62020-10-13 16:53:09 -07003291 memcg_slab_free_hook(s, p, size);
Jesper Dangaard Brouerd0ecd892015-11-20 15:57:49 -08003292 do {
3293 struct detached_freelist df;
Jesper Dangaard Brouerfbd02632015-09-04 15:45:43 -07003294
Jesper Dangaard Brouerd0ecd892015-11-20 15:57:49 -08003295 size = build_detached_freelist(s, size, p, &df);
Arnd Bergmann84582c82016-12-12 16:41:35 -08003296 if (!df.page)
Jesper Dangaard Brouerd0ecd892015-11-20 15:57:49 -08003297 continue;
Jesper Dangaard Brouerfbd02632015-09-04 15:45:43 -07003298
Zhiyuan Dai457c82c2021-02-24 12:01:26 -08003299 slab_free(df.s, df.page, df.freelist, df.tail, df.cnt, _RET_IP_);
Jesper Dangaard Brouerd0ecd892015-11-20 15:57:49 -08003300 } while (likely(size));
Christoph Lameter484748f2015-09-04 15:45:34 -07003301}
3302EXPORT_SYMBOL(kmem_cache_free_bulk);
3303
Jesper Dangaard Brouer994eb762015-09-04 15:45:37 -07003304/* Note that interrupts must be enabled when calling this function. */
Jesper Dangaard Brouer865762a2015-11-20 15:57:58 -08003305int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
3306 void **p)
Christoph Lameter484748f2015-09-04 15:45:34 -07003307{
Jesper Dangaard Brouer994eb762015-09-04 15:45:37 -07003308 struct kmem_cache_cpu *c;
3309 int i;
Roman Gushchin964d4bd2020-08-06 23:20:56 -07003310 struct obj_cgroup *objcg = NULL;
Jesper Dangaard Brouer994eb762015-09-04 15:45:37 -07003311
Jesper Dangaard Brouer03ec0ed2015-11-20 15:57:52 -08003312 /* memcg and kmem_cache debug support */
Roman Gushchin964d4bd2020-08-06 23:20:56 -07003313 s = slab_pre_alloc_hook(s, &objcg, size, flags);
Jesper Dangaard Brouer03ec0ed2015-11-20 15:57:52 -08003314 if (unlikely(!s))
3315 return false;
Jesper Dangaard Brouer994eb762015-09-04 15:45:37 -07003316 /*
3317 * Drain objects in the per cpu slab, while disabling local
3318 * IRQs, which protects against PREEMPT and interrupts
3319 * handlers invoking normal fastpath.
3320 */
3321 local_irq_disable();
3322 c = this_cpu_ptr(s->cpu_slab);
3323
3324 for (i = 0; i < size; i++) {
Alexander Potapenkob89fb5e2021-02-25 17:19:16 -08003325 void *object = kfence_alloc(s, s->object_size, flags);
Jesper Dangaard Brouer994eb762015-09-04 15:45:37 -07003326
Alexander Potapenkob89fb5e2021-02-25 17:19:16 -08003327 if (unlikely(object)) {
3328 p[i] = object;
3329 continue;
3330 }
3331
3332 object = c->freelist;
Jesper Dangaard Brouerebe909e2015-09-04 15:45:40 -07003333 if (unlikely(!object)) {
Jesper Dangaard Brouerebe909e2015-09-04 15:45:40 -07003334 /*
Jann Hornfd4d9c72020-03-17 01:28:45 +01003335 * We may have removed an object from c->freelist using
3336 * the fastpath in the previous iteration; in that case,
3337 * c->tid has not been bumped yet.
3338 * Since ___slab_alloc() may reenable interrupts while
3339 * allocating memory, we should bump c->tid now.
3340 */
3341 c->tid = next_tid(c->tid);
3342
3343 /*
Jesper Dangaard Brouerebe909e2015-09-04 15:45:40 -07003344 * Invoking slow path likely have side-effect
3345 * of re-populating per CPU c->freelist
3346 */
Christoph Lameter87098372015-11-20 15:57:38 -08003347 p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE,
Jesper Dangaard Brouerebe909e2015-09-04 15:45:40 -07003348 _RET_IP_, c);
Christoph Lameter87098372015-11-20 15:57:38 -08003349 if (unlikely(!p[i]))
3350 goto error;
3351
Jesper Dangaard Brouerebe909e2015-09-04 15:45:40 -07003352 c = this_cpu_ptr(s->cpu_slab);
Alexander Potapenko0f181f92019-10-14 14:11:57 -07003353 maybe_wipe_obj_freeptr(s, p[i]);
3354
Jesper Dangaard Brouerebe909e2015-09-04 15:45:40 -07003355 continue; /* goto for-loop */
3356 }
Jesper Dangaard Brouer994eb762015-09-04 15:45:37 -07003357 c->freelist = get_freepointer(s, object);
3358 p[i] = object;
Alexander Potapenko0f181f92019-10-14 14:11:57 -07003359 maybe_wipe_obj_freeptr(s, p[i]);
Jesper Dangaard Brouer994eb762015-09-04 15:45:37 -07003360 }
3361 c->tid = next_tid(c->tid);
3362 local_irq_enable();
3363
Andrey Konovalovda844b72021-04-29 23:00:06 -07003364 /*
3365 * memcg and kmem_cache debug support and memory initialization.
3366 * Done outside of the IRQ disabled fastpath loop.
3367 */
3368 slab_post_alloc_hook(s, objcg, flags, size, p,
3369 slab_want_init_on_alloc(flags, s));
Jesper Dangaard Brouer865762a2015-11-20 15:57:58 -08003370 return i;
Christoph Lameter87098372015-11-20 15:57:38 -08003371error:
Christoph Lameter87098372015-11-20 15:57:38 -08003372 local_irq_enable();
Andrey Konovalovda844b72021-04-29 23:00:06 -07003373 slab_post_alloc_hook(s, objcg, flags, i, p, false);
Jesper Dangaard Brouer03ec0ed2015-11-20 15:57:52 -08003374 __kmem_cache_free_bulk(s, i, p);
Jesper Dangaard Brouer865762a2015-11-20 15:57:58 -08003375 return 0;
Christoph Lameter484748f2015-09-04 15:45:34 -07003376}
3377EXPORT_SYMBOL(kmem_cache_alloc_bulk);
3378
3379
Christoph Lameter81819f02007-05-06 14:49:36 -07003380/*
Christoph Lameter672bba32007-05-09 02:32:39 -07003381 * Object placement in a slab is made very easy because we always start at
3382 * offset 0. If we tune the size of the object to the alignment then we can
3383 * get the required alignment by putting one properly sized object after
3384 * another.
Christoph Lameter81819f02007-05-06 14:49:36 -07003385 *
3386 * Notice that the allocation order determines the sizes of the per cpu
3387 * caches. Each processor has always one slab available for allocations.
3388 * Increasing the allocation order reduces the number of times that slabs
Christoph Lameter672bba32007-05-09 02:32:39 -07003389 * must be moved on and off the partial lists and is therefore a factor in
Christoph Lameter81819f02007-05-06 14:49:36 -07003390 * locking overhead.
Christoph Lameter81819f02007-05-06 14:49:36 -07003391 */
3392
3393/*
Ingo Molnarf0953a12021-05-06 18:06:47 -07003394 * Minimum / Maximum order of slab pages. This influences locking overhead
Christoph Lameter81819f02007-05-06 14:49:36 -07003395 * and slab fragmentation. A higher order reduces the number of partial slabs
3396 * and increases the number of allocations possible without having to
3397 * take the list_lock.
3398 */
Alexey Dobriyan19af27a2018-04-05 16:21:39 -07003399static unsigned int slub_min_order;
3400static unsigned int slub_max_order = PAGE_ALLOC_COSTLY_ORDER;
3401static unsigned int slub_min_objects;
Christoph Lameter81819f02007-05-06 14:49:36 -07003402
3403/*
Christoph Lameter81819f02007-05-06 14:49:36 -07003404 * Calculate the order of allocation given an slab object size.
3405 *
Christoph Lameter672bba32007-05-09 02:32:39 -07003406 * The order of allocation has significant impact on performance and other
3407 * system components. Generally order 0 allocations should be preferred since
3408 * order 0 does not cause fragmentation in the page allocator. Larger objects
3409 * be problematic to put into order 0 slabs because there may be too much
Christoph Lameterc124f5b2008-04-14 19:13:29 +03003410 * unused space left. We go to a higher order if more than 1/16th of the slab
Christoph Lameter672bba32007-05-09 02:32:39 -07003411 * would be wasted.
Christoph Lameter81819f02007-05-06 14:49:36 -07003412 *
Christoph Lameter672bba32007-05-09 02:32:39 -07003413 * In order to reach satisfactory performance we must ensure that a minimum
3414 * number of objects is in one slab. Otherwise we may generate too much
3415 * activity on the partial lists which requires taking the list_lock. This is
3416 * less a concern for large slabs though which are rarely used.
Christoph Lameter81819f02007-05-06 14:49:36 -07003417 *
Christoph Lameter672bba32007-05-09 02:32:39 -07003418 * slub_max_order specifies the order where we begin to stop considering the
3419 * number of objects in a slab as critical. If we reach slub_max_order then
3420 * we try to keep the page order as low as possible. So we accept more waste
3421 * of space in favor of a small page order.
3422 *
3423 * Higher order allocations also allow the placement of more objects in a
3424 * slab and thereby reduce object handling overhead. If the user has
Bhaskar Chowdhurydc842072021-04-29 22:54:51 -07003425 * requested a higher minimum order then we start with that one instead of
Christoph Lameter672bba32007-05-09 02:32:39 -07003426 * the smallest order which will fit the object.
Christoph Lameter81819f02007-05-06 14:49:36 -07003427 */
Alexey Dobriyan19af27a2018-04-05 16:21:39 -07003428static inline unsigned int slab_order(unsigned int size,
3429 unsigned int min_objects, unsigned int max_order,
Matthew Wilcox9736d2a2018-06-07 17:09:10 -07003430 unsigned int fract_leftover)
Christoph Lameter81819f02007-05-06 14:49:36 -07003431{
Alexey Dobriyan19af27a2018-04-05 16:21:39 -07003432 unsigned int min_order = slub_min_order;
3433 unsigned int order;
Christoph Lameter81819f02007-05-06 14:49:36 -07003434
Matthew Wilcox9736d2a2018-06-07 17:09:10 -07003435 if (order_objects(min_order, size) > MAX_OBJS_PER_PAGE)
Cyrill Gorcunov210b5c02008-10-22 23:00:38 +04003436 return get_order(size * MAX_OBJS_PER_PAGE) - 1;
Christoph Lameter39b26462008-04-14 19:11:30 +03003437
Matthew Wilcox9736d2a2018-06-07 17:09:10 -07003438 for (order = max(min_order, (unsigned int)get_order(min_objects * size));
Christoph Lameter5e6d4442007-05-09 02:32:46 -07003439 order <= max_order; order++) {
3440
Alexey Dobriyan19af27a2018-04-05 16:21:39 -07003441 unsigned int slab_size = (unsigned int)PAGE_SIZE << order;
3442 unsigned int rem;
Christoph Lameter81819f02007-05-06 14:49:36 -07003443
Matthew Wilcox9736d2a2018-06-07 17:09:10 -07003444 rem = slab_size % size;
Christoph Lameter81819f02007-05-06 14:49:36 -07003445
Christoph Lameter5e6d4442007-05-09 02:32:46 -07003446 if (rem <= slab_size / fract_leftover)
Christoph Lameter81819f02007-05-06 14:49:36 -07003447 break;
Christoph Lameter81819f02007-05-06 14:49:36 -07003448 }
Christoph Lameter672bba32007-05-09 02:32:39 -07003449
Christoph Lameter81819f02007-05-06 14:49:36 -07003450 return order;
3451}
3452
Matthew Wilcox9736d2a2018-06-07 17:09:10 -07003453static inline int calculate_order(unsigned int size)
Christoph Lameter5e6d4442007-05-09 02:32:46 -07003454{
Alexey Dobriyan19af27a2018-04-05 16:21:39 -07003455 unsigned int order;
3456 unsigned int min_objects;
3457 unsigned int max_objects;
Vlastimil Babka32862222021-02-09 13:42:32 -08003458 unsigned int nr_cpus;
Christoph Lameter5e6d4442007-05-09 02:32:46 -07003459
3460 /*
3461 * Attempt to find best configuration for a slab. This
3462 * works by first attempting to generate a layout with
3463 * the best configuration and backing off gradually.
3464 *
Wei Yang422ff4d2015-11-05 18:45:46 -08003465 * First we increase the acceptable waste in a slab. Then
Christoph Lameter5e6d4442007-05-09 02:32:46 -07003466 * we reduce the minimum objects required in a slab.
3467 */
3468 min_objects = slub_min_objects;
Vlastimil Babka32862222021-02-09 13:42:32 -08003469 if (!min_objects) {
3470 /*
3471 * Some architectures will only update present cpus when
3472 * onlining them, so don't trust the number if it's just 1. But
3473 * we also don't want to use nr_cpu_ids always, as on some other
3474 * architectures, there can be many possible cpus, but never
3475 * onlined. Here we compromise between trying to avoid too high
3476 * order on systems that appear larger than they are, and too
3477 * low order on systems that appear smaller than they are.
3478 */
3479 nr_cpus = num_present_cpus();
3480 if (nr_cpus <= 1)
3481 nr_cpus = nr_cpu_ids;
3482 min_objects = 4 * (fls(nr_cpus) + 1);
3483 }
Matthew Wilcox9736d2a2018-06-07 17:09:10 -07003484 max_objects = order_objects(slub_max_order, size);
Zhang Yanmine8120ff2009-02-12 18:00:17 +02003485 min_objects = min(min_objects, max_objects);
3486
Christoph Lameter5e6d4442007-05-09 02:32:46 -07003487 while (min_objects > 1) {
Alexey Dobriyan19af27a2018-04-05 16:21:39 -07003488 unsigned int fraction;
3489
Christoph Lameterc124f5b2008-04-14 19:13:29 +03003490 fraction = 16;
Christoph Lameter5e6d4442007-05-09 02:32:46 -07003491 while (fraction >= 4) {
3492 order = slab_order(size, min_objects,
Matthew Wilcox9736d2a2018-06-07 17:09:10 -07003493 slub_max_order, fraction);
Christoph Lameter5e6d4442007-05-09 02:32:46 -07003494 if (order <= slub_max_order)
3495 return order;
3496 fraction /= 2;
3497 }
Amerigo Wang5086c389c2009-08-19 21:44:13 +03003498 min_objects--;
Christoph Lameter5e6d4442007-05-09 02:32:46 -07003499 }
3500
3501 /*
3502 * We were unable to place multiple objects in a slab. Now
3503 * lets see if we can place a single object there.
3504 */
Matthew Wilcox9736d2a2018-06-07 17:09:10 -07003505 order = slab_order(size, 1, slub_max_order, 1);
Christoph Lameter5e6d4442007-05-09 02:32:46 -07003506 if (order <= slub_max_order)
3507 return order;
3508
3509 /*
3510 * Doh this slab cannot be placed using slub_max_order.
3511 */
Matthew Wilcox9736d2a2018-06-07 17:09:10 -07003512 order = slab_order(size, 1, MAX_ORDER, 1);
David Rientjes818cf592009-04-23 09:58:22 +03003513 if (order < MAX_ORDER)
Christoph Lameter5e6d4442007-05-09 02:32:46 -07003514 return order;
3515 return -ENOSYS;
3516}
3517
Pekka Enberg5595cff2008-08-05 09:28:47 +03003518static void
Joonsoo Kim40534972012-05-11 00:50:47 +09003519init_kmem_cache_node(struct kmem_cache_node *n)
Christoph Lameter81819f02007-05-06 14:49:36 -07003520{
3521 n->nr_partial = 0;
Christoph Lameter81819f02007-05-06 14:49:36 -07003522 spin_lock_init(&n->list_lock);
3523 INIT_LIST_HEAD(&n->partial);
Christoph Lameter8ab13722007-07-17 04:03:32 -07003524#ifdef CONFIG_SLUB_DEBUG
Christoph Lameter0f389ec2008-04-14 18:53:02 +03003525 atomic_long_set(&n->nr_slabs, 0);
Salman Qazi02b71b72008-09-11 12:25:41 -07003526 atomic_long_set(&n->total_objects, 0);
Christoph Lameter643b1132007-05-06 14:49:42 -07003527 INIT_LIST_HEAD(&n->full);
Christoph Lameter8ab13722007-07-17 04:03:32 -07003528#endif
Christoph Lameter81819f02007-05-06 14:49:36 -07003529}
3530
Christoph Lameter55136592010-08-20 12:37:13 -05003531static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
Christoph Lameter4c93c3552007-10-16 01:26:08 -07003532{
Christoph Lameter6c182dc2010-08-20 12:37:14 -05003533 BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE <
Christoph Lameter95a05b42013-01-10 19:14:19 +00003534 KMALLOC_SHIFT_HIGH * sizeof(struct kmem_cache_cpu));
Christoph Lameter9dfc6e62009-12-18 16:26:20 -06003535
Christoph Lameter8a5ec0b2011-02-25 11:38:54 -06003536 /*
Chris Metcalfd4d84fe2011-06-02 10:19:41 -04003537 * Must align to double word boundary for the double cmpxchg
3538 * instructions to work; see __pcpu_double_call_return_bool().
Christoph Lameter8a5ec0b2011-02-25 11:38:54 -06003539 */
Chris Metcalfd4d84fe2011-06-02 10:19:41 -04003540 s->cpu_slab = __alloc_percpu(sizeof(struct kmem_cache_cpu),
3541 2 * sizeof(void *));
Christoph Lameter9dfc6e62009-12-18 16:26:20 -06003542
Christoph Lameter8a5ec0b2011-02-25 11:38:54 -06003543 if (!s->cpu_slab)
3544 return 0;
3545
3546 init_kmem_cache_cpus(s);
3547
3548 return 1;
Christoph Lameter4c93c3552007-10-16 01:26:08 -07003549}
Christoph Lameter4c93c3552007-10-16 01:26:08 -07003550
/* Cache that struct kmem_cache_node objects are allocated from and freed to */
static struct kmem_cache *kmem_cache_node;
3552
Christoph Lameter81819f02007-05-06 14:49:36 -07003553/*
3554 * No kmalloc_node yet so do it by hand. We know that this is the first
3555 * slab on the node for this slabcache. There are no concurrent accesses
3556 * possible.
3557 *
Zhi Yong Wu721ae222013-11-08 20:47:37 +08003558 * Note that this function only works on the kmem_cache_node
3559 * when allocating for the kmem_cache_node. This is used for bootstrapping
Christoph Lameter4c93c3552007-10-16 01:26:08 -07003560 * memory on a fresh node that has no slab structures yet.
Christoph Lameter81819f02007-05-06 14:49:36 -07003561 */
Christoph Lameter55136592010-08-20 12:37:13 -05003562static void early_kmem_cache_node_alloc(int node)
Christoph Lameter81819f02007-05-06 14:49:36 -07003563{
3564 struct page *page;
3565 struct kmem_cache_node *n;
3566
Christoph Lameter51df1142010-08-20 12:37:15 -05003567 BUG_ON(kmem_cache_node->size < sizeof(struct kmem_cache_node));
Christoph Lameter81819f02007-05-06 14:49:36 -07003568
Christoph Lameter51df1142010-08-20 12:37:15 -05003569 page = new_slab(kmem_cache_node, GFP_NOWAIT, node);
Christoph Lameter81819f02007-05-06 14:49:36 -07003570
3571 BUG_ON(!page);
Christoph Lametera2f92ee2007-08-22 14:01:57 -07003572 if (page_to_nid(page) != node) {
Fabian Frederickf9f58282014-06-04 16:06:34 -07003573 pr_err("SLUB: Unable to allocate memory from node %d\n", node);
3574 pr_err("SLUB: Allocating a useless per node structure in order to be able to continue\n");
Christoph Lametera2f92ee2007-08-22 14:01:57 -07003575 }
3576
Christoph Lameter81819f02007-05-06 14:49:36 -07003577 n = page->freelist;
3578 BUG_ON(!n);
Christoph Lameter8ab13722007-07-17 04:03:32 -07003579#ifdef CONFIG_SLUB_DEBUG
Christoph Lameterf7cb1932010-09-29 07:15:01 -05003580 init_object(kmem_cache_node, n, SLUB_RED_ACTIVE);
Christoph Lameter51df1142010-08-20 12:37:15 -05003581 init_tracking(kmem_cache_node, n);
Christoph Lameter8ab13722007-07-17 04:03:32 -07003582#endif
Andrey Konovalovda844b72021-04-29 23:00:06 -07003583 n = kasan_slab_alloc(kmem_cache_node, n, GFP_KERNEL, false);
Andrey Konovalov12b22382018-12-28 00:29:41 -08003584 page->freelist = get_freepointer(kmem_cache_node, n);
3585 page->inuse = 1;
3586 page->frozen = 0;
3587 kmem_cache_node->node[node] = n;
Joonsoo Kim40534972012-05-11 00:50:47 +09003588 init_kmem_cache_node(n);
Christoph Lameter51df1142010-08-20 12:37:15 -05003589 inc_slabs_node(kmem_cache_node, node, page->objects);
Christoph Lameter6446faa2008-02-15 23:45:26 -08003590
Dave Hansen67b6c902014-01-24 07:20:23 -08003591 /*
Steven Rostedt1e4dd942014-02-10 14:25:46 -08003592 * No locks need to be taken here as it has just been
3593 * initialized and there is no concurrent access.
Dave Hansen67b6c902014-01-24 07:20:23 -08003594 */
Steven Rostedt1e4dd942014-02-10 14:25:46 -08003595 __add_partial(n, page, DEACTIVATE_TO_HEAD);
Christoph Lameter81819f02007-05-06 14:49:36 -07003596}
3597
3598static void free_kmem_cache_nodes(struct kmem_cache *s)
3599{
3600 int node;
Christoph Lameterfa45dc22014-08-06 16:04:09 -07003601 struct kmem_cache_node *n;
Christoph Lameter81819f02007-05-06 14:49:36 -07003602
Christoph Lameterfa45dc22014-08-06 16:04:09 -07003603 for_each_kmem_cache_node(s, node, n) {
Christoph Lameter81819f02007-05-06 14:49:36 -07003604 s->node[node] = NULL;
Alexander Potapenkoea37df52017-09-06 16:19:15 -07003605 kmem_cache_free(kmem_cache_node, n);
Christoph Lameter81819f02007-05-06 14:49:36 -07003606 }
3607}
3608
Dmitry Safonov52b4b952016-02-17 13:11:37 -08003609void __kmem_cache_release(struct kmem_cache *s)
3610{
Thomas Garnier210e7a42016-07-26 15:21:59 -07003611 cache_random_seq_destroy(s);
Dmitry Safonov52b4b952016-02-17 13:11:37 -08003612 free_percpu(s->cpu_slab);
3613 free_kmem_cache_nodes(s);
3614}
3615
Christoph Lameter55136592010-08-20 12:37:13 -05003616static int init_kmem_cache_nodes(struct kmem_cache *s)
Christoph Lameter81819f02007-05-06 14:49:36 -07003617{
3618 int node;
Christoph Lameter81819f02007-05-06 14:49:36 -07003619
Vlastimil Babka7e1fa932021-02-24 12:01:12 -08003620 for_each_node_mask(node, slab_nodes) {
Christoph Lameter81819f02007-05-06 14:49:36 -07003621 struct kmem_cache_node *n;
3622
Alexander Duyck73367bd2010-05-21 14:41:35 -07003623 if (slab_state == DOWN) {
Christoph Lameter55136592010-08-20 12:37:13 -05003624 early_kmem_cache_node_alloc(node);
Alexander Duyck73367bd2010-05-21 14:41:35 -07003625 continue;
Christoph Lameter81819f02007-05-06 14:49:36 -07003626 }
Christoph Lameter51df1142010-08-20 12:37:15 -05003627 n = kmem_cache_alloc_node(kmem_cache_node,
Christoph Lameter55136592010-08-20 12:37:13 -05003628 GFP_KERNEL, node);
Alexander Duyck73367bd2010-05-21 14:41:35 -07003629
3630 if (!n) {
3631 free_kmem_cache_nodes(s);
3632 return 0;
3633 }
3634
Joonsoo Kim40534972012-05-11 00:50:47 +09003635 init_kmem_cache_node(n);
Alexander Potapenkoea37df52017-09-06 16:19:15 -07003636 s->node[node] = n;
Christoph Lameter81819f02007-05-06 14:49:36 -07003637 }
3638 return 1;
3639}
Christoph Lameter81819f02007-05-06 14:49:36 -07003640
David Rientjesc0bdb232009-02-25 09:16:35 +02003641static void set_min_partial(struct kmem_cache *s, unsigned long min)
David Rientjes3b89d7d2009-02-22 17:40:07 -08003642{
3643 if (min < MIN_PARTIAL)
3644 min = MIN_PARTIAL;
3645 else if (min > MAX_PARTIAL)
3646 min = MAX_PARTIAL;
3647 s->min_partial = min;
3648}
3649
/*
 * Choose the cpu_partial setting of cache @s from its object size. Does
 * nothing unless CONFIG_SLUB_CPU_PARTIAL is enabled.
 */
static void set_cpu_partial(struct kmem_cache *s)
{
#ifdef CONFIG_SLUB_CPU_PARTIAL
	unsigned int nr_objects;

	/*
	 * cpu_partial determines the maximum number of objects kept in the
	 * per cpu partial lists of a processor.
	 *
	 * Per cpu partial lists mainly contain slabs that just have one
	 * object freed. If they are used for allocation then they can be
	 * filled up again with minimal effort. The slab will never hit the
	 * per node partial lists and therefore no locking will be required.
	 *
	 * This setting also determines
	 *
	 * A) The number of objects from per cpu partial slabs dumped to the
	 *    per node list when we reach the limit.
	 * B) The number of objects in cpu partial slabs to extract from the
	 *    per node list when we run out of per cpu objects. We only fetch
	 *    50% to keep some capacity around for frees.
	 */
	if (!kmem_cache_has_cpu_partial(s))
		nr_objects = 0;
	else if (s->size >= PAGE_SIZE)
		nr_objects = 2;
	else if (s->size >= 1024)
		nr_objects = 6;
	else if (s->size >= 256)
		nr_objects = 13;
	else
		nr_objects = 30;

	slub_set_cpu_partial(s, nr_objects);
#endif
}
3682
Christoph Lameter81819f02007-05-06 14:49:36 -07003683/*
3684 * calculate_sizes() determines the order and the distribution of data within
3685 * a slab object.
3686 */
Christoph Lameter06b285d2008-04-14 19:11:41 +03003687static int calculate_sizes(struct kmem_cache *s, int forced_order)
Christoph Lameter81819f02007-05-06 14:49:36 -07003688{
Alexey Dobriyand50112e2017-11-15 17:32:18 -08003689 slab_flags_t flags = s->flags;
Alexey Dobriyanbe4a7982018-04-05 16:21:28 -07003690 unsigned int size = s->object_size;
Kees Cook89b83f22020-04-20 18:13:42 -07003691 unsigned int freepointer_area;
Alexey Dobriyan19af27a2018-04-05 16:21:39 -07003692 unsigned int order;
Christoph Lameter81819f02007-05-06 14:49:36 -07003693
3694 /*
Christoph Lameterd8b42bf2008-02-15 23:45:25 -08003695 * Round up object size to the next word boundary. We can only
3696 * place the free pointer at word boundaries and this determines
3697 * the possible location of the free pointer.
3698 */
3699 size = ALIGN(size, sizeof(void *));
Kees Cook89b83f22020-04-20 18:13:42 -07003700 /*
3701 * This is the area of the object where a freepointer can be
3702 * safely written. If redzoning adds more to the inuse size, we
3703 * can't use that portion for writing the freepointer, so
3704 * s->offset must be limited within this for the general case.
3705 */
3706 freepointer_area = size;
Christoph Lameterd8b42bf2008-02-15 23:45:25 -08003707
3708#ifdef CONFIG_SLUB_DEBUG
3709 /*
Christoph Lameter81819f02007-05-06 14:49:36 -07003710 * Determine if we can poison the object itself. If the user of
3711 * the slab may touch the object after free or before allocation
3712 * then we should never poison the object itself.
3713 */
Paul E. McKenney5f0d5a32017-01-18 02:53:44 -08003714 if ((flags & SLAB_POISON) && !(flags & SLAB_TYPESAFE_BY_RCU) &&
Christoph Lameterc59def92007-05-16 22:10:50 -07003715 !s->ctor)
Christoph Lameter81819f02007-05-06 14:49:36 -07003716 s->flags |= __OBJECT_POISON;
3717 else
3718 s->flags &= ~__OBJECT_POISON;
3719
Christoph Lameter81819f02007-05-06 14:49:36 -07003720
3721 /*
Christoph Lameter672bba32007-05-09 02:32:39 -07003722 * If we are Redzoning then check if there is some space between the
Christoph Lameter81819f02007-05-06 14:49:36 -07003723 * end of the object and the free pointer. If not then add an
Christoph Lameter672bba32007-05-09 02:32:39 -07003724 * additional word to have some bytes to store Redzone information.
Christoph Lameter81819f02007-05-06 14:49:36 -07003725 */
Christoph Lameter3b0efdf2012-06-13 10:24:57 -05003726 if ((flags & SLAB_RED_ZONE) && size == s->object_size)
Christoph Lameter81819f02007-05-06 14:49:36 -07003727 size += sizeof(void *);
Christoph Lameter41ecc552007-05-09 02:32:44 -07003728#endif
Christoph Lameter81819f02007-05-06 14:49:36 -07003729
3730 /*
Christoph Lameter672bba32007-05-09 02:32:39 -07003731 * With that we have determined the number of bytes in actual use
3732 * by the object. This is the potential offset to the free pointer.
Christoph Lameter81819f02007-05-06 14:49:36 -07003733 */
3734 s->inuse = size;
3735
Paul E. McKenney5f0d5a32017-01-18 02:53:44 -08003736 if (((flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) ||
Christoph Lameterc59def92007-05-16 22:10:50 -07003737 s->ctor)) {
Christoph Lameter81819f02007-05-06 14:49:36 -07003738 /*
3739 * Relocate free pointer after the object if it is not
3740 * permitted to overwrite the first word of the object on
3741 * kmem_cache_free.
3742 *
3743 * This is the case if we do RCU, have a constructor or
3744 * destructor or are poisoning the objects.
Waiman Longcbfc35a2020-05-07 18:36:06 -07003745 *
3746 * The assumption that s->offset >= s->inuse means free
3747 * pointer is outside of the object is used in the
3748 * freeptr_outside_object() function. If that is no
3749 * longer true, the function needs to be modified.
Christoph Lameter81819f02007-05-06 14:49:36 -07003750 */
3751 s->offset = size;
3752 size += sizeof(void *);
Kees Cook89b83f22020-04-20 18:13:42 -07003753 } else if (freepointer_area > sizeof(void *)) {
Kees Cook3202fa62020-04-01 21:04:27 -07003754 /*
3755 * Store freelist pointer near middle of object to keep
3756 * it away from the edges of the object to avoid small
3757 * sized over/underflows from neighboring allocations.
3758 */
Kees Cook89b83f22020-04-20 18:13:42 -07003759 s->offset = ALIGN(freepointer_area / 2, sizeof(void *));
Christoph Lameter81819f02007-05-06 14:49:36 -07003760 }
3761
Christoph Lameterc12b3c62007-05-23 13:57:31 -07003762#ifdef CONFIG_SLUB_DEBUG
Christoph Lameter81819f02007-05-06 14:49:36 -07003763 if (flags & SLAB_STORE_USER)
3764 /*
3765 * Need to store information about allocs and frees after
3766 * the object.
3767 */
3768 size += 2 * sizeof(struct track);
Alexander Potapenko80a92012016-07-28 15:49:07 -07003769#endif
Christoph Lameter81819f02007-05-06 14:49:36 -07003770
Alexander Potapenko80a92012016-07-28 15:49:07 -07003771 kasan_cache_create(s, &size, &s->flags);
3772#ifdef CONFIG_SLUB_DEBUG
Joonsoo Kimd86bd1b2016-03-15 14:55:12 -07003773 if (flags & SLAB_RED_ZONE) {
Christoph Lameter81819f02007-05-06 14:49:36 -07003774 /*
3775 * Add some empty padding so that we can catch
3776 * overwrites from earlier objects rather than let
3777 * tracking information or the free pointer be
Frederik Schwarzer0211a9c2008-12-29 22:14:56 +01003778 * corrupted if a user writes before the start
Christoph Lameter81819f02007-05-06 14:49:36 -07003779 * of the object.
3780 */
3781 size += sizeof(void *);
Joonsoo Kimd86bd1b2016-03-15 14:55:12 -07003782
3783 s->red_left_pad = sizeof(void *);
3784 s->red_left_pad = ALIGN(s->red_left_pad, s->align);
3785 size += s->red_left_pad;
3786 }
Christoph Lameter41ecc552007-05-09 02:32:44 -07003787#endif
Christoph Lameter672bba32007-05-09 02:32:39 -07003788
Christoph Lameter81819f02007-05-06 14:49:36 -07003789 /*
Christoph Lameter81819f02007-05-06 14:49:36 -07003790 * SLUB stores one object immediately after another beginning from
3791 * offset 0. In order to align the objects we have to simply size
3792 * each object to conform to the alignment.
3793 */
Christoph Lameter45906852012-11-28 16:23:16 +00003794 size = ALIGN(size, s->align);
Christoph Lameter81819f02007-05-06 14:49:36 -07003795 s->size = size;
Roman Gushchin4138fdf2020-08-06 23:20:42 -07003796 s->reciprocal_size = reciprocal_value(size);
Christoph Lameter06b285d2008-04-14 19:11:41 +03003797 if (forced_order >= 0)
3798 order = forced_order;
3799 else
Matthew Wilcox9736d2a2018-06-07 17:09:10 -07003800 order = calculate_order(size);
Christoph Lameter81819f02007-05-06 14:49:36 -07003801
Alexey Dobriyan19af27a2018-04-05 16:21:39 -07003802 if ((int)order < 0)
Christoph Lameter81819f02007-05-06 14:49:36 -07003803 return 0;
3804
Christoph Lameterb7a49f02008-02-14 14:21:32 -08003805 s->allocflags = 0;
Christoph Lameter834f3d12008-04-14 19:11:31 +03003806 if (order)
Christoph Lameterb7a49f02008-02-14 14:21:32 -08003807 s->allocflags |= __GFP_COMP;
3808
3809 if (s->flags & SLAB_CACHE_DMA)
Christoph Lameter2c59dd62013-01-10 19:14:19 +00003810 s->allocflags |= GFP_DMA;
Christoph Lameterb7a49f02008-02-14 14:21:32 -08003811
Nicolas Boichat6d6ea1e2019-03-28 20:43:42 -07003812 if (s->flags & SLAB_CACHE_DMA32)
3813 s->allocflags |= GFP_DMA32;
3814
Christoph Lameterb7a49f02008-02-14 14:21:32 -08003815 if (s->flags & SLAB_RECLAIM_ACCOUNT)
3816 s->allocflags |= __GFP_RECLAIMABLE;
3817
Christoph Lameter81819f02007-05-06 14:49:36 -07003818 /*
3819 * Determine the number of objects per slab
3820 */
Matthew Wilcox9736d2a2018-06-07 17:09:10 -07003821 s->oo = oo_make(order, size);
3822 s->min = oo_make(get_order(size), size);
Christoph Lameter205ab992008-04-14 19:11:40 +03003823 if (oo_objects(s->oo) > oo_objects(s->max))
3824 s->max = s->oo;
Christoph Lameter81819f02007-05-06 14:49:36 -07003825
Christoph Lameter834f3d12008-04-14 19:11:31 +03003826 return !!oo_objects(s->oo);
Christoph Lameter81819f02007-05-06 14:49:36 -07003827}
3828
Alexey Dobriyand50112e2017-11-15 17:32:18 -08003829static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags)
Christoph Lameter81819f02007-05-06 14:49:36 -07003830{
Vlastimil Babka1f0723a2021-04-29 22:54:42 -07003831#ifdef CONFIG_SLUB_DEBUG
3832 /*
3833 * If no slub_debug was enabled globally, the static key is not yet
3834 * enabled by setup_slub_debug(). Enable it if the cache is being
3835 * created with any of the debugging flags passed explicitly.
3836 */
3837 if (flags & SLAB_DEBUG_FLAGS)
3838 static_branch_enable(&slub_debug_enabled);
3839#endif
Nikolay Borisov37540002021-02-24 12:00:58 -08003840 s->flags = kmem_cache_flags(s->size, flags, s->name);
Kees Cook2482ddec2017-09-06 16:19:18 -07003841#ifdef CONFIG_SLAB_FREELIST_HARDENED
3842 s->random = get_random_long();
3843#endif
Christoph Lameter81819f02007-05-06 14:49:36 -07003844
Christoph Lameter06b285d2008-04-14 19:11:41 +03003845 if (!calculate_sizes(s, -1))
Christoph Lameter81819f02007-05-06 14:49:36 -07003846 goto error;
David Rientjes3de47212009-07-27 18:30:35 -07003847 if (disable_higher_order_debug) {
3848 /*
3849 * Disable debugging flags that store metadata if the min slab
3850 * order increased.
3851 */
Christoph Lameter3b0efdf2012-06-13 10:24:57 -05003852 if (get_order(s->size) > get_order(s->object_size)) {
David Rientjes3de47212009-07-27 18:30:35 -07003853 s->flags &= ~DEBUG_METADATA_FLAGS;
3854 s->offset = 0;
3855 if (!calculate_sizes(s, -1))
3856 goto error;
3857 }
3858 }
Christoph Lameter81819f02007-05-06 14:49:36 -07003859
Heiko Carstens25654092012-01-12 17:17:33 -08003860#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
3861 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
Laura Abbott149daaf2016-03-15 14:55:09 -07003862 if (system_has_cmpxchg_double() && (s->flags & SLAB_NO_CMPXCHG) == 0)
Christoph Lameterb789ef52011-06-01 12:25:49 -05003863 /* Enable fast mode */
3864 s->flags |= __CMPXCHG_DOUBLE;
3865#endif
3866
David Rientjes3b89d7d2009-02-22 17:40:07 -08003867 /*
3868 * The larger the object size is, the more pages we want on the partial
3869 * list to avoid pounding the page allocator excessively.
3870 */
Christoph Lameter49e22582011-08-09 16:12:27 -05003871 set_min_partial(s, ilog2(s->size) / 2);
3872
Wei Yange6d0e1d2017-07-06 15:36:34 -07003873 set_cpu_partial(s);
Christoph Lameter49e22582011-08-09 16:12:27 -05003874
Christoph Lameter81819f02007-05-06 14:49:36 -07003875#ifdef CONFIG_NUMA
Christoph Lametere2cb96b2008-08-19 08:51:22 -05003876 s->remote_node_defrag_ratio = 1000;
Christoph Lameter81819f02007-05-06 14:49:36 -07003877#endif
Thomas Garnier210e7a42016-07-26 15:21:59 -07003878
3879 /* Initialize the pre-computed randomized freelist if slab is up */
3880 if (slab_state >= UP) {
3881 if (init_cache_random_seq(s))
3882 goto error;
3883 }
3884
Christoph Lameter55136592010-08-20 12:37:13 -05003885 if (!init_kmem_cache_nodes(s))
Christoph Lameterdfb4f092007-10-16 01:26:05 -07003886 goto error;
Christoph Lameter81819f02007-05-06 14:49:36 -07003887
Christoph Lameter55136592010-08-20 12:37:13 -05003888 if (alloc_kmem_cache_cpus(s))
Christoph Lameter278b1bb2012-09-05 00:20:34 +00003889 return 0;
Christoph Lameterff120592009-12-18 16:26:22 -06003890
Christoph Lameter4c93c3552007-10-16 01:26:08 -07003891 free_kmem_cache_nodes(s);
Christoph Lameter81819f02007-05-06 14:49:36 -07003892error:
Christoph Lameter278b1bb2012-09-05 00:20:34 +00003893 return -EINVAL;
Christoph Lameter81819f02007-05-06 14:49:36 -07003894}
Christoph Lameter81819f02007-05-06 14:49:36 -07003895
/*
 * Report a slab error using @text as the format (the cache name is its
 * only argument) and then print every object of @page whose bit is not
 * set in the map obtained from get_map(), together with its tracking
 * information. The slab is locked while its objects are walked.
 *
 * Compiles to an empty function without CONFIG_SLUB_DEBUG.
 */
static void list_slab_objects(struct kmem_cache *s, struct page *page,
			      const char *text)
{
#ifdef CONFIG_SLUB_DEBUG
	void *base = page_address(page);
	unsigned long *obj_map;
	void *obj;

	slab_err(s, page, text, s->name);
	slab_lock(page);

	obj_map = get_map(s, page);
	for_each_object(obj, s, base, page->objects) {
		/* Objects marked in the map are not reported. */
		if (test_bit(__obj_to_index(s, base, obj), obj_map))
			continue;

		pr_err("Object 0x%p @offset=%tu\n", obj, obj - base);
		print_tracking(s, obj);
	}
	put_map(obj_map);
	slab_unlock(page);
#endif
}
3919
Christoph Lameter81819f02007-05-06 14:49:36 -07003920/*
Christoph Lameter599870b2008-04-23 12:36:52 -07003921 * Attempt to free all partial slabs on a node.
Dmitry Safonov52b4b952016-02-17 13:11:37 -08003922 * This is called from __kmem_cache_shutdown(). We must take list_lock
3923 * because sysfs file might still access partial list after the shutdowning.
Christoph Lameter81819f02007-05-06 14:49:36 -07003924 */
Christoph Lameter599870b2008-04-23 12:36:52 -07003925static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
Christoph Lameter81819f02007-05-06 14:49:36 -07003926{
Chris Wilson60398922016-08-10 16:27:58 -07003927 LIST_HEAD(discard);
Christoph Lameter81819f02007-05-06 14:49:36 -07003928 struct page *page, *h;
3929
Dmitry Safonov52b4b952016-02-17 13:11:37 -08003930 BUG_ON(irqs_disabled());
3931 spin_lock_irq(&n->list_lock);
Tobin C. Harding916ac052019-05-13 17:16:12 -07003932 list_for_each_entry_safe(page, h, &n->partial, slab_list) {
Christoph Lameter81819f02007-05-06 14:49:36 -07003933 if (!page->inuse) {
Dmitry Safonov52b4b952016-02-17 13:11:37 -08003934 remove_partial(n, page);
Tobin C. Harding916ac052019-05-13 17:16:12 -07003935 list_add(&page->slab_list, &discard);
Christoph Lameter33b12c32008-04-25 12:22:43 -07003936 } else {
3937 list_slab_objects(s, page,
Sebastian Andrzej Siewior55860d92020-06-25 20:29:55 -07003938 "Objects remaining in %s on __kmem_cache_shutdown()");
Christoph Lameter599870b2008-04-23 12:36:52 -07003939 }
Christoph Lameter33b12c32008-04-25 12:22:43 -07003940 }
Dmitry Safonov52b4b952016-02-17 13:11:37 -08003941 spin_unlock_irq(&n->list_lock);
Chris Wilson60398922016-08-10 16:27:58 -07003942
Tobin C. Harding916ac052019-05-13 17:16:12 -07003943 list_for_each_entry_safe(page, h, &discard, slab_list)
Chris Wilson60398922016-08-10 16:27:58 -07003944 discard_slab(s, page);
Christoph Lameter81819f02007-05-06 14:49:36 -07003945}
3946
Shakeel Buttf9e13c02018-04-05 16:21:57 -07003947bool __kmem_cache_empty(struct kmem_cache *s)
3948{
3949 int node;
3950 struct kmem_cache_node *n;
3951
3952 for_each_kmem_cache_node(s, node, n)
3953 if (n->nr_partial || slabs_node(s, node))
3954 return false;
3955 return true;
3956}
3957
Christoph Lameter81819f02007-05-06 14:49:36 -07003958/*
Christoph Lameter672bba32007-05-09 02:32:39 -07003959 * Release all resources used by a slab cache.
Christoph Lameter81819f02007-05-06 14:49:36 -07003960 */
Dmitry Safonov52b4b952016-02-17 13:11:37 -08003961int __kmem_cache_shutdown(struct kmem_cache *s)
Christoph Lameter81819f02007-05-06 14:49:36 -07003962{
3963 int node;
Christoph Lameterfa45dc22014-08-06 16:04:09 -07003964 struct kmem_cache_node *n;
Christoph Lameter81819f02007-05-06 14:49:36 -07003965
3966 flush_all(s);
Christoph Lameter81819f02007-05-06 14:49:36 -07003967 /* Attempt to free all objects */
Christoph Lameterfa45dc22014-08-06 16:04:09 -07003968 for_each_kmem_cache_node(s, node, n) {
Christoph Lameter599870b2008-04-23 12:36:52 -07003969 free_partial(s, n);
3970 if (n->nr_partial || slabs_node(s, node))
Christoph Lameter81819f02007-05-06 14:49:36 -07003971 return 1;
3972 }
Christoph Lameter81819f02007-05-06 14:49:36 -07003973 return 0;
3974}
3975
#ifdef CONFIG_PRINTK
/*
 * Fill @kpp with what is known about @object, which lives in slab @page:
 * the pointer, page and cache, the offset of the pointer from the start
 * of its object, and the start of the object slot inside the slab.
 *
 * When the cache has SLAB_STORE_USER set and CONFIG_SLUB_DEBUG is
 * enabled, the allocation caller is filled in as well and, with
 * CONFIG_STACKTRACE, the recorded allocation and free stacks.
 */
void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct page *page)
{
	struct kmem_cache *s = page->slab_cache;
	struct track __maybe_unused *trk;
	int __maybe_unused i;
	unsigned int idx;
	void *slab_base;
	void *untagged;
	void *objp;

	kpp->kp_ptr = object;
	kpp->kp_page = page;
	kpp->kp_slab_cache = s;

	slab_base = page_address(page);
	untagged = kasan_reset_tag(object);
#ifdef CONFIG_SLUB_DEBUG
	objp = restore_red_left(s, untagged);
#else
	objp = untagged;
#endif
	idx = obj_to_index(s, page, objp);
	kpp->kp_data_offset = (unsigned long)((char *)untagged - (char *)objp);

	/* From here on objp is the start of the slot holding the object. */
	objp = slab_base + s->size * idx;
	kpp->kp_objp = objp;

	/* The slot must lie inside the slab and on an object boundary. */
	if (WARN_ON_ONCE(objp < slab_base ||
			 objp >= slab_base + page->objects * s->size ||
			 (objp - slab_base) % s->size))
		return;

	/* Tracking data is only looked up for SLAB_STORE_USER caches. */
	if (!(s->flags & SLAB_STORE_USER))
		return;

#ifdef CONFIG_SLUB_DEBUG
	objp = fixup_red_left(s, objp);
	trk = get_track(s, objp, TRACK_ALLOC);
	kpp->kp_ret = (void *)trk->addr;
#ifdef CONFIG_STACKTRACE
	/* Copy each stack up to and including its first empty entry. */
	for (i = 0; i < KS_ADDRS_COUNT && i < TRACK_ADDRS_COUNT; i++) {
		kpp->kp_stack[i] = (void *)trk->addrs[i];
		if (!kpp->kp_stack[i])
			break;
	}

	trk = get_track(s, objp, TRACK_FREE);
	for (i = 0; i < KS_ADDRS_COUNT && i < TRACK_ADDRS_COUNT; i++) {
		kpp->kp_free_stack[i] = (void *)trk->addrs[i];
		if (!kpp->kp_free_stack[i])
			break;
	}
#endif
#endif
}
#endif
Paul E. McKenney8e7f37f2020-12-07 17:41:02 -08004025
Christoph Lameter81819f02007-05-06 14:49:36 -07004026/********************************************************************
4027 * Kmalloc subsystem
4028 *******************************************************************/
4029
Christoph Lameter81819f02007-05-06 14:49:36 -07004030static int __init setup_slub_min_order(char *str)
4031{
Alexey Dobriyan19af27a2018-04-05 16:21:39 -07004032 get_option(&str, (int *)&slub_min_order);
Christoph Lameter81819f02007-05-06 14:49:36 -07004033
4034 return 1;
4035}
4036
4037__setup("slub_min_order=", setup_slub_min_order);
4038
4039static int __init setup_slub_max_order(char *str)
4040{
Alexey Dobriyan19af27a2018-04-05 16:21:39 -07004041 get_option(&str, (int *)&slub_max_order);
4042 slub_max_order = min(slub_max_order, (unsigned int)MAX_ORDER - 1);
Christoph Lameter81819f02007-05-06 14:49:36 -07004043
4044 return 1;
4045}
4046
4047__setup("slub_max_order=", setup_slub_max_order);
4048
4049static int __init setup_slub_min_objects(char *str)
4050{
Alexey Dobriyan19af27a2018-04-05 16:21:39 -07004051 get_option(&str, (int *)&slub_min_objects);
Christoph Lameter81819f02007-05-06 14:49:36 -07004052
4053 return 1;
4054}
4055
4056__setup("slub_min_objects=", setup_slub_min_objects);
4057
Christoph Lameter81819f02007-05-06 14:49:36 -07004058void *__kmalloc(size_t size, gfp_t flags)
4059{
Christoph Lameteraadb4bc2007-10-16 01:24:38 -07004060 struct kmem_cache *s;
Eduard - Gabriel Munteanu5b882be2008-08-19 20:43:26 +03004061 void *ret;
Christoph Lameter81819f02007-05-06 14:49:36 -07004062
Christoph Lameter95a05b42013-01-10 19:14:19 +00004063 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
Pekka Enbergeada35e2008-02-11 22:47:46 +02004064 return kmalloc_large(size, flags);
Christoph Lameteraadb4bc2007-10-16 01:24:38 -07004065
Christoph Lameter2c59dd62013-01-10 19:14:19 +00004066 s = kmalloc_slab(size, flags);
Christoph Lameteraadb4bc2007-10-16 01:24:38 -07004067
4068 if (unlikely(ZERO_OR_NULL_PTR(s)))
Christoph Lameter6cb8f912007-07-17 04:03:22 -07004069 return s;
4070
Alexander Potapenkob89fb5e2021-02-25 17:19:16 -08004071 ret = slab_alloc(s, flags, _RET_IP_, size);
Eduard - Gabriel Munteanu5b882be2008-08-19 20:43:26 +03004072
Eduard - Gabriel Munteanuca2b84cb2009-03-23 15:12:24 +02004073 trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
Eduard - Gabriel Munteanu5b882be2008-08-19 20:43:26 +03004074
Andrey Konovalov01165232018-12-28 00:29:37 -08004075 ret = kasan_kmalloc(s, ret, size, flags);
Andrey Ryabinin0316bec2015-02-13 14:39:42 -08004076
Eduard - Gabriel Munteanu5b882be2008-08-19 20:43:26 +03004077 return ret;
Christoph Lameter81819f02007-05-06 14:49:36 -07004078}
4079EXPORT_SYMBOL(__kmalloc);
4080
Namhyung Kim5d1f57e2010-09-29 21:02:15 +09004081#ifdef CONFIG_NUMA
Christoph Lameterf619cfe2008-03-01 13:56:40 -08004082static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
4083{
Vegard Nossumb1eeab62008-11-25 16:55:53 +01004084 struct page *page;
Catalin Marinase4f7c0b42009-07-07 10:32:59 +01004085 void *ptr = NULL;
Vlastimil Babka6a486c02019-10-06 17:58:42 -07004086 unsigned int order = get_order(size);
Christoph Lameterf619cfe2008-03-01 13:56:40 -08004087
Levin, Alexander (Sasha Levin)75f296d2017-11-15 17:35:54 -08004088 flags |= __GFP_COMP;
Vlastimil Babka6a486c02019-10-06 17:58:42 -07004089 page = alloc_pages_node(node, flags, order);
4090 if (page) {
Catalin Marinase4f7c0b42009-07-07 10:32:59 +01004091 ptr = page_address(page);
Muchun Song96403bf2021-02-24 12:04:26 -08004092 mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B,
4093 PAGE_SIZE << order);
Vlastimil Babka6a486c02019-10-06 17:58:42 -07004094 }
Catalin Marinase4f7c0b42009-07-07 10:32:59 +01004095
Andrey Konovalov01165232018-12-28 00:29:37 -08004096 return kmalloc_large_node_hook(ptr, size, flags);
Christoph Lameterf619cfe2008-03-01 13:56:40 -08004097}
4098
Christoph Lameter81819f02007-05-06 14:49:36 -07004099void *__kmalloc_node(size_t size, gfp_t flags, int node)
4100{
Christoph Lameteraadb4bc2007-10-16 01:24:38 -07004101 struct kmem_cache *s;
Eduard - Gabriel Munteanu5b882be2008-08-19 20:43:26 +03004102 void *ret;
Christoph Lameter81819f02007-05-06 14:49:36 -07004103
Christoph Lameter95a05b42013-01-10 19:14:19 +00004104 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
Eduard - Gabriel Munteanu5b882be2008-08-19 20:43:26 +03004105 ret = kmalloc_large_node(size, flags, node);
4106
Eduard - Gabriel Munteanuca2b84cb2009-03-23 15:12:24 +02004107 trace_kmalloc_node(_RET_IP_, ret,
4108 size, PAGE_SIZE << get_order(size),
4109 flags, node);
Eduard - Gabriel Munteanu5b882be2008-08-19 20:43:26 +03004110
4111 return ret;
4112 }
Christoph Lameteraadb4bc2007-10-16 01:24:38 -07004113
Christoph Lameter2c59dd62013-01-10 19:14:19 +00004114 s = kmalloc_slab(size, flags);
Christoph Lameteraadb4bc2007-10-16 01:24:38 -07004115
4116 if (unlikely(ZERO_OR_NULL_PTR(s)))
Christoph Lameter6cb8f912007-07-17 04:03:22 -07004117 return s;
4118
Alexander Potapenkob89fb5e2021-02-25 17:19:16 -08004119 ret = slab_alloc_node(s, flags, node, _RET_IP_, size);
Eduard - Gabriel Munteanu5b882be2008-08-19 20:43:26 +03004120
Eduard - Gabriel Munteanuca2b84cb2009-03-23 15:12:24 +02004121 trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);
Eduard - Gabriel Munteanu5b882be2008-08-19 20:43:26 +03004122
Andrey Konovalov01165232018-12-28 00:29:37 -08004123 ret = kasan_kmalloc(s, ret, size, flags);
Andrey Ryabinin0316bec2015-02-13 14:39:42 -08004124
Eduard - Gabriel Munteanu5b882be2008-08-19 20:43:26 +03004125 return ret;
Christoph Lameter81819f02007-05-06 14:49:36 -07004126}
4127EXPORT_SYMBOL(__kmalloc_node);
Tobin C. Harding6dfd1b62019-05-13 17:16:09 -07004128#endif /* CONFIG_NUMA */
Christoph Lameter81819f02007-05-06 14:49:36 -07004129
#ifdef CONFIG_HARDENED_USERCOPY
/*
 * Rejects incorrectly sized objects and objects that are to be copied
 * to/from userspace but do not fall entirely within the containing slab
 * cache's usercopy region.
 *
 * @ptr:     start of the range being copied
 * @n:       number of bytes being copied
 * @page:    slab page that contains @ptr
 * @to_user: direction flag, passed on to the usercopy reporting helpers
 *
 * Nothing is returned. A copy that passes the check simply returns; a
 * copy outside the usercopy region but still inside the object is
 * reported with usercopy_warn() when usercopy_fallback is set; every
 * other violation is reported with usercopy_abort().
 */
void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
			 bool to_user)
{
	struct kmem_cache *s;
	unsigned int offset;
	size_t object_size;
	/* Sampled before the KASAN tag is stripped from ptr below. */
	bool is_kfence = is_kfence_address(ptr);

	ptr = kasan_reset_tag(ptr);

	/* Find object and usable object size. */
	s = page->slab_cache;

	/* Reject impossible pointers. */
	if (ptr < page_address(page))
		usercopy_abort("SLUB object not in SLUB page?!", NULL,
			       to_user, 0, n);

	/* Find offset within object. */
	if (is_kfence)
		offset = ptr - kfence_object_start(ptr);
	else
		offset = (ptr - page_address(page)) % s->size;

	/* Adjust for redzone and reject if within the redzone. */
	if (!is_kfence && kmem_cache_debug_flags(s, SLAB_RED_ZONE)) {
		if (offset < s->red_left_pad)
			usercopy_abort("SLUB object in left red zone",
				       s->name, to_user, offset, n);
		offset -= s->red_left_pad;
	}

	/* Allow address range falling entirely within usercopy region. */
	if (offset >= s->useroffset &&
	    offset - s->useroffset <= s->usersize &&
	    n <= s->useroffset - offset + s->usersize)
		return;

	/*
	 * If the copy is still within the allocated object, produce
	 * a warning instead of rejecting the copy. This is intended
	 * to be a temporary method to find any missing usercopy
	 * whitelists.
	 */
	object_size = slab_ksize(s);
	if (usercopy_fallback &&
	    offset <= object_size && n <= object_size - offset) {
		usercopy_warn("SLUB object", s->name, to_user, offset, n);
		return;
	}

	usercopy_abort("SLUB object", s->name, to_user, offset, n);
}
#endif /* CONFIG_HARDENED_USERCOPY */
4193
Marco Elver10d1f8c2019-07-11 20:54:14 -07004194size_t __ksize(const void *object)
Christoph Lameter81819f02007-05-06 14:49:36 -07004195{
Christoph Lameter272c1d22007-06-08 13:46:49 -07004196 struct page *page;
Christoph Lameter81819f02007-05-06 14:49:36 -07004197
Christoph Lameteref8b4522007-10-16 01:24:46 -07004198 if (unlikely(object == ZERO_SIZE_PTR))
Christoph Lameter272c1d22007-06-08 13:46:49 -07004199 return 0;
4200
Vegard Nossum294a80a2007-12-04 23:45:30 -08004201 page = virt_to_head_page(object);
Vegard Nossum294a80a2007-12-04 23:45:30 -08004202
Pekka Enberg76994412008-05-22 19:22:25 +03004203 if (unlikely(!PageSlab(page))) {
4204 WARN_ON(!PageCompound(page));
Matthew Wilcox (Oracle)a50b8542019-09-23 15:34:25 -07004205 return page_size(page);
Pekka Enberg76994412008-05-22 19:22:25 +03004206 }
Christoph Lameter81819f02007-05-06 14:49:36 -07004207
Glauber Costa1b4f59e32012-10-22 18:05:36 +04004208 return slab_ksize(page->slab_cache);
Christoph Lameter81819f02007-05-06 14:49:36 -07004209}
Marco Elver10d1f8c2019-07-11 20:54:14 -07004210EXPORT_SYMBOL(__ksize);
Christoph Lameter81819f02007-05-06 14:49:36 -07004211
4212void kfree(const void *x)
4213{
Christoph Lameter81819f02007-05-06 14:49:36 -07004214 struct page *page;
Christoph Lameter5bb983b2008-02-07 17:47:41 -08004215 void *object = (void *)x;
Christoph Lameter81819f02007-05-06 14:49:36 -07004216
Pekka Enberg2121db72009-03-25 11:05:57 +02004217 trace_kfree(_RET_IP_, x);
4218
Satyam Sharma2408c552007-10-16 01:24:44 -07004219 if (unlikely(ZERO_OR_NULL_PTR(x)))
Christoph Lameter81819f02007-05-06 14:49:36 -07004220 return;
4221
Christoph Lameterb49af682007-05-06 14:49:41 -07004222 page = virt_to_head_page(x);
Christoph Lameteraadb4bc2007-10-16 01:24:38 -07004223 if (unlikely(!PageSlab(page))) {
Vlastimil Babka6a486c02019-10-06 17:58:42 -07004224 unsigned int order = compound_order(page);
4225
Christoph Lameter09375022008-05-28 10:32:22 -07004226 BUG_ON(!PageCompound(page));
Dmitry Vyukov47adccc2018-02-06 15:36:23 -08004227 kfree_hook(object);
Muchun Song96403bf2021-02-24 12:04:26 -08004228 mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B,
4229 -(PAGE_SIZE << order));
Vlastimil Babka6a486c02019-10-06 17:58:42 -07004230 __free_pages(page, order);
Christoph Lameteraadb4bc2007-10-16 01:24:38 -07004231 return;
4232 }
Jesper Dangaard Brouer81084652015-11-20 15:57:46 -08004233 slab_free(page->slab_cache, page, object, NULL, 1, _RET_IP_);
Christoph Lameter81819f02007-05-06 14:49:36 -07004234}
4235EXPORT_SYMBOL(kfree);
4236
Vladimir Davydov832f37f2015-02-12 14:59:41 -08004237#define SHRINK_PROMOTE_MAX 32
4238
Christoph Lameter2086d262007-05-06 14:49:46 -07004239/*
Vladimir Davydov832f37f2015-02-12 14:59:41 -08004240 * kmem_cache_shrink discards empty slabs and promotes the slabs filled
4241 * up most to the head of the partial lists. New allocations will then
4242 * fill those up and thus they can be removed from the partial lists.
Christoph Lameter672bba32007-05-09 02:32:39 -07004243 *
4244 * The slabs with the least items are placed last. This results in them
4245 * being allocated from last increasing the chance that the last objects
4246 * are freed in them.
Christoph Lameter2086d262007-05-06 14:49:46 -07004247 */
Tejun Heoc9fc5862017-02-22 15:41:27 -08004248int __kmem_cache_shrink(struct kmem_cache *s)
Christoph Lameter2086d262007-05-06 14:49:46 -07004249{
4250 int node;
4251 int i;
4252 struct kmem_cache_node *n;
4253 struct page *page;
4254 struct page *t;
Vladimir Davydov832f37f2015-02-12 14:59:41 -08004255 struct list_head discard;
4256 struct list_head promote[SHRINK_PROMOTE_MAX];
Christoph Lameter2086d262007-05-06 14:49:46 -07004257 unsigned long flags;
Vladimir Davydovce3712d2015-02-12 14:59:44 -08004258 int ret = 0;
Christoph Lameter2086d262007-05-06 14:49:46 -07004259
Christoph Lameter2086d262007-05-06 14:49:46 -07004260 flush_all(s);
Christoph Lameterfa45dc22014-08-06 16:04:09 -07004261 for_each_kmem_cache_node(s, node, n) {
Vladimir Davydov832f37f2015-02-12 14:59:41 -08004262 INIT_LIST_HEAD(&discard);
4263 for (i = 0; i < SHRINK_PROMOTE_MAX; i++)
4264 INIT_LIST_HEAD(promote + i);
Christoph Lameter2086d262007-05-06 14:49:46 -07004265
4266 spin_lock_irqsave(&n->list_lock, flags);
4267
4268 /*
Vladimir Davydov832f37f2015-02-12 14:59:41 -08004269 * Build lists of slabs to discard or promote.
Christoph Lameter2086d262007-05-06 14:49:46 -07004270 *
Christoph Lameter672bba32007-05-09 02:32:39 -07004271 * Note that concurrent frees may occur while we hold the
4272 * list_lock. page->inuse here is the upper limit.
Christoph Lameter2086d262007-05-06 14:49:46 -07004273 */
Tobin C. Harding916ac052019-05-13 17:16:12 -07004274 list_for_each_entry_safe(page, t, &n->partial, slab_list) {
Vladimir Davydov832f37f2015-02-12 14:59:41 -08004275 int free = page->objects - page->inuse;
4276
4277 /* Do not reread page->inuse */
4278 barrier();
4279
4280 /* We do not keep full slabs on the list */
4281 BUG_ON(free <= 0);
4282
4283 if (free == page->objects) {
Tobin C. Harding916ac052019-05-13 17:16:12 -07004284 list_move(&page->slab_list, &discard);
Christoph Lameter69cb8e62011-08-09 16:12:22 -05004285 n->nr_partial--;
Vladimir Davydov832f37f2015-02-12 14:59:41 -08004286 } else if (free <= SHRINK_PROMOTE_MAX)
Tobin C. Harding916ac052019-05-13 17:16:12 -07004287 list_move(&page->slab_list, promote + free - 1);
Christoph Lameter2086d262007-05-06 14:49:46 -07004288 }
4289
Christoph Lameter2086d262007-05-06 14:49:46 -07004290 /*
Vladimir Davydov832f37f2015-02-12 14:59:41 -08004291 * Promote the slabs filled up most to the head of the
4292 * partial list.
Christoph Lameter2086d262007-05-06 14:49:46 -07004293 */
Vladimir Davydov832f37f2015-02-12 14:59:41 -08004294 for (i = SHRINK_PROMOTE_MAX - 1; i >= 0; i--)
4295 list_splice(promote + i, &n->partial);
Christoph Lameter2086d262007-05-06 14:49:46 -07004296
Christoph Lameter2086d262007-05-06 14:49:46 -07004297 spin_unlock_irqrestore(&n->list_lock, flags);
Christoph Lameter69cb8e62011-08-09 16:12:22 -05004298
4299 /* Release empty slabs */
Tobin C. Harding916ac052019-05-13 17:16:12 -07004300 list_for_each_entry_safe(page, t, &discard, slab_list)
Christoph Lameter69cb8e62011-08-09 16:12:22 -05004301 discard_slab(s, page);
Vladimir Davydovce3712d2015-02-12 14:59:44 -08004302
4303 if (slabs_node(s, node))
4304 ret = 1;
Christoph Lameter2086d262007-05-06 14:49:46 -07004305 }
4306
Vladimir Davydovce3712d2015-02-12 14:59:44 -08004307 return ret;
Christoph Lameter2086d262007-05-06 14:49:46 -07004308}
Christoph Lameter2086d262007-05-06 14:49:46 -07004309
Yasunori Gotob9049e22007-10-21 16:41:37 -07004310static int slab_mem_going_offline_callback(void *arg)
4311{
4312 struct kmem_cache *s;
4313
Christoph Lameter18004c52012-07-06 15:25:12 -05004314 mutex_lock(&slab_mutex);
Yasunori Gotob9049e22007-10-21 16:41:37 -07004315 list_for_each_entry(s, &slab_caches, list)
Tejun Heoc9fc5862017-02-22 15:41:27 -08004316 __kmem_cache_shrink(s);
Christoph Lameter18004c52012-07-06 15:25:12 -05004317 mutex_unlock(&slab_mutex);
Yasunori Gotob9049e22007-10-21 16:41:37 -07004318
4319 return 0;
4320}
4321
4322static void slab_mem_offline_callback(void *arg)
4323{
Yasunori Gotob9049e22007-10-21 16:41:37 -07004324 struct memory_notify *marg = arg;
4325 int offline_node;
4326
Lai Jiangshanb9d5ab22012-12-11 16:01:05 -08004327 offline_node = marg->status_change_nid_normal;
Yasunori Gotob9049e22007-10-21 16:41:37 -07004328
4329 /*
4330 * If the node still has available memory. we need kmem_cache_node
4331 * for it yet.
4332 */
4333 if (offline_node < 0)
4334 return;
4335
Christoph Lameter18004c52012-07-06 15:25:12 -05004336 mutex_lock(&slab_mutex);
Vlastimil Babka7e1fa932021-02-24 12:01:12 -08004337 node_clear(offline_node, slab_nodes);
Vlastimil Babka666716f2021-02-24 12:01:08 -08004338 /*
4339 * We no longer free kmem_cache_node structures here, as it would be
4340 * racy with all get_node() users, and infeasible to protect them with
4341 * slab_mutex.
4342 */
Christoph Lameter18004c52012-07-06 15:25:12 -05004343 mutex_unlock(&slab_mutex);
Yasunori Gotob9049e22007-10-21 16:41:37 -07004344}
4345
4346static int slab_mem_going_online_callback(void *arg)
4347{
4348 struct kmem_cache_node *n;
4349 struct kmem_cache *s;
4350 struct memory_notify *marg = arg;
Lai Jiangshanb9d5ab22012-12-11 16:01:05 -08004351 int nid = marg->status_change_nid_normal;
Yasunori Gotob9049e22007-10-21 16:41:37 -07004352 int ret = 0;
4353
4354 /*
4355 * If the node's memory is already available, then kmem_cache_node is
4356 * already created. Nothing to do.
4357 */
4358 if (nid < 0)
4359 return 0;
4360
4361 /*
Christoph Lameter0121c6192008-04-29 16:11:12 -07004362 * We are bringing a node online. No memory is available yet. We must
Yasunori Gotob9049e22007-10-21 16:41:37 -07004363 * allocate a kmem_cache_node structure in order to bring the node
4364 * online.
4365 */
Christoph Lameter18004c52012-07-06 15:25:12 -05004366 mutex_lock(&slab_mutex);
Yasunori Gotob9049e22007-10-21 16:41:37 -07004367 list_for_each_entry(s, &slab_caches, list) {
4368 /*
Vlastimil Babka666716f2021-02-24 12:01:08 -08004369 * The structure may already exist if the node was previously
4370 * onlined and offlined.
4371 */
4372 if (get_node(s, nid))
4373 continue;
4374 /*
Yasunori Gotob9049e22007-10-21 16:41:37 -07004375 * XXX: kmem_cache_alloc_node will fallback to other nodes
4376 * since memory is not yet available from the node that
4377 * is brought up.
4378 */
Christoph Lameter8de66a02010-08-25 14:51:14 -05004379 n = kmem_cache_alloc(kmem_cache_node, GFP_KERNEL);
Yasunori Gotob9049e22007-10-21 16:41:37 -07004380 if (!n) {
4381 ret = -ENOMEM;
4382 goto out;
4383 }
Joonsoo Kim40534972012-05-11 00:50:47 +09004384 init_kmem_cache_node(n);
Yasunori Gotob9049e22007-10-21 16:41:37 -07004385 s->node[nid] = n;
4386 }
Vlastimil Babka7e1fa932021-02-24 12:01:12 -08004387 /*
4388 * Any cache created after this point will also have kmem_cache_node
4389 * initialized for the new node.
4390 */
4391 node_set(nid, slab_nodes);
Yasunori Gotob9049e22007-10-21 16:41:37 -07004392out:
Christoph Lameter18004c52012-07-06 15:25:12 -05004393 mutex_unlock(&slab_mutex);
Yasunori Gotob9049e22007-10-21 16:41:37 -07004394 return ret;
4395}
4396
4397static int slab_memory_callback(struct notifier_block *self,
4398 unsigned long action, void *arg)
4399{
4400 int ret = 0;
4401
4402 switch (action) {
4403 case MEM_GOING_ONLINE:
4404 ret = slab_mem_going_online_callback(arg);
4405 break;
4406 case MEM_GOING_OFFLINE:
4407 ret = slab_mem_going_offline_callback(arg);
4408 break;
4409 case MEM_OFFLINE:
4410 case MEM_CANCEL_ONLINE:
4411 slab_mem_offline_callback(arg);
4412 break;
4413 case MEM_ONLINE:
4414 case MEM_CANCEL_OFFLINE:
4415 break;
4416 }
KAMEZAWA Hiroyukidc19f9d2008-12-01 13:13:48 -08004417 if (ret)
4418 ret = notifier_from_errno(ret);
4419 else
4420 ret = NOTIFY_OK;
Yasunori Gotob9049e22007-10-21 16:41:37 -07004421 return ret;
4422}
4423
Andrew Morton3ac38fa2013-04-29 15:08:06 -07004424static struct notifier_block slab_memory_callback_nb = {
4425 .notifier_call = slab_memory_callback,
4426 .priority = SLAB_CALLBACK_PRI,
4427};
Yasunori Gotob9049e22007-10-21 16:41:37 -07004428
Christoph Lameter81819f02007-05-06 14:49:36 -07004429/********************************************************************
4430 * Basic setup of slabs
4431 *******************************************************************/
4432
Christoph Lameter51df1142010-08-20 12:37:15 -05004433/*
4434 * Used for early kmem_cache structures that were allocated using
Christoph Lameterdffb4d62012-11-28 16:23:07 +00004435 * the page allocator. Allocate them properly then fix up the pointers
4436 * that may be pointing to the wrong kmem_cache structure.
Christoph Lameter51df1142010-08-20 12:37:15 -05004437 */
4438
Christoph Lameterdffb4d62012-11-28 16:23:07 +00004439static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
Christoph Lameter51df1142010-08-20 12:37:15 -05004440{
4441 int node;
Christoph Lameterdffb4d62012-11-28 16:23:07 +00004442 struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
Christoph Lameterfa45dc22014-08-06 16:04:09 -07004443 struct kmem_cache_node *n;
Christoph Lameter51df1142010-08-20 12:37:15 -05004444
Christoph Lameterdffb4d62012-11-28 16:23:07 +00004445 memcpy(s, static_cache, kmem_cache->object_size);
Christoph Lameter51df1142010-08-20 12:37:15 -05004446
Glauber Costa7d557b32013-02-22 20:20:00 +04004447 /*
4448 * This runs very early, and only the boot processor is supposed to be
4449 * up. Even if it weren't true, IRQs are not up so we couldn't fire
4450 * IPIs around.
4451 */
4452 __flush_cpu_slab(s, smp_processor_id());
Christoph Lameterfa45dc22014-08-06 16:04:09 -07004453 for_each_kmem_cache_node(s, node, n) {
Christoph Lameter51df1142010-08-20 12:37:15 -05004454 struct page *p;
4455
Tobin C. Harding916ac052019-05-13 17:16:12 -07004456 list_for_each_entry(p, &n->partial, slab_list)
Christoph Lameterfa45dc22014-08-06 16:04:09 -07004457 p->slab_cache = s;
Christoph Lameter51df1142010-08-20 12:37:15 -05004458
Li Zefan607bf322011-04-12 15:22:26 +08004459#ifdef CONFIG_SLUB_DEBUG
Tobin C. Harding916ac052019-05-13 17:16:12 -07004460 list_for_each_entry(p, &n->full, slab_list)
Christoph Lameterfa45dc22014-08-06 16:04:09 -07004461 p->slab_cache = s;
Christoph Lameter51df1142010-08-20 12:37:15 -05004462#endif
Christoph Lameter51df1142010-08-20 12:37:15 -05004463 }
Christoph Lameterdffb4d62012-11-28 16:23:07 +00004464 list_add(&s->list, &slab_caches);
4465 return s;
Christoph Lameter51df1142010-08-20 12:37:15 -05004466}
4467
Christoph Lameter81819f02007-05-06 14:49:36 -07004468void __init kmem_cache_init(void)
4469{
Christoph Lameterdffb4d62012-11-28 16:23:07 +00004470 static __initdata struct kmem_cache boot_kmem_cache,
4471 boot_kmem_cache_node;
Vlastimil Babka7e1fa932021-02-24 12:01:12 -08004472 int node;
Christoph Lameter51df1142010-08-20 12:37:15 -05004473
Stanislaw Gruszkafc8d8622012-01-10 15:07:32 -08004474 if (debug_guardpage_minorder())
4475 slub_max_order = 0;
4476
Christoph Lameterdffb4d62012-11-28 16:23:07 +00004477 kmem_cache_node = &boot_kmem_cache_node;
4478 kmem_cache = &boot_kmem_cache;
Christoph Lameter51df1142010-08-20 12:37:15 -05004479
Vlastimil Babka7e1fa932021-02-24 12:01:12 -08004480 /*
4481 * Initialize the nodemask for which we will allocate per node
4482 * structures. Here we don't need taking slab_mutex yet.
4483 */
4484 for_each_node_state(node, N_NORMAL_MEMORY)
4485 node_set(node, slab_nodes);
4486
Christoph Lameterdffb4d62012-11-28 16:23:07 +00004487 create_boot_cache(kmem_cache_node, "kmem_cache_node",
David Windsor8eb82842017-06-10 22:50:28 -04004488 sizeof(struct kmem_cache_node), SLAB_HWCACHE_ALIGN, 0, 0);
Yasunori Gotob9049e22007-10-21 16:41:37 -07004489
Andrew Morton3ac38fa2013-04-29 15:08:06 -07004490 register_hotmemory_notifier(&slab_memory_callback_nb);
Christoph Lameter81819f02007-05-06 14:49:36 -07004491
4492 /* Able to allocate the per node structures */
4493 slab_state = PARTIAL;
4494
Christoph Lameterdffb4d62012-11-28 16:23:07 +00004495 create_boot_cache(kmem_cache, "kmem_cache",
4496 offsetof(struct kmem_cache, node) +
4497 nr_node_ids * sizeof(struct kmem_cache_node *),
David Windsor8eb82842017-06-10 22:50:28 -04004498 SLAB_HWCACHE_ALIGN, 0, 0);
Christoph Lameter8a13a4c2012-09-04 23:18:33 +00004499
Christoph Lameterdffb4d62012-11-28 16:23:07 +00004500 kmem_cache = bootstrap(&boot_kmem_cache);
Christoph Lameterdffb4d62012-11-28 16:23:07 +00004501 kmem_cache_node = bootstrap(&boot_kmem_cache_node);
Christoph Lameter51df1142010-08-20 12:37:15 -05004502
4503 /* Now we can use the kmem_cache to allocate kmalloc slabs */
Daniel Sanders34cc6992015-06-24 16:55:57 -07004504 setup_kmalloc_cache_index_table();
Christoph Lameterf97d5f62013-01-10 19:12:17 +00004505 create_kmalloc_caches(0);
Christoph Lameter81819f02007-05-06 14:49:36 -07004506
Thomas Garnier210e7a42016-07-26 15:21:59 -07004507 /* Setup random freelists for each cache */
4508 init_freelist_randomization();
4509
Sebastian Andrzej Siewiora96a87b2016-08-18 14:57:19 +02004510 cpuhp_setup_state_nocalls(CPUHP_SLUB_DEAD, "slub:dead", NULL,
4511 slub_cpu_dead);
Christoph Lameter81819f02007-05-06 14:49:36 -07004512
Alexey Dobriyanb9726c22019-03-05 15:48:26 -08004513 pr_info("SLUB: HWalign=%d, Order=%u-%u, MinObjects=%u, CPUs=%u, Nodes=%u\n",
Christoph Lameterf97d5f62013-01-10 19:12:17 +00004514 cache_line_size(),
Christoph Lameter81819f02007-05-06 14:49:36 -07004515 slub_min_order, slub_max_order, slub_min_objects,
4516 nr_cpu_ids, nr_node_ids);
4517}
4518
Pekka Enberg7e85ee02009-06-12 14:03:06 +03004519void __init kmem_cache_init_late(void)
4520{
Pekka Enberg7e85ee02009-06-12 14:03:06 +03004521}
4522
Glauber Costa2633d7a2012-12-18 14:22:34 -08004523struct kmem_cache *
Alexey Dobriyanf4957d52018-04-05 16:20:37 -07004524__kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
Alexey Dobriyand50112e2017-11-15 17:32:18 -08004525 slab_flags_t flags, void (*ctor)(void *))
Christoph Lameter81819f02007-05-06 14:49:36 -07004526{
Roman Gushchin10befea2020-08-06 23:21:27 -07004527 struct kmem_cache *s;
Christoph Lameter81819f02007-05-06 14:49:36 -07004528
Vladimir Davydova44cb9442014-04-07 15:39:23 -07004529 s = find_mergeable(size, align, flags, name, ctor);
Christoph Lameter81819f02007-05-06 14:49:36 -07004530 if (s) {
4531 s->refcount++;
Vladimir Davydov84d0ddd2014-04-07 15:39:29 -07004532
Christoph Lameter81819f02007-05-06 14:49:36 -07004533 /*
4534 * Adjust the object sizes so that we clear
4535 * the complete object on kzalloc.
4536 */
Alexey Dobriyan1b473f22018-04-05 16:21:17 -07004537 s->object_size = max(s->object_size, size);
Alexey Dobriyan52ee6d72018-04-05 16:21:06 -07004538 s->inuse = max(s->inuse, ALIGN(size, sizeof(void *)));
Christoph Lameter6446faa2008-02-15 23:45:26 -08004539
David Rientjes7b8f3b62008-12-17 22:09:46 -08004540 if (sysfs_slab_alias(s, name)) {
David Rientjes7b8f3b62008-12-17 22:09:46 -08004541 s->refcount--;
Christoph Lametercbb79692012-09-05 00:18:32 +00004542 s = NULL;
David Rientjes7b8f3b62008-12-17 22:09:46 -08004543 }
Christoph Lametera0e1d1b2007-07-17 04:03:31 -07004544 }
Christoph Lameter6446faa2008-02-15 23:45:26 -08004545
Christoph Lametercbb79692012-09-05 00:18:32 +00004546 return s;
4547}
Pekka Enberg84c1cf62010-09-14 23:21:12 +03004548
Alexey Dobriyand50112e2017-11-15 17:32:18 -08004549int __kmem_cache_create(struct kmem_cache *s, slab_flags_t flags)
Christoph Lametercbb79692012-09-05 00:18:32 +00004550{
Pekka Enbergaac3a162012-09-05 12:07:44 +03004551 int err;
Christoph Lameter20cea962012-07-06 15:25:13 -05004552
Pekka Enbergaac3a162012-09-05 12:07:44 +03004553 err = kmem_cache_open(s, flags);
4554 if (err)
4555 return err;
Christoph Lameter20cea962012-07-06 15:25:13 -05004556
Christoph Lameter45530c42012-11-28 16:23:07 +00004557 /* Mutex is not taken during early boot */
4558 if (slab_state <= UP)
4559 return 0;
4560
Pekka Enbergaac3a162012-09-05 12:07:44 +03004561 err = sysfs_slab_add(s);
Pekka Enbergaac3a162012-09-05 12:07:44 +03004562 if (err)
Dmitry Safonov52b4b952016-02-17 13:11:37 -08004563 __kmem_cache_release(s);
Pekka Enbergaac3a162012-09-05 12:07:44 +03004564
4565 return err;
Christoph Lameter81819f02007-05-06 14:49:36 -07004566}
Christoph Lameter81819f02007-05-06 14:49:36 -07004567
Eduard - Gabriel Munteanuce71e272008-08-19 20:43:25 +03004568void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
Christoph Lameter81819f02007-05-06 14:49:36 -07004569{
Christoph Lameteraadb4bc2007-10-16 01:24:38 -07004570 struct kmem_cache *s;
Eduard - Gabriel Munteanu94b528d2008-08-24 20:49:35 +03004571 void *ret;
Christoph Lameteraadb4bc2007-10-16 01:24:38 -07004572
Christoph Lameter95a05b42013-01-10 19:14:19 +00004573 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
Pekka Enbergeada35e2008-02-11 22:47:46 +02004574 return kmalloc_large(size, gfpflags);
4575
Christoph Lameter2c59dd62013-01-10 19:14:19 +00004576 s = kmalloc_slab(size, gfpflags);
Christoph Lameter81819f02007-05-06 14:49:36 -07004577
Satyam Sharma2408c552007-10-16 01:24:44 -07004578 if (unlikely(ZERO_OR_NULL_PTR(s)))
Christoph Lameter6cb8f912007-07-17 04:03:22 -07004579 return s;
Christoph Lameter81819f02007-05-06 14:49:36 -07004580
Alexander Potapenkob89fb5e2021-02-25 17:19:16 -08004581 ret = slab_alloc(s, gfpflags, caller, size);
Eduard - Gabriel Munteanu94b528d2008-08-24 20:49:35 +03004582
Lucas De Marchi25985ed2011-03-30 22:57:33 -03004583 /* Honor the call site pointer we received. */
Eduard - Gabriel Munteanuca2b84cb2009-03-23 15:12:24 +02004584 trace_kmalloc(caller, ret, size, s->size, gfpflags);
Eduard - Gabriel Munteanu94b528d2008-08-24 20:49:35 +03004585
4586 return ret;
Christoph Lameter81819f02007-05-06 14:49:36 -07004587}
Daniel Vetterfd7cb572020-03-23 15:49:00 +01004588EXPORT_SYMBOL(__kmalloc_track_caller);
Christoph Lameter81819f02007-05-06 14:49:36 -07004589
#ifdef CONFIG_NUMA
/*
 * Node-aware kmalloc variant that records @caller as the allocation site.
 *
 * @size:     number of bytes requested.
 * @gfpflags: allocation flags.
 * @node:     node to allocate from.
 * @caller:   address to attribute the allocation to.
 *
 * Requests above KMALLOC_MAX_CACHE_SIZE go to kmalloc_large_node() and are
 * traced with the page-order-rounded size. Everything else is served from
 * the kmalloc cache chosen by kmalloc_slab().
 */
void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
				  int node, unsigned long caller)
{
	struct kmem_cache *cache;
	void *obj;

	if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
		obj = kmalloc_large_node(size, gfpflags, node);

		trace_kmalloc_node(caller, obj,
				   size, PAGE_SIZE << get_order(size),
				   gfpflags, node);

		return obj;
	}

	cache = kmalloc_slab(size, gfpflags);
	/* A result flagged by ZERO_OR_NULL_PTR() is passed back as is. */
	if (unlikely(ZERO_OR_NULL_PTR(cache)))
		return cache;

	obj = slab_alloc_node(cache, gfpflags, node, caller, size);

	/* Report the call site we were given, not our own. */
	trace_kmalloc_node(caller, obj, size, cache->size, gfpflags, node);

	return obj;
}
EXPORT_SYMBOL(__kmalloc_node_track_caller);
#endif
Christoph Lameter81819f02007-05-06 14:49:36 -07004621
Christoph Lameterab4d5ed2010-10-05 13:57:26 -05004622#ifdef CONFIG_SYSFS
Christoph Lameter205ab992008-04-14 19:11:40 +03004623static int count_inuse(struct page *page)
4624{
4625 return page->inuse;
4626}
4627
4628static int count_total(struct page *page)
4629{
4630 return page->objects;
4631}
Christoph Lameterab4d5ed2010-10-05 13:57:26 -05004632#endif
Christoph Lameter205ab992008-04-14 19:11:40 +03004633
Christoph Lameterab4d5ed2010-10-05 13:57:26 -05004634#ifdef CONFIG_SLUB_DEBUG
Yu Zhao90e9f6a2020-01-30 22:11:57 -08004635static void validate_slab(struct kmem_cache *s, struct page *page)
Christoph Lameter53e15af2007-05-06 14:49:43 -07004636{
4637 void *p;
Christoph Lametera973e9d2008-03-01 13:40:44 -08004638 void *addr = page_address(page);
Yu Zhao90e9f6a2020-01-30 22:11:57 -08004639 unsigned long *map;
4640
4641 slab_lock(page);
Christoph Lameter53e15af2007-05-06 14:49:43 -07004642
Yu Zhaodd98afd2019-11-30 17:49:37 -08004643 if (!check_slab(s, page) || !on_freelist(s, page, NULL))
Yu Zhao90e9f6a2020-01-30 22:11:57 -08004644 goto unlock;
Christoph Lameter53e15af2007-05-06 14:49:43 -07004645
4646 /* Now we know that a valid freelist exists */
Yu Zhao90e9f6a2020-01-30 22:11:57 -08004647 map = get_map(s, page);
Christoph Lameter5f80b132011-04-15 14:48:13 -05004648 for_each_object(p, s, addr, page->objects) {
Roman Gushchin4138fdf2020-08-06 23:20:42 -07004649 u8 val = test_bit(__obj_to_index(s, addr, p), map) ?
Yu Zhaodd98afd2019-11-30 17:49:37 -08004650 SLUB_RED_INACTIVE : SLUB_RED_ACTIVE;
Christoph Lameter53e15af2007-05-06 14:49:43 -07004651
Yu Zhaodd98afd2019-11-30 17:49:37 -08004652 if (!check_object(s, page, p, val))
4653 break;
4654 }
Yu Zhao90e9f6a2020-01-30 22:11:57 -08004655 put_map(map);
4656unlock:
Christoph Lameter881db7f2011-06-01 12:25:53 -05004657 slab_unlock(page);
Christoph Lameter53e15af2007-05-06 14:49:43 -07004658}
4659
Christoph Lameter434e2452007-07-17 04:03:30 -07004660static int validate_slab_node(struct kmem_cache *s,
Yu Zhao90e9f6a2020-01-30 22:11:57 -08004661 struct kmem_cache_node *n)
Christoph Lameter53e15af2007-05-06 14:49:43 -07004662{
4663 unsigned long count = 0;
4664 struct page *page;
4665 unsigned long flags;
4666
4667 spin_lock_irqsave(&n->list_lock, flags);
4668
Tobin C. Harding916ac052019-05-13 17:16:12 -07004669 list_for_each_entry(page, &n->partial, slab_list) {
Yu Zhao90e9f6a2020-01-30 22:11:57 -08004670 validate_slab(s, page);
Christoph Lameter53e15af2007-05-06 14:49:43 -07004671 count++;
4672 }
4673 if (count != n->nr_partial)
Fabian Frederickf9f58282014-06-04 16:06:34 -07004674 pr_err("SLUB %s: %ld partial slabs counted but counter=%ld\n",
4675 s->name, count, n->nr_partial);
Christoph Lameter53e15af2007-05-06 14:49:43 -07004676
4677 if (!(s->flags & SLAB_STORE_USER))
4678 goto out;
4679
Tobin C. Harding916ac052019-05-13 17:16:12 -07004680 list_for_each_entry(page, &n->full, slab_list) {
Yu Zhao90e9f6a2020-01-30 22:11:57 -08004681 validate_slab(s, page);
Christoph Lameter53e15af2007-05-06 14:49:43 -07004682 count++;
4683 }
4684 if (count != atomic_long_read(&n->nr_slabs))
Fabian Frederickf9f58282014-06-04 16:06:34 -07004685 pr_err("SLUB: %s %ld slabs counted but counter=%ld\n",
4686 s->name, count, atomic_long_read(&n->nr_slabs));
Christoph Lameter53e15af2007-05-06 14:49:43 -07004687
4688out:
4689 spin_unlock_irqrestore(&n->list_lock, flags);
4690 return count;
4691}
4692
Christoph Lameter434e2452007-07-17 04:03:30 -07004693static long validate_slab_cache(struct kmem_cache *s)
Christoph Lameter53e15af2007-05-06 14:49:43 -07004694{
4695 int node;
4696 unsigned long count = 0;
Christoph Lameterfa45dc22014-08-06 16:04:09 -07004697 struct kmem_cache_node *n;
Christoph Lameter53e15af2007-05-06 14:49:43 -07004698
4699 flush_all(s);
Christoph Lameterfa45dc22014-08-06 16:04:09 -07004700 for_each_kmem_cache_node(s, node, n)
Yu Zhao90e9f6a2020-01-30 22:11:57 -08004701 count += validate_slab_node(s, n);
4702
Christoph Lameter53e15af2007-05-06 14:49:43 -07004703 return count;
4704}
Christoph Lameter88a420e2007-05-06 14:49:45 -07004705/*
Christoph Lameter672bba32007-05-09 02:32:39 -07004706 * Generate lists of code addresses where slabcache objects are allocated
Christoph Lameter88a420e2007-05-06 14:49:45 -07004707 * and freed.
4708 */
4709
4710struct location {
4711 unsigned long count;
Eduard - Gabriel Munteanuce71e272008-08-19 20:43:25 +03004712 unsigned long addr;
Christoph Lameter45edfa52007-05-09 02:32:45 -07004713 long long sum_time;
4714 long min_time;
4715 long max_time;
4716 long min_pid;
4717 long max_pid;
Rusty Russell174596a2009-01-01 10:12:29 +10304718 DECLARE_BITMAP(cpus, NR_CPUS);
Christoph Lameter45edfa52007-05-09 02:32:45 -07004719 nodemask_t nodes;
Christoph Lameter88a420e2007-05-06 14:49:45 -07004720};
4721
/*
 * Growable table of struct location entries.
 *
 * NOTE: list_locations() initialises this structure positionally, so the
 * field order must stay as it is.
 */
struct loc_track {
	unsigned long max;	/* entries the current buffer can hold */
	unsigned long count;	/* entries currently in use */
	struct location *loc;	/* the entries; searched by addr */
};
4727
4728static void free_loc_track(struct loc_track *t)
4729{
4730 if (t->max)
4731 free_pages((unsigned long)t->loc,
4732 get_order(sizeof(struct location) * t->max));
4733}
4734
Christoph Lameter68dff6a2007-07-17 04:03:20 -07004735static int alloc_loc_track(struct loc_track *t, unsigned long max, gfp_t flags)
Christoph Lameter88a420e2007-05-06 14:49:45 -07004736{
4737 struct location *l;
4738 int order;
4739
Christoph Lameter88a420e2007-05-06 14:49:45 -07004740 order = get_order(sizeof(struct location) * max);
4741
Christoph Lameter68dff6a2007-07-17 04:03:20 -07004742 l = (void *)__get_free_pages(flags, order);
Christoph Lameter88a420e2007-05-06 14:49:45 -07004743 if (!l)
4744 return 0;
4745
4746 if (t->count) {
4747 memcpy(l, t->loc, sizeof(struct location) * t->count);
4748 free_loc_track(t);
4749 }
4750 t->max = max;
4751 t->loc = l;
4752 return 1;
4753}
4754
4755static int add_location(struct loc_track *t, struct kmem_cache *s,
Christoph Lameter45edfa52007-05-09 02:32:45 -07004756 const struct track *track)
Christoph Lameter88a420e2007-05-06 14:49:45 -07004757{
4758 long start, end, pos;
4759 struct location *l;
Eduard - Gabriel Munteanuce71e272008-08-19 20:43:25 +03004760 unsigned long caddr;
Christoph Lameter45edfa52007-05-09 02:32:45 -07004761 unsigned long age = jiffies - track->when;
Christoph Lameter88a420e2007-05-06 14:49:45 -07004762
4763 start = -1;
4764 end = t->count;
4765
4766 for ( ; ; ) {
4767 pos = start + (end - start + 1) / 2;
4768
4769 /*
4770 * There is nothing at "end". If we end up there
4771 * we need to add something to before end.
4772 */
4773 if (pos == end)
4774 break;
4775
4776 caddr = t->loc[pos].addr;
Christoph Lameter45edfa52007-05-09 02:32:45 -07004777 if (track->addr == caddr) {
4778
4779 l = &t->loc[pos];
4780 l->count++;
4781 if (track->when) {
4782 l->sum_time += age;
4783 if (age < l->min_time)
4784 l->min_time = age;
4785 if (age > l->max_time)
4786 l->max_time = age;
4787
4788 if (track->pid < l->min_pid)
4789 l->min_pid = track->pid;
4790 if (track->pid > l->max_pid)
4791 l->max_pid = track->pid;
4792
Rusty Russell174596a2009-01-01 10:12:29 +10304793 cpumask_set_cpu(track->cpu,
4794 to_cpumask(l->cpus));
Christoph Lameter45edfa52007-05-09 02:32:45 -07004795 }
4796 node_set(page_to_nid(virt_to_page(track)), l->nodes);
Christoph Lameter88a420e2007-05-06 14:49:45 -07004797 return 1;
4798 }
4799
Christoph Lameter45edfa52007-05-09 02:32:45 -07004800 if (track->addr < caddr)
Christoph Lameter88a420e2007-05-06 14:49:45 -07004801 end = pos;
4802 else
4803 start = pos;
4804 }
4805
4806 /*
Christoph Lameter672bba32007-05-09 02:32:39 -07004807 * Not found. Insert new tracking element.
Christoph Lameter88a420e2007-05-06 14:49:45 -07004808 */
Christoph Lameter68dff6a2007-07-17 04:03:20 -07004809 if (t->count >= t->max && !alloc_loc_track(t, 2 * t->max, GFP_ATOMIC))
Christoph Lameter88a420e2007-05-06 14:49:45 -07004810 return 0;
4811
4812 l = t->loc + pos;
4813 if (pos < t->count)
4814 memmove(l + 1, l,
4815 (t->count - pos) * sizeof(struct location));
4816 t->count++;
4817 l->count = 1;
Christoph Lameter45edfa52007-05-09 02:32:45 -07004818 l->addr = track->addr;
4819 l->sum_time = age;
4820 l->min_time = age;
4821 l->max_time = age;
4822 l->min_pid = track->pid;
4823 l->max_pid = track->pid;
Rusty Russell174596a2009-01-01 10:12:29 +10304824 cpumask_clear(to_cpumask(l->cpus));
4825 cpumask_set_cpu(track->cpu, to_cpumask(l->cpus));
Christoph Lameter45edfa52007-05-09 02:32:45 -07004826 nodes_clear(l->nodes);
4827 node_set(page_to_nid(virt_to_page(track)), l->nodes);
Christoph Lameter88a420e2007-05-06 14:49:45 -07004828 return 1;
4829}
4830
4831static void process_slab(struct loc_track *t, struct kmem_cache *s,
Yu Zhao90e9f6a2020-01-30 22:11:57 -08004832 struct page *page, enum track_item alloc)
Christoph Lameter88a420e2007-05-06 14:49:45 -07004833{
Christoph Lametera973e9d2008-03-01 13:40:44 -08004834 void *addr = page_address(page);
Christoph Lameter88a420e2007-05-06 14:49:45 -07004835 void *p;
Yu Zhao90e9f6a2020-01-30 22:11:57 -08004836 unsigned long *map;
Christoph Lameter88a420e2007-05-06 14:49:45 -07004837
Yu Zhao90e9f6a2020-01-30 22:11:57 -08004838 map = get_map(s, page);
Christoph Lameter224a88b2008-04-14 19:11:31 +03004839 for_each_object(p, s, addr, page->objects)
Roman Gushchin4138fdf2020-08-06 23:20:42 -07004840 if (!test_bit(__obj_to_index(s, addr, p), map))
Christoph Lameter45edfa52007-05-09 02:32:45 -07004841 add_location(t, s, get_track(s, p, alloc));
Yu Zhao90e9f6a2020-01-30 22:11:57 -08004842 put_map(map);
Christoph Lameter88a420e2007-05-06 14:49:45 -07004843}
4844
4845static int list_locations(struct kmem_cache *s, char *buf,
Joe Perchesbf16d192020-12-14 19:14:57 -08004846 enum track_item alloc)
Christoph Lameter88a420e2007-05-06 14:49:45 -07004847{
Harvey Harrisone374d482008-01-31 15:20:50 -08004848 int len = 0;
Christoph Lameter88a420e2007-05-06 14:49:45 -07004849 unsigned long i;
Christoph Lameter68dff6a2007-07-17 04:03:20 -07004850 struct loc_track t = { 0, 0, NULL };
Christoph Lameter88a420e2007-05-06 14:49:45 -07004851 int node;
Christoph Lameterfa45dc22014-08-06 16:04:09 -07004852 struct kmem_cache_node *n;
Christoph Lameter88a420e2007-05-06 14:49:45 -07004853
Yu Zhao90e9f6a2020-01-30 22:11:57 -08004854 if (!alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
4855 GFP_KERNEL)) {
Joe Perchesbf16d192020-12-14 19:14:57 -08004856 return sysfs_emit(buf, "Out of memory\n");
Eric Dumazetbbd7d572010-03-24 22:25:47 +01004857 }
Christoph Lameter88a420e2007-05-06 14:49:45 -07004858 /* Push back cpu slabs */
4859 flush_all(s);
4860
Christoph Lameterfa45dc22014-08-06 16:04:09 -07004861 for_each_kmem_cache_node(s, node, n) {
Christoph Lameter88a420e2007-05-06 14:49:45 -07004862 unsigned long flags;
4863 struct page *page;
4864
Christoph Lameter9e869432007-08-22 14:01:56 -07004865 if (!atomic_long_read(&n->nr_slabs))
Christoph Lameter88a420e2007-05-06 14:49:45 -07004866 continue;
4867
4868 spin_lock_irqsave(&n->list_lock, flags);
Tobin C. Harding916ac052019-05-13 17:16:12 -07004869 list_for_each_entry(page, &n->partial, slab_list)
Yu Zhao90e9f6a2020-01-30 22:11:57 -08004870 process_slab(&t, s, page, alloc);
Tobin C. Harding916ac052019-05-13 17:16:12 -07004871 list_for_each_entry(page, &n->full, slab_list)
Yu Zhao90e9f6a2020-01-30 22:11:57 -08004872 process_slab(&t, s, page, alloc);
Christoph Lameter88a420e2007-05-06 14:49:45 -07004873 spin_unlock_irqrestore(&n->list_lock, flags);
4874 }
4875
4876 for (i = 0; i < t.count; i++) {
Christoph Lameter45edfa52007-05-09 02:32:45 -07004877 struct location *l = &t.loc[i];
Christoph Lameter88a420e2007-05-06 14:49:45 -07004878
Joe Perchesbf16d192020-12-14 19:14:57 -08004879 len += sysfs_emit_at(buf, len, "%7ld ", l->count);
Christoph Lameter45edfa52007-05-09 02:32:45 -07004880
4881 if (l->addr)
Joe Perchesbf16d192020-12-14 19:14:57 -08004882 len += sysfs_emit_at(buf, len, "%pS", (void *)l->addr);
Christoph Lameter88a420e2007-05-06 14:49:45 -07004883 else
Joe Perchesbf16d192020-12-14 19:14:57 -08004884 len += sysfs_emit_at(buf, len, "<not-available>");
Christoph Lameter45edfa52007-05-09 02:32:45 -07004885
Joe Perchesbf16d192020-12-14 19:14:57 -08004886 if (l->sum_time != l->min_time)
4887 len += sysfs_emit_at(buf, len, " age=%ld/%ld/%ld",
4888 l->min_time,
4889 (long)div_u64(l->sum_time,
4890 l->count),
4891 l->max_time);
4892 else
4893 len += sysfs_emit_at(buf, len, " age=%ld", l->min_time);
Christoph Lameter45edfa52007-05-09 02:32:45 -07004894
4895 if (l->min_pid != l->max_pid)
Joe Perchesbf16d192020-12-14 19:14:57 -08004896 len += sysfs_emit_at(buf, len, " pid=%ld-%ld",
4897 l->min_pid, l->max_pid);
Christoph Lameter45edfa52007-05-09 02:32:45 -07004898 else
Joe Perchesbf16d192020-12-14 19:14:57 -08004899 len += sysfs_emit_at(buf, len, " pid=%ld",
4900 l->min_pid);
Christoph Lameter45edfa52007-05-09 02:32:45 -07004901
Rusty Russell174596a2009-01-01 10:12:29 +10304902 if (num_online_cpus() > 1 &&
Joe Perchesbf16d192020-12-14 19:14:57 -08004903 !cpumask_empty(to_cpumask(l->cpus)))
4904 len += sysfs_emit_at(buf, len, " cpus=%*pbl",
4905 cpumask_pr_args(to_cpumask(l->cpus)));
Christoph Lameter45edfa52007-05-09 02:32:45 -07004906
Joe Perchesbf16d192020-12-14 19:14:57 -08004907 if (nr_online_nodes > 1 && !nodes_empty(l->nodes))
4908 len += sysfs_emit_at(buf, len, " nodes=%*pbl",
4909 nodemask_pr_args(&l->nodes));
Christoph Lameter45edfa52007-05-09 02:32:45 -07004910
Joe Perchesbf16d192020-12-14 19:14:57 -08004911 len += sysfs_emit_at(buf, len, "\n");
Christoph Lameter88a420e2007-05-06 14:49:45 -07004912 }
4913
4914 free_loc_track(&t);
4915 if (!t.count)
Joe Perchesbf16d192020-12-14 19:14:57 -08004916 len += sysfs_emit_at(buf, len, "No data\n");
4917
Harvey Harrisone374d482008-01-31 15:20:50 -08004918 return len;
Christoph Lameter88a420e2007-05-06 14:49:45 -07004919}
Tobin C. Harding6dfd1b62019-05-13 17:16:09 -07004920#endif /* CONFIG_SLUB_DEBUG */
Christoph Lameter88a420e2007-05-06 14:49:45 -07004921
#ifdef SLUB_RESILIENCY_TEST
/*
 * Self-test that DELIBERATELY corrupts kmalloc objects and then runs
 * validate_slab_cache() on the affected kmalloc cache, so that the
 * validator gets a chance to report the damage.
 *
 * Part A writes outside objects that are still allocated. Part B writes
 * into objects after they have been freed. The out-of-bounds and
 * use-after-free accesses below are the purpose of the test, not bugs.
 * None of the objects allocated in part A are freed.
 *
 * The kmalloc_caches[] indices 4..9 are used for the 16..512 byte caches.
 * The BUILD_BUG_ON() checks the size-class layout that these hard-coded
 * indices rely on.
 */
static void __init resiliency_test(void)
{
	u8 *p;
	int type = KMALLOC_NORMAL;

	BUILD_BUG_ON(KMALLOC_MIN_SIZE > 16 || KMALLOC_SHIFT_HIGH < 10);

	pr_err("SLUB resiliency testing\n");
	pr_err("-----------------------\n");
	pr_err("A. Corruption after allocation\n");

	/* A1: write one byte just past a 16 byte object. */
	p = kzalloc(16, GFP_KERNEL);
	p[16] = 0x12;
	pr_err("\n1. kmalloc-16: Clobber Redzone/next pointer 0x12->0x%p\n\n",
	       p + 16);

	validate_slab_cache(kmalloc_caches[type][4]);

	/* Hmmm... The next two are dangerous */
	/* A2: write one pointer-size beyond the end of a 32 byte object. */
	p = kzalloc(32, GFP_KERNEL);
	p[32 + sizeof(void *)] = 0x34;
	pr_err("\n2. kmalloc-32: Clobber next pointer/next slab 0x34 -> 0x%p\n",
	       p);
	pr_err("If allocated object is overwritten then not detectable\n\n");

	validate_slab_cache(kmalloc_caches[type][5]);

	/* A3: write at a pseudo-random offset past a 64 byte object. */
	p = kzalloc(64, GFP_KERNEL);
	p += 64 + (get_cycles() & 0xff) * sizeof(void *);
	*p = 0x56;
	pr_err("\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n",
	       p);
	pr_err("If allocated object is overwritten then not detectable\n\n");
	validate_slab_cache(kmalloc_caches[type][6]);

	pr_err("\nB. Corruption after free\n");

	/* B1: overwrite the first byte of a freed 128 byte object. */
	p = kzalloc(128, GFP_KERNEL);
	kfree(p);
	*p = 0x78;
	pr_err("1. kmalloc-128: Clobber first word 0x78->0x%p\n\n", p);
	validate_slab_cache(kmalloc_caches[type][7]);

	/* B2: overwrite a byte in the middle of a freed 256 byte object. */
	p = kzalloc(256, GFP_KERNEL);
	kfree(p);
	p[50] = 0x9a;
	pr_err("\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n", p);
	validate_slab_cache(kmalloc_caches[type][8]);

	/* B3: write just past the end of a freed 512 byte object. */
	p = kzalloc(512, GFP_KERNEL);
	kfree(p);
	p[512] = 0xab;
	pr_err("\n3. kmalloc-512: Clobber redzone 0xab->0x%p\n\n", p);
	validate_slab_cache(kmalloc_caches[type][9]);
}
#else
#ifdef CONFIG_SYSFS
/* No-op stand-in used when the resiliency test is compiled out. */
static void resiliency_test(void) {}
#endif
#endif /* SLUB_RESILIENCY_TEST */
Christoph Lametera5a84752010-10-05 13:57:27 -05004981
Christoph Lameterab4d5ed2010-10-05 13:57:26 -05004982#ifdef CONFIG_SYSFS
/*
 * Selectors for show_slab_objects(). Each enumerator is a bit position;
 * the SO_* masks below are the corresponding flag bits.
 */
enum slab_stat_type {
	SL_ALL,		/* every slab */
	SL_PARTIAL,	/* partially allocated slabs only */
	SL_CPU,		/* slabs used for cpu caches only */
	SL_OBJECTS,	/* count allocated objects, not slabs */
	SL_TOTAL	/* count object capacity, not slabs */
};

#define SO_ALL		(1 << SL_ALL)
#define SO_PARTIAL	(1 << SL_PARTIAL)
#define SO_CPU		(1 << SL_CPU)
#define SO_OBJECTS	(1 << SL_OBJECTS)
#define SO_TOTAL	(1 << SL_TOTAL)
Christoph Lameter81819f02007-05-06 14:49:36 -07004996
Cyrill Gorcunov62e5c4b2008-03-02 23:28:24 +03004997static ssize_t show_slab_objects(struct kmem_cache *s,
Joe Perchesbf16d192020-12-14 19:14:57 -08004998 char *buf, unsigned long flags)
Christoph Lameter81819f02007-05-06 14:49:36 -07004999{
5000 unsigned long total = 0;
Christoph Lameter81819f02007-05-06 14:49:36 -07005001 int node;
5002 int x;
5003 unsigned long *nodes;
Joe Perchesbf16d192020-12-14 19:14:57 -08005004 int len = 0;
Christoph Lameter81819f02007-05-06 14:49:36 -07005005
Kees Cook6396bb22018-06-12 14:03:40 -07005006 nodes = kcalloc(nr_node_ids, sizeof(unsigned long), GFP_KERNEL);
Cyrill Gorcunov62e5c4b2008-03-02 23:28:24 +03005007 if (!nodes)
5008 return -ENOMEM;
Christoph Lameter81819f02007-05-06 14:49:36 -07005009
Christoph Lameter205ab992008-04-14 19:11:40 +03005010 if (flags & SO_CPU) {
5011 int cpu;
Christoph Lameter81819f02007-05-06 14:49:36 -07005012
Christoph Lameter205ab992008-04-14 19:11:40 +03005013 for_each_possible_cpu(cpu) {
Chen Gangd0e0ac92013-07-15 09:05:29 +08005014 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab,
5015 cpu);
Christoph Lameterec3ab082012-05-09 10:09:56 -05005016 int node;
Christoph Lameter49e22582011-08-09 16:12:27 -05005017 struct page *page;
Christoph Lameterdfb4f092007-10-16 01:26:05 -07005018
Jason Low4db0c3c2015-04-15 16:14:08 -07005019 page = READ_ONCE(c->page);
Christoph Lameterec3ab082012-05-09 10:09:56 -05005020 if (!page)
5021 continue;
Christoph Lameter205ab992008-04-14 19:11:40 +03005022
Christoph Lameterec3ab082012-05-09 10:09:56 -05005023 node = page_to_nid(page);
5024 if (flags & SO_TOTAL)
5025 x = page->objects;
5026 else if (flags & SO_OBJECTS)
5027 x = page->inuse;
5028 else
5029 x = 1;
Christoph Lameter49e22582011-08-09 16:12:27 -05005030
Christoph Lameterec3ab082012-05-09 10:09:56 -05005031 total += x;
5032 nodes[node] += x;
5033
Wei Yanga93cf072017-07-06 15:36:31 -07005034 page = slub_percpu_partial_read_once(c);
Christoph Lameter49e22582011-08-09 16:12:27 -05005035 if (page) {
Li Zefan8afb1472013-09-10 11:43:37 +08005036 node = page_to_nid(page);
5037 if (flags & SO_TOTAL)
5038 WARN_ON_ONCE(1);
5039 else if (flags & SO_OBJECTS)
5040 WARN_ON_ONCE(1);
5041 else
5042 x = page->pages;
Eric Dumazetbc6697d2011-11-22 16:02:02 +01005043 total += x;
5044 nodes[node] += x;
Christoph Lameter49e22582011-08-09 16:12:27 -05005045 }
Christoph Lameter81819f02007-05-06 14:49:36 -07005046 }
5047 }
5048
Qian Caie4f8e512019-10-14 14:11:51 -07005049 /*
5050 * It is impossible to take "mem_hotplug_lock" here with "kernfs_mutex"
5051 * already held which will conflict with an existing lock order:
5052 *
5053 * mem_hotplug_lock->slab_mutex->kernfs_mutex
5054 *
5055 * We don't really need mem_hotplug_lock (to hold off
5056 * slab_mem_going_offline_callback) here because slab's memory hot
5057 * unplug code doesn't destroy the kmem_cache->node[] data.
5058 */
5059
Christoph Lameterab4d5ed2010-10-05 13:57:26 -05005060#ifdef CONFIG_SLUB_DEBUG
Christoph Lameter205ab992008-04-14 19:11:40 +03005061 if (flags & SO_ALL) {
Christoph Lameterfa45dc22014-08-06 16:04:09 -07005062 struct kmem_cache_node *n;
5063
5064 for_each_kmem_cache_node(s, node, n) {
Christoph Lameter81819f02007-05-06 14:49:36 -07005065
Chen Gangd0e0ac92013-07-15 09:05:29 +08005066 if (flags & SO_TOTAL)
5067 x = atomic_long_read(&n->total_objects);
5068 else if (flags & SO_OBJECTS)
5069 x = atomic_long_read(&n->total_objects) -
5070 count_partial(n, count_free);
Christoph Lameter205ab992008-04-14 19:11:40 +03005071 else
5072 x = atomic_long_read(&n->nr_slabs);
5073 total += x;
5074 nodes[node] += x;
5075 }
5076
Christoph Lameterab4d5ed2010-10-05 13:57:26 -05005077 } else
5078#endif
5079 if (flags & SO_PARTIAL) {
Christoph Lameterfa45dc22014-08-06 16:04:09 -07005080 struct kmem_cache_node *n;
Christoph Lameter205ab992008-04-14 19:11:40 +03005081
Christoph Lameterfa45dc22014-08-06 16:04:09 -07005082 for_each_kmem_cache_node(s, node, n) {
Christoph Lameter205ab992008-04-14 19:11:40 +03005083 if (flags & SO_TOTAL)
5084 x = count_partial(n, count_total);
5085 else if (flags & SO_OBJECTS)
5086 x = count_partial(n, count_inuse);
Christoph Lameter81819f02007-05-06 14:49:36 -07005087 else
5088 x = n->nr_partial;
5089 total += x;
5090 nodes[node] += x;
5091 }
Christoph Lameter81819f02007-05-06 14:49:36 -07005092 }
Joe Perchesbf16d192020-12-14 19:14:57 -08005093
5094 len += sysfs_emit_at(buf, len, "%lu", total);
Christoph Lameter81819f02007-05-06 14:49:36 -07005095#ifdef CONFIG_NUMA
Joe Perchesbf16d192020-12-14 19:14:57 -08005096 for (node = 0; node < nr_node_ids; node++) {
Christoph Lameter81819f02007-05-06 14:49:36 -07005097 if (nodes[node])
Joe Perchesbf16d192020-12-14 19:14:57 -08005098 len += sysfs_emit_at(buf, len, " N%d=%lu",
5099 node, nodes[node]);
5100 }
Christoph Lameter81819f02007-05-06 14:49:36 -07005101#endif
Joe Perchesbf16d192020-12-14 19:14:57 -08005102 len += sysfs_emit_at(buf, len, "\n");
Christoph Lameter81819f02007-05-06 14:49:36 -07005103 kfree(nodes);
Joe Perchesbf16d192020-12-14 19:14:57 -08005104
5105 return len;
Christoph Lameter81819f02007-05-06 14:49:36 -07005106}
5107
Christoph Lameter81819f02007-05-06 14:49:36 -07005108#define to_slab_attr(n) container_of(n, struct slab_attribute, attr)
Phil Carmody497888c2011-07-14 15:07:13 +03005109#define to_slab(n) container_of(n, struct kmem_cache, kobj)
Christoph Lameter81819f02007-05-06 14:49:36 -07005110
5111struct slab_attribute {
5112 struct attribute attr;
5113 ssize_t (*show)(struct kmem_cache *s, char *buf);
5114 ssize_t (*store)(struct kmem_cache *s, const char *x, size_t count);
5115};
5116
5117#define SLAB_ATTR_RO(_name) \
Vasiliy Kulikovab067e92011-09-27 21:54:53 +04005118 static struct slab_attribute _name##_attr = \
5119 __ATTR(_name, 0400, _name##_show, NULL)
Christoph Lameter81819f02007-05-06 14:49:36 -07005120
5121#define SLAB_ATTR(_name) \
5122 static struct slab_attribute _name##_attr = \
Vasiliy Kulikovab067e92011-09-27 21:54:53 +04005123 __ATTR(_name, 0600, _name##_show, _name##_store)
Christoph Lameter81819f02007-05-06 14:49:36 -07005124
Christoph Lameter81819f02007-05-06 14:49:36 -07005125static ssize_t slab_size_show(struct kmem_cache *s, char *buf)
5126{
Joe Perchesbf16d192020-12-14 19:14:57 -08005127 return sysfs_emit(buf, "%u\n", s->size);
Christoph Lameter81819f02007-05-06 14:49:36 -07005128}
5129SLAB_ATTR_RO(slab_size);
5130
5131static ssize_t align_show(struct kmem_cache *s, char *buf)
5132{
Joe Perchesbf16d192020-12-14 19:14:57 -08005133 return sysfs_emit(buf, "%u\n", s->align);
Christoph Lameter81819f02007-05-06 14:49:36 -07005134}
5135SLAB_ATTR_RO(align);
5136
5137static ssize_t object_size_show(struct kmem_cache *s, char *buf)
5138{
Joe Perchesbf16d192020-12-14 19:14:57 -08005139 return sysfs_emit(buf, "%u\n", s->object_size);
Christoph Lameter81819f02007-05-06 14:49:36 -07005140}
5141SLAB_ATTR_RO(object_size);
5142
5143static ssize_t objs_per_slab_show(struct kmem_cache *s, char *buf)
5144{
Joe Perchesbf16d192020-12-14 19:14:57 -08005145 return sysfs_emit(buf, "%u\n", oo_objects(s->oo));
Christoph Lameter81819f02007-05-06 14:49:36 -07005146}
5147SLAB_ATTR_RO(objs_per_slab);
5148
5149static ssize_t order_show(struct kmem_cache *s, char *buf)
5150{
Joe Perchesbf16d192020-12-14 19:14:57 -08005151 return sysfs_emit(buf, "%u\n", oo_order(s->oo));
Christoph Lameter81819f02007-05-06 14:49:36 -07005152}
Vlastimil Babka32a6f402020-08-06 23:18:41 -07005153SLAB_ATTR_RO(order);
Christoph Lameter81819f02007-05-06 14:49:36 -07005154
David Rientjes73d342b2009-02-22 17:40:09 -08005155static ssize_t min_partial_show(struct kmem_cache *s, char *buf)
5156{
Joe Perchesbf16d192020-12-14 19:14:57 -08005157 return sysfs_emit(buf, "%lu\n", s->min_partial);
David Rientjes73d342b2009-02-22 17:40:09 -08005158}
5159
5160static ssize_t min_partial_store(struct kmem_cache *s, const char *buf,
5161 size_t length)
5162{
5163 unsigned long min;
5164 int err;
5165
Jingoo Han3dbb95f2013-09-11 14:20:25 -07005166 err = kstrtoul(buf, 10, &min);
David Rientjes73d342b2009-02-22 17:40:09 -08005167 if (err)
5168 return err;
5169
David Rientjesc0bdb232009-02-25 09:16:35 +02005170 set_min_partial(s, min);
David Rientjes73d342b2009-02-22 17:40:09 -08005171 return length;
5172}
5173SLAB_ATTR(min_partial);
5174
Christoph Lameter49e22582011-08-09 16:12:27 -05005175static ssize_t cpu_partial_show(struct kmem_cache *s, char *buf)
5176{
Joe Perchesbf16d192020-12-14 19:14:57 -08005177 return sysfs_emit(buf, "%u\n", slub_cpu_partial(s));
Christoph Lameter49e22582011-08-09 16:12:27 -05005178}
5179
5180static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf,
5181 size_t length)
5182{
Alexey Dobriyane5d99982018-04-05 16:21:10 -07005183 unsigned int objects;
Christoph Lameter49e22582011-08-09 16:12:27 -05005184 int err;
5185
Alexey Dobriyane5d99982018-04-05 16:21:10 -07005186 err = kstrtouint(buf, 10, &objects);
Christoph Lameter49e22582011-08-09 16:12:27 -05005187 if (err)
5188 return err;
Joonsoo Kim345c9052013-06-19 14:05:52 +09005189 if (objects && !kmem_cache_has_cpu_partial(s))
David Rientjes74ee4ef2012-01-09 13:19:45 -08005190 return -EINVAL;
Christoph Lameter49e22582011-08-09 16:12:27 -05005191
Wei Yange6d0e1d2017-07-06 15:36:34 -07005192 slub_set_cpu_partial(s, objects);
Christoph Lameter49e22582011-08-09 16:12:27 -05005193 flush_all(s);
5194 return length;
5195}
5196SLAB_ATTR(cpu_partial);
5197
Christoph Lameter81819f02007-05-06 14:49:36 -07005198static ssize_t ctor_show(struct kmem_cache *s, char *buf)
5199{
Joe Perches62c70bc2011-01-13 15:45:52 -08005200 if (!s->ctor)
5201 return 0;
Joe Perchesbf16d192020-12-14 19:14:57 -08005202 return sysfs_emit(buf, "%pS\n", s->ctor);
Christoph Lameter81819f02007-05-06 14:49:36 -07005203}
5204SLAB_ATTR_RO(ctor);
5205
Christoph Lameter81819f02007-05-06 14:49:36 -07005206static ssize_t aliases_show(struct kmem_cache *s, char *buf)
5207{
Joe Perchesbf16d192020-12-14 19:14:57 -08005208 return sysfs_emit(buf, "%d\n", s->refcount < 0 ? 0 : s->refcount - 1);
Christoph Lameter81819f02007-05-06 14:49:36 -07005209}
5210SLAB_ATTR_RO(aliases);
5211
Christoph Lameter81819f02007-05-06 14:49:36 -07005212static ssize_t partial_show(struct kmem_cache *s, char *buf)
5213{
Christoph Lameterd9acf4b2008-02-15 15:22:21 -08005214 return show_slab_objects(s, buf, SO_PARTIAL);
Christoph Lameter81819f02007-05-06 14:49:36 -07005215}
5216SLAB_ATTR_RO(partial);
5217
5218static ssize_t cpu_slabs_show(struct kmem_cache *s, char *buf)
5219{
Christoph Lameterd9acf4b2008-02-15 15:22:21 -08005220 return show_slab_objects(s, buf, SO_CPU);
Christoph Lameter81819f02007-05-06 14:49:36 -07005221}
5222SLAB_ATTR_RO(cpu_slabs);
5223
5224static ssize_t objects_show(struct kmem_cache *s, char *buf)
5225{
Christoph Lameter205ab992008-04-14 19:11:40 +03005226 return show_slab_objects(s, buf, SO_ALL|SO_OBJECTS);
Christoph Lameter81819f02007-05-06 14:49:36 -07005227}
5228SLAB_ATTR_RO(objects);
5229
Christoph Lameter205ab992008-04-14 19:11:40 +03005230static ssize_t objects_partial_show(struct kmem_cache *s, char *buf)
5231{
5232 return show_slab_objects(s, buf, SO_PARTIAL|SO_OBJECTS);
5233}
5234SLAB_ATTR_RO(objects_partial);
5235
Christoph Lameter49e22582011-08-09 16:12:27 -05005236static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf)
5237{
5238 int objects = 0;
5239 int pages = 0;
5240 int cpu;
Joe Perchesbf16d192020-12-14 19:14:57 -08005241 int len = 0;
Christoph Lameter49e22582011-08-09 16:12:27 -05005242
5243 for_each_online_cpu(cpu) {
Wei Yanga93cf072017-07-06 15:36:31 -07005244 struct page *page;
5245
5246 page = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
Christoph Lameter49e22582011-08-09 16:12:27 -05005247
5248 if (page) {
5249 pages += page->pages;
5250 objects += page->pobjects;
5251 }
5252 }
5253
Joe Perchesbf16d192020-12-14 19:14:57 -08005254 len += sysfs_emit_at(buf, len, "%d(%d)", objects, pages);
Christoph Lameter49e22582011-08-09 16:12:27 -05005255
5256#ifdef CONFIG_SMP
5257 for_each_online_cpu(cpu) {
Wei Yanga93cf072017-07-06 15:36:31 -07005258 struct page *page;
5259
5260 page = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
Joe Perchesbf16d192020-12-14 19:14:57 -08005261 if (page)
5262 len += sysfs_emit_at(buf, len, " C%d=%d(%d)",
5263 cpu, page->pobjects, page->pages);
Christoph Lameter49e22582011-08-09 16:12:27 -05005264 }
5265#endif
Joe Perchesbf16d192020-12-14 19:14:57 -08005266 len += sysfs_emit_at(buf, len, "\n");
5267
5268 return len;
Christoph Lameter49e22582011-08-09 16:12:27 -05005269}
5270SLAB_ATTR_RO(slabs_cpu_partial);
5271
Christoph Lameter81819f02007-05-06 14:49:36 -07005272static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf)
5273{
Joe Perchesbf16d192020-12-14 19:14:57 -08005274 return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT));
Christoph Lameter81819f02007-05-06 14:49:36 -07005275}
Vlastimil Babka8f58119a2020-08-06 23:18:48 -07005276SLAB_ATTR_RO(reclaim_account);
Christoph Lameter81819f02007-05-06 14:49:36 -07005277
5278static ssize_t hwcache_align_show(struct kmem_cache *s, char *buf)
5279{
Joe Perchesbf16d192020-12-14 19:14:57 -08005280 return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_HWCACHE_ALIGN));
Christoph Lameter81819f02007-05-06 14:49:36 -07005281}
5282SLAB_ATTR_RO(hwcache_align);
5283
5284#ifdef CONFIG_ZONE_DMA
5285static ssize_t cache_dma_show(struct kmem_cache *s, char *buf)
5286{
Joe Perchesbf16d192020-12-14 19:14:57 -08005287 return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_CACHE_DMA));
Christoph Lameter81819f02007-05-06 14:49:36 -07005288}
5289SLAB_ATTR_RO(cache_dma);
5290#endif
5291
David Windsor8eb82842017-06-10 22:50:28 -04005292static ssize_t usersize_show(struct kmem_cache *s, char *buf)
5293{
Joe Perchesbf16d192020-12-14 19:14:57 -08005294 return sysfs_emit(buf, "%u\n", s->usersize);
David Windsor8eb82842017-06-10 22:50:28 -04005295}
5296SLAB_ATTR_RO(usersize);
5297
Christoph Lameter81819f02007-05-06 14:49:36 -07005298static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf)
5299{
Joe Perchesbf16d192020-12-14 19:14:57 -08005300 return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_TYPESAFE_BY_RCU));
Christoph Lameter81819f02007-05-06 14:49:36 -07005301}
5302SLAB_ATTR_RO(destroy_by_rcu);
5303
Christoph Lameterab4d5ed2010-10-05 13:57:26 -05005304#ifdef CONFIG_SLUB_DEBUG
Christoph Lametera5a84752010-10-05 13:57:27 -05005305static ssize_t slabs_show(struct kmem_cache *s, char *buf)
5306{
5307 return show_slab_objects(s, buf, SO_ALL);
5308}
5309SLAB_ATTR_RO(slabs);
5310
5311static ssize_t total_objects_show(struct kmem_cache *s, char *buf)
5312{
5313 return show_slab_objects(s, buf, SO_ALL|SO_TOTAL);
5314}
5315SLAB_ATTR_RO(total_objects);
5316
5317static ssize_t sanity_checks_show(struct kmem_cache *s, char *buf)
5318{
Joe Perchesbf16d192020-12-14 19:14:57 -08005319 return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_CONSISTENCY_CHECKS));
Christoph Lametera5a84752010-10-05 13:57:27 -05005320}
Vlastimil Babka060807f2020-08-06 23:18:45 -07005321SLAB_ATTR_RO(sanity_checks);
Christoph Lametera5a84752010-10-05 13:57:27 -05005322
5323static ssize_t trace_show(struct kmem_cache *s, char *buf)
5324{
Joe Perchesbf16d192020-12-14 19:14:57 -08005325 return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_TRACE));
Christoph Lametera5a84752010-10-05 13:57:27 -05005326}
Vlastimil Babka060807f2020-08-06 23:18:45 -07005327SLAB_ATTR_RO(trace);
Christoph Lametera5a84752010-10-05 13:57:27 -05005328
Christoph Lameter81819f02007-05-06 14:49:36 -07005329static ssize_t red_zone_show(struct kmem_cache *s, char *buf)
5330{
Joe Perchesbf16d192020-12-14 19:14:57 -08005331 return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_RED_ZONE));
Christoph Lameter81819f02007-05-06 14:49:36 -07005332}
5333
Vlastimil Babkaad38b5b2020-08-06 23:18:38 -07005334SLAB_ATTR_RO(red_zone);
Christoph Lameter81819f02007-05-06 14:49:36 -07005335
5336static ssize_t poison_show(struct kmem_cache *s, char *buf)
5337{
Joe Perchesbf16d192020-12-14 19:14:57 -08005338 return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_POISON));
Christoph Lameter81819f02007-05-06 14:49:36 -07005339}
5340
Vlastimil Babkaad38b5b2020-08-06 23:18:38 -07005341SLAB_ATTR_RO(poison);
Christoph Lameter81819f02007-05-06 14:49:36 -07005342
5343static ssize_t store_user_show(struct kmem_cache *s, char *buf)
5344{
Joe Perchesbf16d192020-12-14 19:14:57 -08005345 return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_STORE_USER));
Christoph Lameter81819f02007-05-06 14:49:36 -07005346}
5347
Vlastimil Babkaad38b5b2020-08-06 23:18:38 -07005348SLAB_ATTR_RO(store_user);
Christoph Lameter81819f02007-05-06 14:49:36 -07005349
Christoph Lameter53e15af2007-05-06 14:49:43 -07005350static ssize_t validate_show(struct kmem_cache *s, char *buf)
5351{
5352 return 0;
5353}
5354
5355static ssize_t validate_store(struct kmem_cache *s,
5356 const char *buf, size_t length)
5357{
Christoph Lameter434e2452007-07-17 04:03:30 -07005358 int ret = -EINVAL;
5359
5360 if (buf[0] == '1') {
5361 ret = validate_slab_cache(s);
5362 if (ret >= 0)
5363 ret = length;
5364 }
5365 return ret;
Christoph Lameter53e15af2007-05-06 14:49:43 -07005366}
5367SLAB_ATTR(validate);
Christoph Lametera5a84752010-10-05 13:57:27 -05005368
5369static ssize_t alloc_calls_show(struct kmem_cache *s, char *buf)
5370{
5371 if (!(s->flags & SLAB_STORE_USER))
5372 return -ENOSYS;
5373 return list_locations(s, buf, TRACK_ALLOC);
5374}
5375SLAB_ATTR_RO(alloc_calls);
5376
5377static ssize_t free_calls_show(struct kmem_cache *s, char *buf)
5378{
5379 if (!(s->flags & SLAB_STORE_USER))
5380 return -ENOSYS;
5381 return list_locations(s, buf, TRACK_FREE);
5382}
5383SLAB_ATTR_RO(free_calls);
5384#endif /* CONFIG_SLUB_DEBUG */
5385
5386#ifdef CONFIG_FAILSLAB
5387static ssize_t failslab_show(struct kmem_cache *s, char *buf)
5388{
Joe Perchesbf16d192020-12-14 19:14:57 -08005389 return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_FAILSLAB));
Christoph Lametera5a84752010-10-05 13:57:27 -05005390}
Vlastimil Babka060807f2020-08-06 23:18:45 -07005391SLAB_ATTR_RO(failslab);
Christoph Lameterab4d5ed2010-10-05 13:57:26 -05005392#endif
Christoph Lameter53e15af2007-05-06 14:49:43 -07005393
Christoph Lameter2086d262007-05-06 14:49:46 -07005394static ssize_t shrink_show(struct kmem_cache *s, char *buf)
5395{
5396 return 0;
5397}
5398
5399static ssize_t shrink_store(struct kmem_cache *s,
5400 const char *buf, size_t length)
5401{
Vladimir Davydov832f37f2015-02-12 14:59:41 -08005402 if (buf[0] == '1')
Roman Gushchin10befea2020-08-06 23:21:27 -07005403 kmem_cache_shrink(s);
Vladimir Davydov832f37f2015-02-12 14:59:41 -08005404 else
Christoph Lameter2086d262007-05-06 14:49:46 -07005405 return -EINVAL;
5406 return length;
5407}
5408SLAB_ATTR(shrink);
5409
Christoph Lameter81819f02007-05-06 14:49:36 -07005410#ifdef CONFIG_NUMA
Christoph Lameter98246012008-01-07 23:20:26 -08005411static ssize_t remote_node_defrag_ratio_show(struct kmem_cache *s, char *buf)
Christoph Lameter81819f02007-05-06 14:49:36 -07005412{
Joe Perchesbf16d192020-12-14 19:14:57 -08005413 return sysfs_emit(buf, "%u\n", s->remote_node_defrag_ratio / 10);
Christoph Lameter81819f02007-05-06 14:49:36 -07005414}
5415
Christoph Lameter98246012008-01-07 23:20:26 -08005416static ssize_t remote_node_defrag_ratio_store(struct kmem_cache *s,
Christoph Lameter81819f02007-05-06 14:49:36 -07005417 const char *buf, size_t length)
5418{
Alexey Dobriyaneb7235e2018-04-05 16:20:48 -07005419 unsigned int ratio;
Christoph Lameter0121c6192008-04-29 16:11:12 -07005420 int err;
Christoph Lameter81819f02007-05-06 14:49:36 -07005421
Alexey Dobriyaneb7235e2018-04-05 16:20:48 -07005422 err = kstrtouint(buf, 10, &ratio);
Christoph Lameter0121c6192008-04-29 16:11:12 -07005423 if (err)
5424 return err;
Alexey Dobriyaneb7235e2018-04-05 16:20:48 -07005425 if (ratio > 100)
5426 return -ERANGE;
Christoph Lameter0121c6192008-04-29 16:11:12 -07005427
Alexey Dobriyaneb7235e2018-04-05 16:20:48 -07005428 s->remote_node_defrag_ratio = ratio * 10;
Christoph Lameter0121c6192008-04-29 16:11:12 -07005429
Christoph Lameter81819f02007-05-06 14:49:36 -07005430 return length;
5431}
Christoph Lameter98246012008-01-07 23:20:26 -08005432SLAB_ATTR(remote_node_defrag_ratio);
Christoph Lameter81819f02007-05-06 14:49:36 -07005433#endif
5434
Christoph Lameter8ff12cf2008-02-07 17:47:41 -08005435#ifdef CONFIG_SLUB_STATS
Christoph Lameter8ff12cf2008-02-07 17:47:41 -08005436static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si)
5437{
5438 unsigned long sum = 0;
5439 int cpu;
Joe Perchesbf16d192020-12-14 19:14:57 -08005440 int len = 0;
Kees Cook6da2ec52018-06-12 13:55:00 -07005441 int *data = kmalloc_array(nr_cpu_ids, sizeof(int), GFP_KERNEL);
Christoph Lameter8ff12cf2008-02-07 17:47:41 -08005442
5443 if (!data)
5444 return -ENOMEM;
5445
5446 for_each_online_cpu(cpu) {
Christoph Lameter9dfc6e62009-12-18 16:26:20 -06005447 unsigned x = per_cpu_ptr(s->cpu_slab, cpu)->stat[si];
Christoph Lameter8ff12cf2008-02-07 17:47:41 -08005448
5449 data[cpu] = x;
5450 sum += x;
5451 }
5452
Joe Perchesbf16d192020-12-14 19:14:57 -08005453 len += sysfs_emit_at(buf, len, "%lu", sum);
Christoph Lameter8ff12cf2008-02-07 17:47:41 -08005454
Christoph Lameter50ef37b2008-04-14 18:52:05 +03005455#ifdef CONFIG_SMP
Christoph Lameter8ff12cf2008-02-07 17:47:41 -08005456 for_each_online_cpu(cpu) {
Joe Perchesbf16d192020-12-14 19:14:57 -08005457 if (data[cpu])
5458 len += sysfs_emit_at(buf, len, " C%d=%u",
5459 cpu, data[cpu]);
Christoph Lameter8ff12cf2008-02-07 17:47:41 -08005460 }
Christoph Lameter50ef37b2008-04-14 18:52:05 +03005461#endif
Christoph Lameter8ff12cf2008-02-07 17:47:41 -08005462 kfree(data);
Joe Perchesbf16d192020-12-14 19:14:57 -08005463 len += sysfs_emit_at(buf, len, "\n");
5464
5465 return len;
Christoph Lameter8ff12cf2008-02-07 17:47:41 -08005466}
5467
David Rientjes78eb00c2009-10-15 02:20:22 -07005468static void clear_stat(struct kmem_cache *s, enum stat_item si)
5469{
5470 int cpu;
5471
5472 for_each_online_cpu(cpu)
Christoph Lameter9dfc6e62009-12-18 16:26:20 -06005473 per_cpu_ptr(s->cpu_slab, cpu)->stat[si] = 0;
David Rientjes78eb00c2009-10-15 02:20:22 -07005474}
5475
/*
 * Define a read/write attribute <text> for statistic <si>:
 * reading prints it through show_stat(); writing input that starts
 * with '0' resets it through clear_stat(), anything else is -EINVAL.
 */
#define STAT_ATTR(si, text)					\
static ssize_t text##_show(struct kmem_cache *s, char *buf)	\
{								\
	return show_stat(s, buf, si);				\
}								\
static ssize_t text##_store(struct kmem_cache *s,		\
				const char *buf, size_t length)	\
{								\
	if (buf[0] == '0') {					\
		clear_stat(s, si);				\
		return length;					\
	}							\
	return -EINVAL;						\
}								\
SLAB_ATTR(text);

/* One show/store pair and attribute per enum stat_item value used below. */
STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath);
STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath);
STAT_ATTR(FREE_FASTPATH, free_fastpath);
STAT_ATTR(FREE_SLOWPATH, free_slowpath);
STAT_ATTR(FREE_FROZEN, free_frozen);
STAT_ATTR(FREE_ADD_PARTIAL, free_add_partial);
STAT_ATTR(FREE_REMOVE_PARTIAL, free_remove_partial);
STAT_ATTR(ALLOC_FROM_PARTIAL, alloc_from_partial);
STAT_ATTR(ALLOC_SLAB, alloc_slab);
STAT_ATTR(ALLOC_REFILL, alloc_refill);
STAT_ATTR(ALLOC_NODE_MISMATCH, alloc_node_mismatch);
STAT_ATTR(FREE_SLAB, free_slab);
STAT_ATTR(CPUSLAB_FLUSH, cpuslab_flush);
STAT_ATTR(DEACTIVATE_FULL, deactivate_full);
STAT_ATTR(DEACTIVATE_EMPTY, deactivate_empty);
STAT_ATTR(DEACTIVATE_TO_HEAD, deactivate_to_head);
STAT_ATTR(DEACTIVATE_TO_TAIL, deactivate_to_tail);
STAT_ATTR(DEACTIVATE_REMOTE_FREES, deactivate_remote_frees);
STAT_ATTR(DEACTIVATE_BYPASS, deactivate_bypass);
STAT_ATTR(ORDER_FALLBACK, order_fallback);
STAT_ATTR(CMPXCHG_DOUBLE_CPU_FAIL, cmpxchg_double_cpu_fail);
STAT_ATTR(CMPXCHG_DOUBLE_FAIL, cmpxchg_double_fail);
STAT_ATTR(CPU_PARTIAL_ALLOC, cpu_partial_alloc);
STAT_ATTR(CPU_PARTIAL_FREE, cpu_partial_free);
STAT_ATTR(CPU_PARTIAL_NODE, cpu_partial_node);
STAT_ATTR(CPU_PARTIAL_DRAIN, cpu_partial_drain);
Tobin C. Harding6dfd1b62019-05-13 17:16:09 -07005517#endif /* CONFIG_SLUB_STATS */
Christoph Lameter8ff12cf2008-02-07 17:47:41 -08005518
Pekka Enberg06428782008-01-07 23:20:27 -08005519static struct attribute *slab_attrs[] = {
Christoph Lameter81819f02007-05-06 14:49:36 -07005520 &slab_size_attr.attr,
5521 &object_size_attr.attr,
5522 &objs_per_slab_attr.attr,
5523 &order_attr.attr,
David Rientjes73d342b2009-02-22 17:40:09 -08005524 &min_partial_attr.attr,
Christoph Lameter49e22582011-08-09 16:12:27 -05005525 &cpu_partial_attr.attr,
Christoph Lameter81819f02007-05-06 14:49:36 -07005526 &objects_attr.attr,
Christoph Lameter205ab992008-04-14 19:11:40 +03005527 &objects_partial_attr.attr,
Christoph Lameter81819f02007-05-06 14:49:36 -07005528 &partial_attr.attr,
5529 &cpu_slabs_attr.attr,
5530 &ctor_attr.attr,
Christoph Lameter81819f02007-05-06 14:49:36 -07005531 &aliases_attr.attr,
5532 &align_attr.attr,
Christoph Lameter81819f02007-05-06 14:49:36 -07005533 &hwcache_align_attr.attr,
5534 &reclaim_account_attr.attr,
5535 &destroy_by_rcu_attr.attr,
Christoph Lametera5a84752010-10-05 13:57:27 -05005536 &shrink_attr.attr,
Christoph Lameter49e22582011-08-09 16:12:27 -05005537 &slabs_cpu_partial_attr.attr,
Christoph Lameterab4d5ed2010-10-05 13:57:26 -05005538#ifdef CONFIG_SLUB_DEBUG
Christoph Lametera5a84752010-10-05 13:57:27 -05005539 &total_objects_attr.attr,
5540 &slabs_attr.attr,
5541 &sanity_checks_attr.attr,
5542 &trace_attr.attr,
Christoph Lameter81819f02007-05-06 14:49:36 -07005543 &red_zone_attr.attr,
5544 &poison_attr.attr,
5545 &store_user_attr.attr,
Christoph Lameter53e15af2007-05-06 14:49:43 -07005546 &validate_attr.attr,
Christoph Lameter88a420e2007-05-06 14:49:45 -07005547 &alloc_calls_attr.attr,
5548 &free_calls_attr.attr,
Christoph Lameterab4d5ed2010-10-05 13:57:26 -05005549#endif
Christoph Lameter81819f02007-05-06 14:49:36 -07005550#ifdef CONFIG_ZONE_DMA
5551 &cache_dma_attr.attr,
5552#endif
5553#ifdef CONFIG_NUMA
Christoph Lameter98246012008-01-07 23:20:26 -08005554 &remote_node_defrag_ratio_attr.attr,
Christoph Lameter81819f02007-05-06 14:49:36 -07005555#endif
Christoph Lameter8ff12cf2008-02-07 17:47:41 -08005556#ifdef CONFIG_SLUB_STATS
5557 &alloc_fastpath_attr.attr,
5558 &alloc_slowpath_attr.attr,
5559 &free_fastpath_attr.attr,
5560 &free_slowpath_attr.attr,
5561 &free_frozen_attr.attr,
5562 &free_add_partial_attr.attr,
5563 &free_remove_partial_attr.attr,
5564 &alloc_from_partial_attr.attr,
5565 &alloc_slab_attr.attr,
5566 &alloc_refill_attr.attr,
Christoph Lametere36a2652011-06-01 12:25:57 -05005567 &alloc_node_mismatch_attr.attr,
Christoph Lameter8ff12cf2008-02-07 17:47:41 -08005568 &free_slab_attr.attr,
5569 &cpuslab_flush_attr.attr,
5570 &deactivate_full_attr.attr,
5571 &deactivate_empty_attr.attr,
5572 &deactivate_to_head_attr.attr,
5573 &deactivate_to_tail_attr.attr,
5574 &deactivate_remote_frees_attr.attr,
Christoph Lameter03e404a2011-06-01 12:25:58 -05005575 &deactivate_bypass_attr.attr,
Christoph Lameter65c33762008-04-14 19:11:40 +03005576 &order_fallback_attr.attr,
Christoph Lameterb789ef52011-06-01 12:25:49 -05005577 &cmpxchg_double_fail_attr.attr,
5578 &cmpxchg_double_cpu_fail_attr.attr,
Christoph Lameter49e22582011-08-09 16:12:27 -05005579 &cpu_partial_alloc_attr.attr,
5580 &cpu_partial_free_attr.attr,
Alex Shi8028dce2012-02-03 23:34:56 +08005581 &cpu_partial_node_attr.attr,
5582 &cpu_partial_drain_attr.attr,
Christoph Lameter8ff12cf2008-02-07 17:47:41 -08005583#endif
Dmitry Monakhov4c13dd32010-02-26 09:36:12 +03005584#ifdef CONFIG_FAILSLAB
5585 &failslab_attr.attr,
5586#endif
David Windsor8eb82842017-06-10 22:50:28 -04005587 &usersize_attr.attr,
Dmitry Monakhov4c13dd32010-02-26 09:36:12 +03005588
Christoph Lameter81819f02007-05-06 14:49:36 -07005589 NULL
5590};
5591
Arvind Yadav1fdaaa22017-09-06 16:21:56 -07005592static const struct attribute_group slab_attr_group = {
Christoph Lameter81819f02007-05-06 14:49:36 -07005593 .attrs = slab_attrs,
5594};
5595
5596static ssize_t slab_attr_show(struct kobject *kobj,
5597 struct attribute *attr,
5598 char *buf)
5599{
5600 struct slab_attribute *attribute;
5601 struct kmem_cache *s;
5602 int err;
5603
5604 attribute = to_slab_attr(attr);
5605 s = to_slab(kobj);
5606
5607 if (!attribute->show)
5608 return -EIO;
5609
5610 err = attribute->show(s, buf);
5611
5612 return err;
5613}
5614
5615static ssize_t slab_attr_store(struct kobject *kobj,
5616 struct attribute *attr,
5617 const char *buf, size_t len)
5618{
5619 struct slab_attribute *attribute;
5620 struct kmem_cache *s;
5621 int err;
5622
5623 attribute = to_slab_attr(attr);
5624 s = to_slab(kobj);
5625
5626 if (!attribute->store)
5627 return -EIO;
5628
5629 err = attribute->store(s, buf, len);
Christoph Lameter81819f02007-05-06 14:49:36 -07005630 return err;
5631}
5632
/* kobject release callback: hand the owning cache to slab_kmem_cache_release(). */
static void kmem_cache_release(struct kobject *k)
{
	struct kmem_cache *s = to_slab(k);

	slab_kmem_cache_release(s);
}
5637
Emese Revfy52cf25d2010-01-19 02:58:23 +01005638static const struct sysfs_ops slab_sysfs_ops = {
Christoph Lameter81819f02007-05-06 14:49:36 -07005639 .show = slab_attr_show,
5640 .store = slab_attr_store,
5641};
5642
5643static struct kobj_type slab_ktype = {
5644 .sysfs_ops = &slab_sysfs_ops,
Christoph Lameter41a21282014-05-06 12:50:08 -07005645 .release = kmem_cache_release,
Christoph Lameter81819f02007-05-06 14:49:36 -07005646};
5647
/* kset that slab cache kobjects are added to. */
static struct kset *slab_kset;

/* Return the kset for cache @s; @s is not consulted, slab_kset is returned. */
static inline struct kset *cache_kset(struct kmem_cache *s)
{
	struct kset *kset = slab_kset;

	return kset;
}
5654
Christoph Lameter81819f02007-05-06 14:49:36 -07005655#define ID_STR_LENGTH 64
5656
5657/* Create a unique string id for a slab cache:
Christoph Lameter6446faa2008-02-15 23:45:26 -08005658 *
5659 * Format :[flags-]size
Christoph Lameter81819f02007-05-06 14:49:36 -07005660 */
5661static char *create_unique_id(struct kmem_cache *s)
5662{
5663 char *name = kmalloc(ID_STR_LENGTH, GFP_KERNEL);
5664 char *p = name;
5665
5666 BUG_ON(!name);
5667
5668 *p++ = ':';
5669 /*
5670 * First flags affecting slabcache operations. We will only
5671 * get here for aliasable slabs so we do not need to support
5672 * too many flags. The flags here must cover all flags that
5673 * are matched during merging to guarantee that the id is
5674 * unique.
5675 */
5676 if (s->flags & SLAB_CACHE_DMA)
5677 *p++ = 'd';
Nicolas Boichat6d6ea1e2019-03-28 20:43:42 -07005678 if (s->flags & SLAB_CACHE_DMA32)
5679 *p++ = 'D';
Christoph Lameter81819f02007-05-06 14:49:36 -07005680 if (s->flags & SLAB_RECLAIM_ACCOUNT)
5681 *p++ = 'a';
Laura Abbottbecfda62016-03-15 14:55:06 -07005682 if (s->flags & SLAB_CONSISTENCY_CHECKS)
Christoph Lameter81819f02007-05-06 14:49:36 -07005683 *p++ = 'F';
Vladimir Davydov230e9fc2016-01-14 15:18:15 -08005684 if (s->flags & SLAB_ACCOUNT)
5685 *p++ = 'A';
Christoph Lameter81819f02007-05-06 14:49:36 -07005686 if (p != name + 1)
5687 *p++ = '-';
Alexey Dobriyan44065b22018-04-05 16:21:20 -07005688 p += sprintf(p, "%07u", s->size);
Glauber Costa2633d7a2012-12-18 14:22:34 -08005689
Christoph Lameter81819f02007-05-06 14:49:36 -07005690 BUG_ON(p > name + ID_STR_LENGTH - 1);
5691 return name;
5692}
5693
5694static int sysfs_slab_add(struct kmem_cache *s)
5695{
5696 int err;
5697 const char *name;
Tejun Heo1663f262017-02-22 15:41:39 -08005698 struct kset *kset = cache_kset(s);
Christoph Lameter45530c42012-11-28 16:23:07 +00005699 int unmergeable = slab_unmergeable(s);
Christoph Lameter81819f02007-05-06 14:49:36 -07005700
Tejun Heo1663f262017-02-22 15:41:39 -08005701 if (!kset) {
5702 kobject_init(&s->kobj, &slab_ktype);
5703 return 0;
5704 }
5705
Miles Chen11066382017-11-15 17:32:25 -08005706 if (!unmergeable && disable_higher_order_debug &&
5707 (slub_debug & DEBUG_METADATA_FLAGS))
5708 unmergeable = 1;
5709
Christoph Lameter81819f02007-05-06 14:49:36 -07005710 if (unmergeable) {
5711 /*
5712 * Slabcache can never be merged so we can use the name proper.
5713 * This is typically the case for debug situations. In that
5714 * case we can catch duplicate names easily.
5715 */
Greg Kroah-Hartman27c3a312007-11-01 09:29:06 -06005716 sysfs_remove_link(&slab_kset->kobj, s->name);
Christoph Lameter81819f02007-05-06 14:49:36 -07005717 name = s->name;
5718 } else {
5719 /*
5720 * Create a unique name for the slab as a target
5721 * for the symlinks.
5722 */
5723 name = create_unique_id(s);
5724 }
5725
Tejun Heo1663f262017-02-22 15:41:39 -08005726 s->kobj.kset = kset;
Tetsuo Handa26e4f202014-01-04 16:32:31 +09005727 err = kobject_init_and_add(&s->kobj, &slab_ktype, NULL, "%s", name);
Wang Hai757fed12021-01-28 19:32:50 +08005728 if (err)
Konstantin Khlebnikov80da0262015-09-04 15:45:51 -07005729 goto out;
Christoph Lameter81819f02007-05-06 14:49:36 -07005730
5731 err = sysfs_create_group(&s->kobj, &slab_attr_group);
Dave Jones54b6a732014-04-07 15:39:32 -07005732 if (err)
5733 goto out_del_kobj;
Vladimir Davydov9a417072014-04-07 15:39:31 -07005734
Christoph Lameter81819f02007-05-06 14:49:36 -07005735 if (!unmergeable) {
5736 /* Setup first alias */
5737 sysfs_slab_alias(s, s->name);
Christoph Lameter81819f02007-05-06 14:49:36 -07005738 }
Dave Jones54b6a732014-04-07 15:39:32 -07005739out:
5740 if (!unmergeable)
5741 kfree(name);
5742 return err;
5743out_del_kobj:
5744 kobject_del(&s->kobj);
Dave Jones54b6a732014-04-07 15:39:32 -07005745 goto out;
Christoph Lameter81819f02007-05-06 14:49:36 -07005746}
5747
Mikulas Patockad50d82f2018-06-27 23:26:09 -07005748void sysfs_slab_unlink(struct kmem_cache *s)
5749{
5750 if (slab_state >= FULL)
5751 kobject_del(&s->kobj);
5752}
5753
Tejun Heobf5eb3d2017-02-22 15:41:11 -08005754void sysfs_slab_release(struct kmem_cache *s)
5755{
5756 if (slab_state >= FULL)
5757 kobject_put(&s->kobj);
Christoph Lameter81819f02007-05-06 14:49:36 -07005758}
5759
/*
 * Aliases requested during bootup, before sysfs is available, are kept
 * on a singly linked list so that the information is not lost.
 */
struct saved_alias {
	struct kmem_cache *s;		/* cache the alias refers to */
	const char *name;		/* alias name; pointer is stored, not copied */
	struct saved_alias *next;	/* next saved alias, NULL at the end */
};

/* Head of the list of saved aliases; new entries are pushed at the front. */
static struct saved_alias *alias_list;
Christoph Lameter81819f02007-05-06 14:49:36 -07005771
5772static int sysfs_slab_alias(struct kmem_cache *s, const char *name)
5773{
5774 struct saved_alias *al;
5775
Christoph Lameter97d06602012-07-06 15:25:11 -05005776 if (slab_state == FULL) {
Christoph Lameter81819f02007-05-06 14:49:36 -07005777 /*
5778 * If we have a leftover link then remove it.
5779 */
Greg Kroah-Hartman27c3a312007-11-01 09:29:06 -06005780 sysfs_remove_link(&slab_kset->kobj, name);
5781 return sysfs_create_link(&slab_kset->kobj, &s->kobj, name);
Christoph Lameter81819f02007-05-06 14:49:36 -07005782 }
5783
5784 al = kmalloc(sizeof(struct saved_alias), GFP_KERNEL);
5785 if (!al)
5786 return -ENOMEM;
5787
5788 al->s = s;
5789 al->name = name;
5790 al->next = alias_list;
5791 alias_list = al;
5792 return 0;
5793}
5794
5795static int __init slab_sysfs_init(void)
5796{
Christoph Lameter5b95a4ac2007-07-17 04:03:19 -07005797 struct kmem_cache *s;
Christoph Lameter81819f02007-05-06 14:49:36 -07005798 int err;
5799
Christoph Lameter18004c52012-07-06 15:25:12 -05005800 mutex_lock(&slab_mutex);
Christoph Lameter2bce6482010-07-19 11:39:11 -05005801
Christoph Lameterd7660ce2020-06-01 21:45:50 -07005802 slab_kset = kset_create_and_add("slab", NULL, kernel_kobj);
Greg Kroah-Hartman27c3a312007-11-01 09:29:06 -06005803 if (!slab_kset) {
Christoph Lameter18004c52012-07-06 15:25:12 -05005804 mutex_unlock(&slab_mutex);
Fabian Frederickf9f58282014-06-04 16:06:34 -07005805 pr_err("Cannot register slab subsystem.\n");
Christoph Lameter81819f02007-05-06 14:49:36 -07005806 return -ENOSYS;
5807 }
5808
Christoph Lameter97d06602012-07-06 15:25:11 -05005809 slab_state = FULL;
Christoph Lameter26a7bd02007-05-09 02:32:39 -07005810
Christoph Lameter5b95a4ac2007-07-17 04:03:19 -07005811 list_for_each_entry(s, &slab_caches, list) {
Christoph Lameter26a7bd02007-05-09 02:32:39 -07005812 err = sysfs_slab_add(s);
Christoph Lameter5d540fb2007-08-30 23:56:26 -07005813 if (err)
Fabian Frederickf9f58282014-06-04 16:06:34 -07005814 pr_err("SLUB: Unable to add boot slab %s to sysfs\n",
5815 s->name);
Christoph Lameter26a7bd02007-05-09 02:32:39 -07005816 }
Christoph Lameter81819f02007-05-06 14:49:36 -07005817
5818 while (alias_list) {
5819 struct saved_alias *al = alias_list;
5820
5821 alias_list = alias_list->next;
5822 err = sysfs_slab_alias(al->s, al->name);
Christoph Lameter5d540fb2007-08-30 23:56:26 -07005823 if (err)
Fabian Frederickf9f58282014-06-04 16:06:34 -07005824 pr_err("SLUB: Unable to add boot slab alias %s to sysfs\n",
5825 al->name);
Christoph Lameter81819f02007-05-06 14:49:36 -07005826 kfree(al);
5827 }
5828
Christoph Lameter18004c52012-07-06 15:25:12 -05005829 mutex_unlock(&slab_mutex);
Christoph Lameter81819f02007-05-06 14:49:36 -07005830 resiliency_test();
5831 return 0;
5832}
5833
5834__initcall(slab_sysfs_init);
Christoph Lameterab4d5ed2010-10-05 13:57:26 -05005835#endif /* CONFIG_SYSFS */
Pekka J Enberg57ed3ed2008-01-01 17:23:28 +01005836
5837/*
5838 * The /proc/slabinfo ABI
5839 */
Yang Shi5b365772017-11-15 17:32:03 -08005840#ifdef CONFIG_SLUB_DEBUG
Glauber Costa0d7561c2012-10-19 18:20:27 +04005841void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo)
Pekka J Enberg57ed3ed2008-01-01 17:23:28 +01005842{
Pekka J Enberg57ed3ed2008-01-01 17:23:28 +01005843 unsigned long nr_slabs = 0;
Christoph Lameter205ab992008-04-14 19:11:40 +03005844 unsigned long nr_objs = 0;
5845 unsigned long nr_free = 0;
Pekka J Enberg57ed3ed2008-01-01 17:23:28 +01005846 int node;
Christoph Lameterfa45dc22014-08-06 16:04:09 -07005847 struct kmem_cache_node *n;
Pekka J Enberg57ed3ed2008-01-01 17:23:28 +01005848
Christoph Lameterfa45dc22014-08-06 16:04:09 -07005849 for_each_kmem_cache_node(s, node, n) {
Wanpeng Lic17fd132013-07-04 08:33:26 +08005850 nr_slabs += node_nr_slabs(n);
5851 nr_objs += node_nr_objs(n);
Christoph Lameter205ab992008-04-14 19:11:40 +03005852 nr_free += count_partial(n, count_free);
Pekka J Enberg57ed3ed2008-01-01 17:23:28 +01005853 }
5854
Glauber Costa0d7561c2012-10-19 18:20:27 +04005855 sinfo->active_objs = nr_objs - nr_free;
5856 sinfo->num_objs = nr_objs;
5857 sinfo->active_slabs = nr_slabs;
5858 sinfo->num_slabs = nr_slabs;
5859 sinfo->objects_per_slab = oo_objects(s->oo);
5860 sinfo->cache_order = oo_order(s->oo);
Pekka J Enberg57ed3ed2008-01-01 17:23:28 +01005861}
5862
/* Intentionally empty: this implementation prints no extra statistics. */
void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *s)
{
}
5866
Glauber Costab7454ad2012-10-19 18:20:25 +04005867ssize_t slabinfo_write(struct file *file, const char __user *buffer,
5868 size_t count, loff_t *ppos)
Alexey Dobriyan7b3c3a52008-10-06 02:42:17 +04005869{
Glauber Costab7454ad2012-10-19 18:20:25 +04005870 return -EIO;
Alexey Dobriyan7b3c3a52008-10-06 02:42:17 +04005871}
Yang Shi5b365772017-11-15 17:32:03 -08005872#endif /* CONFIG_SLUB_DEBUG */