blob: 205632c94a6aae3a544c12d232fd002602f5c21c [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * linux/mm/slab.c
3 * Written by Mark Hemment, 1996/97.
4 * (markhe@nextd.demon.co.uk)
5 *
6 * kmem_cache_destroy() + some cleanup - 1999 Andrea Arcangeli
7 *
8 * Major cleanup, different bufctl logic, per-cpu arrays
9 * (c) 2000 Manfred Spraul
10 *
11 * Cleanup, make the head arrays unconditional, preparation for NUMA
12 * (c) 2002 Manfred Spraul
13 *
14 * An implementation of the Slab Allocator as described in outline in;
15 * UNIX Internals: The New Frontiers by Uresh Vahalia
16 * Pub: Prentice Hall ISBN 0-13-101908-2
17 * or with a little more detail in;
18 * The Slab Allocator: An Object-Caching Kernel Memory Allocator
19 * Jeff Bonwick (Sun Microsystems).
20 * Presented at: USENIX Summer 1994 Technical Conference
21 *
 * The memory is organized in caches, one cache for each object type.
 * (e.g. inode_cache, dentry_cache, buffer_head, vm_area_struct)
 * Each cache consists of many slabs (they are small (usually one
 * page long) and always contiguous), and each slab contains multiple
 * initialized objects.
27 *
28 * This means, that your constructor is used only for newly allocated
Simon Arlott183ff222007-10-20 01:27:18 +020029 * slabs and you must pass objects with the same initializations to
Linus Torvalds1da177e2005-04-16 15:20:36 -070030 * kmem_cache_free.
31 *
 * Each cache can only support one memory type (GFP_DMA, GFP_HIGHMEM,
 * normal). If you need a special memory type, then you must create a new
 * cache for that memory type.
35 *
36 * In order to reduce fragmentation, the slabs are sorted in 3 groups:
37 * full slabs with 0 free objects
38 * partial slabs
39 * empty slabs with no allocated objects
40 *
41 * If partial slabs exist, then new allocations come from these slabs,
42 * otherwise from empty slabs or new slabs are allocated.
43 *
44 * kmem_cache_destroy() CAN CRASH if you try to allocate from the cache
45 * during kmem_cache_destroy(). The caller must prevent concurrent allocs.
46 *
47 * Each cache has a short per-cpu head array, most allocs
48 * and frees go into that array, and if that array overflows, then 1/2
49 * of the entries in the array are given back into the global cache.
50 * The head array is strictly LIFO and should improve the cache hit rates.
51 * On SMP, it additionally reduces the spinlock operations.
52 *
Andrew Mortona737b3e2006-03-22 00:08:11 -080053 * The c_cpuarray may not be read with enabled local interrupts -
Linus Torvalds1da177e2005-04-16 15:20:36 -070054 * it's changed with a smp_call_function().
55 *
56 * SMP synchronization:
57 * constructors and destructors are called without any locking.
Pekka Enberg343e0d72006-02-01 03:05:50 -080058 * Several members in struct kmem_cache and struct slab never change, they
Linus Torvalds1da177e2005-04-16 15:20:36 -070059 * are accessed without any locking.
60 * The per-cpu arrays are never accessed from the wrong cpu, no locking,
61 * and local interrupts are disabled so slab code is preempt-safe.
62 * The non-constant members are protected with a per-cache irq spinlock.
63 *
64 * Many thanks to Mark Hemment, who wrote another per-cpu slab patch
65 * in 2000 - many ideas in the current implementation are derived from
66 * his patch.
67 *
68 * Further notes from the original documentation:
69 *
70 * 11 April '97. Started multi-threading - markhe
Christoph Lameter18004c52012-07-06 15:25:12 -050071 * The global cache-chain is protected by the mutex 'slab_mutex'.
Linus Torvalds1da177e2005-04-16 15:20:36 -070072 * The sem is only needed when accessing/extending the cache-chain, which
73 * can never happen inside an interrupt (kmem_cache_create(),
74 * kmem_cache_shrink() and kmem_cache_reap()).
75 *
76 * At present, each engine can be growing a cache. This should be blocked.
77 *
Christoph Lametere498be72005-09-09 13:03:32 -070078 * 15 March 2005. NUMA slab allocator.
79 * Shai Fultheim <shai@scalex86.org>.
80 * Shobhit Dayal <shobhit@calsoftinc.com>
81 * Alok N Kataria <alokk@calsoftinc.com>
82 * Christoph Lameter <christoph@lameter.com>
83 *
84 * Modified the slab allocator to be node aware on NUMA systems.
85 * Each node has its own list of partial, free and full slabs.
86 * All object allocations for a node occur from node specific slab lists.
Linus Torvalds1da177e2005-04-16 15:20:36 -070087 */
88
Linus Torvalds1da177e2005-04-16 15:20:36 -070089#include <linux/slab.h>
90#include <linux/mm.h>
Randy Dunlapc9cf5522006-06-27 02:53:52 -070091#include <linux/poison.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070092#include <linux/swap.h>
93#include <linux/cache.h>
94#include <linux/interrupt.h>
95#include <linux/init.h>
96#include <linux/compiler.h>
Paul Jackson101a5002006-03-24 03:16:07 -080097#include <linux/cpuset.h>
Alexey Dobriyana0ec95a2008-10-06 00:59:10 +040098#include <linux/proc_fs.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070099#include <linux/seq_file.h>
100#include <linux/notifier.h>
101#include <linux/kallsyms.h>
102#include <linux/cpu.h>
103#include <linux/sysctl.h>
104#include <linux/module.h>
105#include <linux/rcupdate.h>
Paulo Marques543537b2005-06-23 00:09:02 -0700106#include <linux/string.h>
Andrew Morton138ae662006-12-06 20:36:41 -0800107#include <linux/uaccess.h>
Christoph Lametere498be72005-09-09 13:03:32 -0700108#include <linux/nodemask.h>
Catalin Marinasd5cff632009-06-11 13:22:40 +0100109#include <linux/kmemleak.h>
Christoph Lameterdc85da12006-01-18 17:42:36 -0800110#include <linux/mempolicy.h>
Ingo Molnarfc0abb12006-01-18 17:42:33 -0800111#include <linux/mutex.h>
Akinobu Mita8a8b6502006-12-08 02:39:44 -0800112#include <linux/fault-inject.h>
Ingo Molnare7eebaf2006-06-27 02:54:55 -0700113#include <linux/rtmutex.h>
Eric Dumazet6a2d7a92006-12-13 00:34:27 -0800114#include <linux/reciprocal_div.h>
Thomas Gleixner3ac7fe52008-04-30 00:55:01 -0700115#include <linux/debugobjects.h>
Pekka Enbergc175eea2008-05-09 20:35:53 +0200116#include <linux/kmemcheck.h>
David Rientjes8f9f8d92010-03-27 19:40:47 -0700117#include <linux/memory.h>
Linus Torvalds268bb0c2011-05-20 12:50:29 -0700118#include <linux/prefetch.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700119
Mel Gorman381760e2012-07-31 16:44:30 -0700120#include <net/sock.h>
121
Linus Torvalds1da177e2005-04-16 15:20:36 -0700122#include <asm/cacheflush.h>
123#include <asm/tlbflush.h>
124#include <asm/page.h>
125
Steven Rostedt4dee6b62012-01-09 17:15:42 -0500126#include <trace/events/kmem.h>
127
Mel Gorman072bb0a2012-07-31 16:43:58 -0700128#include "internal.h"
129
Glauber Costab9ce5ef2012-12-18 14:22:46 -0800130#include "slab.h"
131
Linus Torvalds1da177e2005-04-16 15:20:36 -0700132/*
Christoph Lameter50953fe2007-05-06 14:50:16 -0700133 * DEBUG - 1 for kmem_cache_create() to honour; SLAB_RED_ZONE & SLAB_POISON.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700134 * 0 for faster, smaller code (especially in the critical paths).
135 *
136 * STATS - 1 to collect stats for /proc/slabinfo.
137 * 0 for faster, smaller code (especially in the critical paths).
138 *
139 * FORCED_DEBUG - 1 enables SLAB_RED_ZONE and SLAB_POISON (if possible)
140 */
141
/*
 * File-local build switches. All three are driven by CONFIG_DEBUG_SLAB:
 * either everything is on (1) or everything is off (0).
 */
#if defined(CONFIG_DEBUG_SLAB)
#define DEBUG		1
#define STATS		1
#define FORCED_DEBUG	1
#else
#define DEBUG		0
#define STATS		0
#define FORCED_DEBUG	0
#endif
151
/* Shouldn't this be in a header file somewhere? */
/* Size of one machine pointer, in bytes. */
#define BYTES_PER_WORD sizeof(void *)
/*
 * Alignment used for red zones: the larger of a pointer-sized word and the
 * alignment requirement of unsigned long long (the red zone word type).
 */
#define REDZONE_ALIGN max(BYTES_PER_WORD, __alignof__(unsigned long long))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700155
/*
 * Fallback definition: used only when nothing earlier (presumably an
 * architecture header - confirm) has already defined ARCH_KMALLOC_FLAGS.
 */
#ifndef ARCH_KMALLOC_FLAGS
#define ARCH_KMALLOC_FLAGS SLAB_HWCACHE_ALIGN
#endif
159
/*
 * Select the width of a freelist index. FREELIST_BYTE_INDEX is 1 when
 * (PAGE_SIZE >> BITS_PER_BYTE) does not exceed SLAB_OBJ_MIN_SIZE, in which
 * case a single byte is used per index; otherwise a short is used.
 */
#define FREELIST_BYTE_INDEX (((PAGE_SIZE >> BITS_PER_BYTE) \
				<= SLAB_OBJ_MIN_SIZE) ? 1 : 0)

#if FREELIST_BYTE_INDEX
typedef unsigned char freelist_idx_t;
#else
typedef unsigned short freelist_idx_t;
#endif

/* Largest value that fits in a freelist_idx_t (all index bits set). */
#define SLAB_OBJ_MAX_NUM ((1 << sizeof(freelist_idx_t) * BITS_PER_BYTE) - 1)
Joonsoo Kimf315e3f2013-12-02 17:49:41 +0900170
Mel Gorman072bb0a2012-07-31 16:43:58 -0700171/*
172 * true if a page was allocated from pfmemalloc reserves for network-based
173 * swap
174 */
175static bool pfmemalloc_active __read_mostly;
176
Linus Torvalds1da177e2005-04-16 15:20:36 -0700177/*
Linus Torvalds1da177e2005-04-16 15:20:36 -0700178 * struct array_cache
179 *
Linus Torvalds1da177e2005-04-16 15:20:36 -0700180 * Purpose:
181 * - LIFO ordering, to hand out cache-warm objects from _alloc
182 * - reduce the number of linked list operations
183 * - reduce spinlock operations
184 *
185 * The limit is stored in the per-cpu structure to reduce the data cache
186 * footprint.
187 *
188 */
struct array_cache {
	unsigned int avail;	/* presumably: entries currently held in entry[] - confirm */
	unsigned int limit;	/* kept per-cpu to reduce the data cache footprint */
	unsigned int batchcount;
	unsigned int touched;
	spinlock_t lock;
	void *entry[];	/*
			 * Must have this definition in here for the proper
			 * alignment of array_cache. Also simplifies accessing
			 * the entries.
			 *
			 * Entries should not be directly dereferenced as
			 * entries belonging to slabs marked pfmemalloc will
			 * have the lower bits set SLAB_OBJ_PFMEMALLOC
			 */
};
205
/*
 * Tag bit kept in the low bit of an object pointer stored in an
 * array_cache entry. The helpers below test, set and clear that bit.
 */
#define SLAB_OBJ_PFMEMALLOC 1

/* Return true when the pfmemalloc tag bit is set in @objp. */
static inline bool is_obj_pfmemalloc(void *objp)
{
	unsigned long bits = (unsigned long)objp;

	return bits & SLAB_OBJ_PFMEMALLOC;
}

/* Set the pfmemalloc tag bit in the pointer stored at @objp. */
static inline void set_obj_pfmemalloc(void **objp)
{
	unsigned long bits = (unsigned long)*objp;

	bits |= SLAB_OBJ_PFMEMALLOC;
	*objp = (void *)bits;
}

/* Clear the pfmemalloc tag bit in the pointer stored at @objp. */
static inline void clear_obj_pfmemalloc(void **objp)
{
	unsigned long bits = (unsigned long)*objp;

	bits &= ~SLAB_OBJ_PFMEMALLOC;
	*objp = (void *)bits;
}
222
Andrew Mortona737b3e2006-03-22 00:08:11 -0800223/*
224 * bootstrap: The caches do not work without cpuarrays anymore, but the
225 * cpuarrays are allocated from the generic caches...
Linus Torvalds1da177e2005-04-16 15:20:36 -0700226 */
/* Number of entry slots reserved in the statically allocated boot array. */
#define BOOT_CPUCACHE_ENTRIES 1
/*
 * An array_cache followed by fixed storage for BOOT_CPUCACHE_ENTRIES
 * pointers, placed directly after the flexible entry[] member.
 */
struct arraycache_init {
	struct array_cache cache;
	void *entries[BOOT_CPUCACHE_ENTRIES];
};
232
233/*
Christoph Lametere498be72005-09-09 13:03:32 -0700234 * Need this for bootstrapping a per node allocator.
235 */
/* Three groups of MAX_NUMNODES boot-time node structures. */
#define NUM_INIT_LISTS (3 * MAX_NUMNODES)
static struct kmem_cache_node __initdata init_kmem_cache_node[NUM_INIT_LISTS];
/*
 * NOTE(review): these look like the base offsets of the three groups inside
 * init_kmem_cache_node[] (0, MAX_NUMNODES, 2 * MAX_NUMNODES) - confirm
 * against the users later in the file.
 */
#define CACHE_CACHE 0
#define SIZE_AC MAX_NUMNODES
#define SIZE_NODE (2 * MAX_NUMNODES)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700241
Christoph Lametered11d9e2006-06-30 01:55:45 -0700242static int drain_freelist(struct kmem_cache *cache,
Christoph Lameterce8eb6c2013-01-10 19:14:19 +0000243 struct kmem_cache_node *n, int tofree);
Christoph Lametered11d9e2006-06-30 01:55:45 -0700244static void free_block(struct kmem_cache *cachep, void **objpp, int len,
245 int node);
Pekka Enberg83b519e2009-06-10 19:40:04 +0300246static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp);
David Howells65f27f32006-11-22 14:55:48 +0000247static void cache_reap(struct work_struct *unused);
Christoph Lametered11d9e2006-06-30 01:55:45 -0700248
Ingo Molnare0a42722006-06-23 02:03:46 -0700249static int slab_early_init = 1;
250
Christoph Lametere3366012013-01-10 19:14:18 +0000251#define INDEX_AC kmalloc_index(sizeof(struct arraycache_init))
Christoph Lameterce8eb6c2013-01-10 19:14:19 +0000252#define INDEX_NODE kmalloc_index(sizeof(struct kmem_cache_node))
Christoph Lametere498be72005-09-09 13:03:32 -0700253
Christoph Lameterce8eb6c2013-01-10 19:14:19 +0000254static void kmem_cache_node_init(struct kmem_cache_node *parent)
Christoph Lametere498be72005-09-09 13:03:32 -0700255{
256 INIT_LIST_HEAD(&parent->slabs_full);
257 INIT_LIST_HEAD(&parent->slabs_partial);
258 INIT_LIST_HEAD(&parent->slabs_free);
259 parent->shared = NULL;
260 parent->alien = NULL;
Ravikiran G Thirumalai2e1217c2006-02-04 23:27:56 -0800261 parent->colour_next = 0;
Christoph Lametere498be72005-09-09 13:03:32 -0700262 spin_lock_init(&parent->list_lock);
263 parent->free_objects = 0;
264 parent->free_touched = 0;
265}
266
/*
 * Initialise @listp as an empty list head, then splice onto it the list
 * named @slab (a member name) of @cachep's node structure for @nodeid.
 */
#define MAKE_LIST(cachep, listp, slab, nodeid)				\
	do {								\
		INIT_LIST_HEAD(listp);					\
		list_splice(&get_node(cachep, nodeid)->slab, listp);	\
	} while (0)

/*
 * Apply MAKE_LIST() to the full, partial and free slab lists of @ptr,
 * taking the entries from @cachep's node structure for @nodeid.
 */
#define	MAKE_ALL_LISTS(cachep, ptr, nodeid)				\
	do {								\
	MAKE_LIST((cachep), (&(ptr)->slabs_full), slabs_full, nodeid);	\
	MAKE_LIST((cachep), (&(ptr)->slabs_partial), slabs_partial, nodeid); \
	MAKE_LIST((cachep), (&(ptr)->slabs_free), slabs_free, nodeid);	\
	} while (0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700279
Linus Torvalds1da177e2005-04-16 15:20:36 -0700280#define CFLGS_OFF_SLAB (0x80000000UL)
281#define OFF_SLAB(x) ((x)->flags & CFLGS_OFF_SLAB)
282
283#define BATCHREFILL_LIMIT 16
/*
 * Optimization question: fewer reaps means less probability for unnecessary
 * cpucache drain/refill cycles.
 *
 * OTOH the cpuarrays can contain lots of objects,
 * which could lock up otherwise freeable slabs.
 */
Jianyu Zhan5f0985b2014-03-30 17:02:20 +0800291#define REAPTIMEOUT_AC (2*HZ)
292#define REAPTIMEOUT_NODE (4*HZ)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700293
/*
 * Statistics hooks. With STATS enabled they update counters stored in the
 * cache @x; with STATS disabled they expand to empty statements, except
 * STATS_ADD_REAPED() which still evaluates its second argument once.
 *
 * Every macro argument is parenthesised in the expansion so that compound
 * expressions (e.g. "a ? b : c") can be passed safely.
 */
#if STATS
#define STATS_INC_ACTIVE(x)	((x)->num_active++)
#define STATS_DEC_ACTIVE(x)	((x)->num_active--)
#define STATS_INC_ALLOCED(x)	((x)->num_allocations++)
#define STATS_INC_GROWN(x)	((x)->grown++)
#define STATS_ADD_REAPED(x,y)	((x)->reaped += (y))
/* Raise high_mark to num_active when the latter has overtaken it. */
#define STATS_SET_HIGH(x)						\
	do {								\
		if ((x)->num_active > (x)->high_mark)			\
			(x)->high_mark = (x)->num_active;		\
	} while (0)
#define STATS_INC_ERR(x)	((x)->errors++)
#define STATS_INC_NODEALLOCS(x)	((x)->node_allocs++)
#define STATS_INC_NODEFREES(x)	((x)->node_frees++)
#define STATS_INC_ACOVERFLOW(x)	((x)->node_overflow++)
/* Raise max_freeable to @i when @i is larger. */
#define STATS_SET_FREEABLE(x, i)					\
	do {								\
		if ((x)->max_freeable < (i))				\
			(x)->max_freeable = (i);			\
	} while (0)
#define STATS_INC_ALLOCHIT(x)	atomic_inc(&(x)->allochit)
#define STATS_INC_ALLOCMISS(x)	atomic_inc(&(x)->allocmiss)
#define STATS_INC_FREEHIT(x)	atomic_inc(&(x)->freehit)
#define STATS_INC_FREEMISS(x)	atomic_inc(&(x)->freemiss)
#else
#define STATS_INC_ACTIVE(x)	do { } while (0)
#define STATS_DEC_ACTIVE(x)	do { } while (0)
#define STATS_INC_ALLOCED(x)	do { } while (0)
#define STATS_INC_GROWN(x)	do { } while (0)
#define STATS_ADD_REAPED(x,y)	do { (void)(y); } while (0)
#define STATS_SET_HIGH(x)	do { } while (0)
#define STATS_INC_ERR(x)	do { } while (0)
#define STATS_INC_NODEALLOCS(x)	do { } while (0)
#define STATS_INC_NODEFREES(x)	do { } while (0)
#define STATS_INC_ACOVERFLOW(x)	do { } while (0)
#define STATS_SET_FREEABLE(x, i) do { } while (0)
#define STATS_INC_ALLOCHIT(x)	do { } while (0)
#define STATS_INC_ALLOCMISS(x)	do { } while (0)
#define STATS_INC_FREEHIT(x)	do { } while (0)
#define STATS_INC_FREEMISS(x)	do { } while (0)
#endif
335
336#if DEBUG
Linus Torvalds1da177e2005-04-16 15:20:36 -0700337
Andrew Mortona737b3e2006-03-22 00:08:11 -0800338/*
339 * memory layout of objects:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700340 * 0 : objp
Manfred Spraul3dafccf2006-02-01 03:05:42 -0800341 * 0 .. cachep->obj_offset - BYTES_PER_WORD - 1: padding. This ensures that
Linus Torvalds1da177e2005-04-16 15:20:36 -0700342 * the end of an object is aligned with the end of the real
343 * allocation. Catches writes behind the end of the allocation.
Manfred Spraul3dafccf2006-02-01 03:05:42 -0800344 * cachep->obj_offset - BYTES_PER_WORD .. cachep->obj_offset - 1:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700345 * redzone word.
Manfred Spraul3dafccf2006-02-01 03:05:42 -0800346 * cachep->obj_offset: The real object.
Christoph Lameter3b0efdf2012-06-13 10:24:57 -0500347 * cachep->size - 2* BYTES_PER_WORD: redzone word [BYTES_PER_WORD long]
348 * cachep->size - 1* BYTES_PER_WORD: last caller address
Andrew Mortona737b3e2006-03-22 00:08:11 -0800349 * [BYTES_PER_WORD long]
Linus Torvalds1da177e2005-04-16 15:20:36 -0700350 */
Pekka Enberg343e0d72006-02-01 03:05:50 -0800351static int obj_offset(struct kmem_cache *cachep)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700352{
Manfred Spraul3dafccf2006-02-01 03:05:42 -0800353 return cachep->obj_offset;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700354}
355
David Woodhouseb46b8f12007-05-08 00:22:59 -0700356static unsigned long long *dbg_redzone1(struct kmem_cache *cachep, void *objp)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700357{
358 BUG_ON(!(cachep->flags & SLAB_RED_ZONE));
David Woodhouseb46b8f12007-05-08 00:22:59 -0700359 return (unsigned long long*) (objp + obj_offset(cachep) -
360 sizeof(unsigned long long));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700361}
362
David Woodhouseb46b8f12007-05-08 00:22:59 -0700363static unsigned long long *dbg_redzone2(struct kmem_cache *cachep, void *objp)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700364{
365 BUG_ON(!(cachep->flags & SLAB_RED_ZONE));
366 if (cachep->flags & SLAB_STORE_USER)
Christoph Lameter3b0efdf2012-06-13 10:24:57 -0500367 return (unsigned long long *)(objp + cachep->size -
David Woodhouseb46b8f12007-05-08 00:22:59 -0700368 sizeof(unsigned long long) -
David Woodhouse87a927c2007-07-04 21:26:44 -0400369 REDZONE_ALIGN);
Christoph Lameter3b0efdf2012-06-13 10:24:57 -0500370 return (unsigned long long *) (objp + cachep->size -
David Woodhouseb46b8f12007-05-08 00:22:59 -0700371 sizeof(unsigned long long));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700372}
373
Pekka Enberg343e0d72006-02-01 03:05:50 -0800374static void **dbg_userword(struct kmem_cache *cachep, void *objp)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700375{
376 BUG_ON(!(cachep->flags & SLAB_STORE_USER));
Christoph Lameter3b0efdf2012-06-13 10:24:57 -0500377 return (void **)(objp + cachep->size - BYTES_PER_WORD);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700378}
379
380#else
381
/*
 * !DEBUG variants: the object offset is always 0, and the red zone /
 * user word accessors hit BUG() if they are ever evaluated.
 */
#define obj_offset(x) 0
#define dbg_redzone1(cachep, objp) ({BUG(); (unsigned long long *)NULL;})
#define dbg_redzone2(cachep, objp) ({BUG(); (unsigned long long *)NULL;})
#define dbg_userword(cachep, objp) ({BUG(); (void **)NULL;})
386
387#endif
388
Joonsoo Kim03787302014-06-23 13:22:06 -0700389#define OBJECT_FREE (0)
390#define OBJECT_ACTIVE (1)
391
392#ifdef CONFIG_DEBUG_SLAB_LEAK
393
394static void set_obj_status(struct page *page, int idx, int val)
395{
396 int freelist_size;
397 char *status;
398 struct kmem_cache *cachep = page->slab_cache;
399
400 freelist_size = cachep->num * sizeof(freelist_idx_t);
401 status = (char *)page->freelist + freelist_size;
402 status[idx] = val;
403}
404
405static inline unsigned int get_obj_status(struct page *page, int idx)
406{
407 int freelist_size;
408 char *status;
409 struct kmem_cache *cachep = page->slab_cache;
410
411 freelist_size = cachep->num * sizeof(freelist_idx_t);
412 status = (char *)page->freelist + freelist_size;
413
414 return status[idx];
415}
416
417#else
418static inline void set_obj_status(struct page *page, int idx, int val) {}
419
420#endif
421
Linus Torvalds1da177e2005-04-16 15:20:36 -0700422/*
David Rientjes3df1ccc2011-10-18 22:09:28 -0700423 * Do not go above this order unless 0 objects fit into the slab or
424 * overridden on the command line.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700425 */
David Rientjes543585c2011-10-18 22:09:24 -0700426#define SLAB_MAX_ORDER_HI 1
427#define SLAB_MAX_ORDER_LO 0
428static int slab_max_order = SLAB_MAX_ORDER_LO;
David Rientjes3df1ccc2011-10-18 22:09:28 -0700429static bool slab_max_order_set __initdata;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700430
Pekka Enberg6ed5eb2212006-02-01 03:05:49 -0800431static inline struct kmem_cache *virt_to_cache(const void *obj)
432{
Christoph Lameterb49af682007-05-06 14:49:41 -0700433 struct page *page = virt_to_head_page(obj);
Christoph Lameter35026082012-06-13 10:24:56 -0500434 return page->slab_cache;
Pekka Enberg6ed5eb2212006-02-01 03:05:49 -0800435}
436
Joonsoo Kim8456a642013-10-24 10:07:49 +0900437static inline void *index_to_obj(struct kmem_cache *cache, struct page *page,
Pekka Enberg8fea4e92006-03-22 00:08:10 -0800438 unsigned int idx)
439{
Joonsoo Kim8456a642013-10-24 10:07:49 +0900440 return page->s_mem + cache->size * idx;
Pekka Enberg8fea4e92006-03-22 00:08:10 -0800441}
442
Eric Dumazet6a2d7a92006-12-13 00:34:27 -0800443/*
Christoph Lameter3b0efdf2012-06-13 10:24:57 -0500444 * We want to avoid an expensive divide : (offset / cache->size)
445 * Using the fact that size is a constant for a particular cache,
446 * we can replace (offset / cache->size) by
Eric Dumazet6a2d7a92006-12-13 00:34:27 -0800447 * reciprocal_divide(offset, cache->reciprocal_buffer_size)
448 */
449static inline unsigned int obj_to_index(const struct kmem_cache *cache,
Joonsoo Kim8456a642013-10-24 10:07:49 +0900450 const struct page *page, void *obj)
Pekka Enberg8fea4e92006-03-22 00:08:10 -0800451{
Joonsoo Kim8456a642013-10-24 10:07:49 +0900452 u32 offset = (obj - page->s_mem);
Eric Dumazet6a2d7a92006-12-13 00:34:27 -0800453 return reciprocal_divide(offset, cache->reciprocal_buffer_size);
Pekka Enberg8fea4e92006-03-22 00:08:10 -0800454}
455
Linus Torvalds1da177e2005-04-16 15:20:36 -0700456static struct arraycache_init initarray_generic =
Pekka Enbergb28a02d2006-01-08 01:00:37 -0800457 { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
Linus Torvalds1da177e2005-04-16 15:20:36 -0700458
/*
 * internal cache of cache description objs
 *
 * Statically defined so that it exists without any allocation; its object
 * size is that of struct kmem_cache itself.
 */
static struct kmem_cache kmem_cache_boot = {
	.batchcount = 1,
	.limit = BOOT_CPUCACHE_ENTRIES,
	.shared = 1,
	.size = sizeof(struct kmem_cache),
	.name = "kmem_cache",
};
467
Ravikiran G Thirumalai056c6242006-09-25 23:31:38 -0700468#define BAD_ALIEN_MAGIC 0x01020304ul
469
Pekka Enbergce79ddc2009-11-23 22:01:15 +0200470#ifdef CONFIG_LOCKDEP
471
/*
 * Slab sometimes uses the kmalloc slabs to store the slab headers
 * for other slabs "off slab".
 * The locking for this is tricky in that it nests within the locks
 * of all other slabs in a few places; to deal with this special
 * locking we put on-slab caches into a separate lock-class.
 *
 * We set lock class for alien array caches which are up during init.
 * The lock annotation will be lost if all cpus of a node go down and
 * then come back up during hotplug.
 */
483static struct lock_class_key on_slab_l3_key;
484static struct lock_class_key on_slab_alc_key;
485
Peter Zijlstra83835b32011-07-22 15:26:05 +0200486static struct lock_class_key debugobj_l3_key;
487static struct lock_class_key debugobj_alc_key;
488
489static void slab_set_lock_classes(struct kmem_cache *cachep,
490 struct lock_class_key *l3_key, struct lock_class_key *alc_key,
Christoph Lameter18bf8542014-08-06 16:04:11 -0700491 struct kmem_cache_node *n)
Peter Zijlstra83835b32011-07-22 15:26:05 +0200492{
493 struct array_cache **alc;
Peter Zijlstra83835b32011-07-22 15:26:05 +0200494 int r;
495
Christoph Lameterce8eb6c2013-01-10 19:14:19 +0000496 lockdep_set_class(&n->list_lock, l3_key);
497 alc = n->alien;
Peter Zijlstra83835b32011-07-22 15:26:05 +0200498 /*
499 * FIXME: This check for BAD_ALIEN_MAGIC
500 * should go away when common slab code is taught to
501 * work even without alien caches.
502 * Currently, non NUMA code returns BAD_ALIEN_MAGIC
503 * for alloc_alien_cache,
504 */
505 if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC)
506 return;
507 for_each_node(r) {
508 if (alc[r])
509 lockdep_set_class(&alc[r]->lock, alc_key);
510 }
511}
512
Christoph Lameter18bf8542014-08-06 16:04:11 -0700513static void slab_set_debugobj_lock_classes_node(struct kmem_cache *cachep,
514 struct kmem_cache_node *n)
Peter Zijlstra83835b32011-07-22 15:26:05 +0200515{
Christoph Lameter18bf8542014-08-06 16:04:11 -0700516 slab_set_lock_classes(cachep, &debugobj_l3_key, &debugobj_alc_key, n);
Peter Zijlstra83835b32011-07-22 15:26:05 +0200517}
518
519static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep)
520{
521 int node;
Christoph Lameter18bf8542014-08-06 16:04:11 -0700522 struct kmem_cache_node *n;
Peter Zijlstra83835b32011-07-22 15:26:05 +0200523
Christoph Lameter18bf8542014-08-06 16:04:11 -0700524 for_each_kmem_cache_node(cachep, node, n)
525 slab_set_debugobj_lock_classes_node(cachep, n);
Peter Zijlstra83835b32011-07-22 15:26:05 +0200526}
527
Pekka Enbergce79ddc2009-11-23 22:01:15 +0200528static void init_node_lock_keys(int q)
529{
Christoph Lametere3366012013-01-10 19:14:18 +0000530 int i;
Pekka Enbergce79ddc2009-11-23 22:01:15 +0200531
Christoph Lameter97d06602012-07-06 15:25:11 -0500532 if (slab_state < UP)
Pekka Enbergce79ddc2009-11-23 22:01:15 +0200533 return;
534
Christoph Lameter0f8f8092013-07-02 12:12:10 -0700535 for (i = 1; i <= KMALLOC_SHIFT_HIGH; i++) {
Christoph Lameterce8eb6c2013-01-10 19:14:19 +0000536 struct kmem_cache_node *n;
Christoph Lametere3366012013-01-10 19:14:18 +0000537 struct kmem_cache *cache = kmalloc_caches[i];
Pekka Enbergce79ddc2009-11-23 22:01:15 +0200538
Christoph Lametere3366012013-01-10 19:14:18 +0000539 if (!cache)
Pekka Enberg00afa752009-12-27 14:33:14 +0200540 continue;
Peter Zijlstra83835b32011-07-22 15:26:05 +0200541
Christoph Lameter18bf8542014-08-06 16:04:11 -0700542 n = get_node(cache, q);
Christoph Lameterce8eb6c2013-01-10 19:14:19 +0000543 if (!n || OFF_SLAB(cache))
Christoph Lametere3366012013-01-10 19:14:18 +0000544 continue;
545
546 slab_set_lock_classes(cache, &on_slab_l3_key,
Christoph Lameter18bf8542014-08-06 16:04:11 -0700547 &on_slab_alc_key, n);
Pekka Enbergce79ddc2009-11-23 22:01:15 +0200548 }
549}
550
Christoph Lameter18bf8542014-08-06 16:04:11 -0700551static void on_slab_lock_classes_node(struct kmem_cache *cachep,
552 struct kmem_cache_node *n)
Glauber Costa6ccfb5b2012-12-18 14:22:31 -0800553{
Glauber Costa6ccfb5b2012-12-18 14:22:31 -0800554 slab_set_lock_classes(cachep, &on_slab_l3_key,
Christoph Lameter18bf8542014-08-06 16:04:11 -0700555 &on_slab_alc_key, n);
Glauber Costa6ccfb5b2012-12-18 14:22:31 -0800556}
557
558static inline void on_slab_lock_classes(struct kmem_cache *cachep)
559{
560 int node;
Christoph Lameter18bf8542014-08-06 16:04:11 -0700561 struct kmem_cache_node *n;
Glauber Costa6ccfb5b2012-12-18 14:22:31 -0800562
563 VM_BUG_ON(OFF_SLAB(cachep));
Christoph Lameter18bf8542014-08-06 16:04:11 -0700564 for_each_kmem_cache_node(cachep, node, n)
565 on_slab_lock_classes_node(cachep, n);
Glauber Costa6ccfb5b2012-12-18 14:22:31 -0800566}
567
Fabian Frederick1536cb32014-08-06 16:04:05 -0700568static inline void __init init_lock_keys(void)
Pekka Enbergce79ddc2009-11-23 22:01:15 +0200569{
570 int node;
571
572 for_each_node(node)
573 init_node_lock_keys(node);
574}
575#else
/*
 * !CONFIG_LOCKDEP stub: there are no lock classes to set up.
 *
 * Deliberately not marked __init so that it has the same section placement
 * as the CONFIG_LOCKDEP implementation, which is a regular function; a
 * caller valid in one configuration is then valid in the other.
 * NOTE(review): callers are outside this view - confirm none rely on the
 * stub being discarded after init.
 */
static void init_node_lock_keys(int q)
{
}
579
/* !CONFIG_LOCKDEP: nothing to initialise. */
static inline void init_lock_keys(void)
{
}

/* !CONFIG_LOCKDEP: no lock classes to assign to @cachep. */
static inline void on_slab_lock_classes(struct kmem_cache *cachep)
{
}

/* !CONFIG_LOCKDEP: no lock classes to assign to node @n of @cachep. */
static inline void on_slab_lock_classes_node(struct kmem_cache *cachep,
	struct kmem_cache_node *n)
{
}

/* !CONFIG_LOCKDEP: no lock classes to assign to node @n of @cachep. */
static void slab_set_debugobj_lock_classes_node(struct kmem_cache *cachep,
	struct kmem_cache_node *n)
{
}

/* !CONFIG_LOCKDEP: no lock classes to assign to @cachep. */
static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep)
{
}
Pekka Enbergce79ddc2009-11-23 22:01:15 +0200601#endif
602
Tejun Heo1871e522009-10-29 22:34:13 +0900603static DEFINE_PER_CPU(struct delayed_work, slab_reap_work);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700604
Pekka Enberg343e0d72006-02-01 03:05:50 -0800605static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700606{
607 return cachep->array[smp_processor_id()];
608}
609
Joonsoo Kim03787302014-06-23 13:22:06 -0700610static size_t calculate_freelist_size(int nr_objs, size_t align)
611{
612 size_t freelist_size;
613
614 freelist_size = nr_objs * sizeof(freelist_idx_t);
615 if (IS_ENABLED(CONFIG_DEBUG_SLAB_LEAK))
616 freelist_size += nr_objs * sizeof(char);
617
618 if (align)
619 freelist_size = ALIGN(freelist_size, align);
620
621 return freelist_size;
622}
623
Joonsoo Kim9cef2e22013-12-02 17:49:39 +0900624static int calculate_nr_objs(size_t slab_size, size_t buffer_size,
625 size_t idx_size, size_t align)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700626{
Joonsoo Kim9cef2e22013-12-02 17:49:39 +0900627 int nr_objs;
Joonsoo Kim03787302014-06-23 13:22:06 -0700628 size_t remained_size;
Joonsoo Kim9cef2e22013-12-02 17:49:39 +0900629 size_t freelist_size;
Joonsoo Kim03787302014-06-23 13:22:06 -0700630 int extra_space = 0;
Joonsoo Kim9cef2e22013-12-02 17:49:39 +0900631
Joonsoo Kim03787302014-06-23 13:22:06 -0700632 if (IS_ENABLED(CONFIG_DEBUG_SLAB_LEAK))
633 extra_space = sizeof(char);
Joonsoo Kim9cef2e22013-12-02 17:49:39 +0900634 /*
635 * Ignore padding for the initial guess. The padding
636 * is at most @align-1 bytes, and @buffer_size is at
637 * least @align. In the worst case, this result will
638 * be one greater than the number of objects that fit
639 * into the memory allocation when taking the padding
640 * into account.
641 */
Joonsoo Kim03787302014-06-23 13:22:06 -0700642 nr_objs = slab_size / (buffer_size + idx_size + extra_space);
Joonsoo Kim9cef2e22013-12-02 17:49:39 +0900643
644 /*
645 * This calculated number will be either the right
646 * amount, or one greater than what we want.
647 */
Joonsoo Kim03787302014-06-23 13:22:06 -0700648 remained_size = slab_size - nr_objs * buffer_size;
649 freelist_size = calculate_freelist_size(nr_objs, align);
650 if (remained_size < freelist_size)
Joonsoo Kim9cef2e22013-12-02 17:49:39 +0900651 nr_objs--;
652
653 return nr_objs;
Steven Rostedtfbaccac2006-02-01 03:05:45 -0800654}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700655
Andrew Mortona737b3e2006-03-22 00:08:11 -0800656/*
657 * Calculate the number of objects and left-over bytes for a given buffer size.
658 */
Steven Rostedtfbaccac2006-02-01 03:05:45 -0800659static void cache_estimate(unsigned long gfporder, size_t buffer_size,
660 size_t align, int flags, size_t *left_over,
661 unsigned int *num)
662{
663 int nr_objs;
664 size_t mgmt_size;
665 size_t slab_size = PAGE_SIZE << gfporder;
666
667 /*
668 * The slab management structure can be either off the slab or
669 * on it. For the latter case, the memory allocated for a
670 * slab is used for:
671 *
Joonsoo Kim16025172013-10-24 10:07:46 +0900672 * - One unsigned int for each object
Steven Rostedtfbaccac2006-02-01 03:05:45 -0800673 * - Padding to respect alignment of @align
674 * - @buffer_size bytes for each object
675 *
676 * If the slab management structure is off the slab, then the
677 * alignment will already be calculated into the size. Because
678 * the slabs are all pages aligned, the objects will be at the
679 * correct alignment when allocated.
680 */
681 if (flags & CFLGS_OFF_SLAB) {
682 mgmt_size = 0;
683 nr_objs = slab_size / buffer_size;
684
Steven Rostedtfbaccac2006-02-01 03:05:45 -0800685 } else {
Joonsoo Kim9cef2e22013-12-02 17:49:39 +0900686 nr_objs = calculate_nr_objs(slab_size, buffer_size,
Joonsoo Kima41adfa2013-12-02 17:49:42 +0900687 sizeof(freelist_idx_t), align);
Joonsoo Kim03787302014-06-23 13:22:06 -0700688 mgmt_size = calculate_freelist_size(nr_objs, align);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700689 }
Steven Rostedtfbaccac2006-02-01 03:05:45 -0800690 *num = nr_objs;
691 *left_over = slab_size - nr_objs*buffer_size - mgmt_size;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700692}
693
#if DEBUG
/* Report @msg for @cachep, tagging it with the calling function's name. */
#define slab_error(cachep, msg) __slab_error(__func__, cachep, msg)

/*
 * Log a slab error for cache @cachep as seen in @function, dump the
 * current stack, and taint the kernel with TAINT_BAD_PAGE.
 */
static void __slab_error(const char *function, struct kmem_cache *cachep,
			 char *msg)
{
	const char *cache_name = cachep->name;

	printk(KERN_ERR "slab error in %s(): cache `%s': %s\n",
	       function, cache_name, msg);
	dump_stack();
	add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
}
#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700706
Paul Menage3395ee02006-12-06 20:32:16 -0800707/*
708 * By default on NUMA we use alien caches to stage the freeing of
709 * objects allocated from other nodes. This causes massive memory
710 * inefficiencies when using fake NUMA setup to split memory into a
711 * large number of small nodes, so it can be disabled on the command
712 * line
713 */
714
715static int use_alien_caches __read_mostly = 1;
716static int __init noaliencache_setup(char *s)
717{
718 use_alien_caches = 0;
719 return 1;
720}
721__setup("noaliencache", noaliencache_setup);
722
David Rientjes3df1ccc2011-10-18 22:09:28 -0700723static int __init slab_max_order_setup(char *str)
724{
725 get_option(&str, &slab_max_order);
726 slab_max_order = slab_max_order < 0 ? 0 :
727 min(slab_max_order, MAX_ORDER - 1);
728 slab_max_order_set = true;
729
730 return 1;
731}
732__setup("slab_max_order=", slab_max_order_setup);
733
Christoph Lameter8fce4d82006-03-09 17:33:54 -0800734#ifdef CONFIG_NUMA
735/*
736 * Special reaping functions for NUMA systems called from cache_reap().
737 * These take care of doing round robin flushing of alien caches (containing
738 * objects freed on different nodes from which they were allocated) and the
739 * flushing of remote pcps by calling drain_node_pages.
740 */
Tejun Heo1871e522009-10-29 22:34:13 +0900741static DEFINE_PER_CPU(unsigned long, slab_reap_node);
Christoph Lameter8fce4d82006-03-09 17:33:54 -0800742
743static void init_reap_node(int cpu)
744{
745 int node;
746
Lee Schermerhorn7d6e6d02010-05-26 14:45:03 -0700747 node = next_node(cpu_to_mem(cpu), node_online_map);
Christoph Lameter8fce4d82006-03-09 17:33:54 -0800748 if (node == MAX_NUMNODES)
Paul Jackson442295c2006-03-22 00:09:11 -0800749 node = first_node(node_online_map);
Christoph Lameter8fce4d82006-03-09 17:33:54 -0800750
Tejun Heo1871e522009-10-29 22:34:13 +0900751 per_cpu(slab_reap_node, cpu) = node;
Christoph Lameter8fce4d82006-03-09 17:33:54 -0800752}
753
754static void next_reap_node(void)
755{
Christoph Lameter909ea962010-12-08 16:22:55 +0100756 int node = __this_cpu_read(slab_reap_node);
Christoph Lameter8fce4d82006-03-09 17:33:54 -0800757
Christoph Lameter8fce4d82006-03-09 17:33:54 -0800758 node = next_node(node, node_online_map);
759 if (unlikely(node >= MAX_NUMNODES))
760 node = first_node(node_online_map);
Christoph Lameter909ea962010-12-08 16:22:55 +0100761 __this_cpu_write(slab_reap_node, node);
Christoph Lameter8fce4d82006-03-09 17:33:54 -0800762}
763
764#else
765#define init_reap_node(cpu) do { } while (0)
766#define next_reap_node(void) do { } while (0)
767#endif
768
Linus Torvalds1da177e2005-04-16 15:20:36 -0700769/*
770 * Initiate the reap timer running on the target CPU. We run at around 1 to 2Hz
771 * via the workqueue/eventd.
772 * Add the CPU number into the expiration time to minimize the possibility of
773 * the CPUs getting into lockstep and contending for the global cache chain
774 * lock.
775 */
Paul Gortmaker0db06282013-06-19 14:53:51 -0400776static void start_cpu_timer(int cpu)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700777{
Tejun Heo1871e522009-10-29 22:34:13 +0900778 struct delayed_work *reap_work = &per_cpu(slab_reap_work, cpu);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700779
780 /*
781 * When this gets called from do_initcalls via cpucache_init(),
782 * init_workqueues() has already run, so keventd will be setup
783 * at that time.
784 */
David Howells52bad642006-11-22 14:54:01 +0000785 if (keventd_up() && reap_work->work.func == NULL) {
Christoph Lameter8fce4d82006-03-09 17:33:54 -0800786 init_reap_node(cpu);
Tejun Heo203b42f2012-08-21 13:18:23 -0700787 INIT_DEFERRABLE_WORK(reap_work, cache_reap);
Arjan van de Ven2b284212006-12-10 02:21:28 -0800788 schedule_delayed_work_on(cpu, reap_work,
789 __round_jiffies_relative(HZ, cpu));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700790 }
791}
792
Christoph Lametere498be72005-09-09 13:03:32 -0700793static struct array_cache *alloc_arraycache(int node, int entries,
Pekka Enberg83b519e2009-06-10 19:40:04 +0300794 int batchcount, gfp_t gfp)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700795{
Pekka Enbergb28a02d2006-01-08 01:00:37 -0800796 int memsize = sizeof(void *) * entries + sizeof(struct array_cache);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700797 struct array_cache *nc = NULL;
798
Pekka Enberg83b519e2009-06-10 19:40:04 +0300799 nc = kmalloc_node(memsize, gfp, node);
Catalin Marinasd5cff632009-06-11 13:22:40 +0100800 /*
801 * The array_cache structures contain pointers to free object.
Lucas De Marchi25985ed2011-03-30 22:57:33 -0300802 * However, when such objects are allocated or transferred to another
Catalin Marinasd5cff632009-06-11 13:22:40 +0100803 * cache the pointers are not cleared and they could be counted as
804 * valid references during a kmemleak scan. Therefore, kmemleak must
805 * not scan such objects.
806 */
807 kmemleak_no_scan(nc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700808 if (nc) {
809 nc->avail = 0;
810 nc->limit = entries;
811 nc->batchcount = batchcount;
812 nc->touched = 0;
Christoph Lametere498be72005-09-09 13:03:32 -0700813 spin_lock_init(&nc->lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700814 }
815 return nc;
816}
817
Joonsoo Kim8456a642013-10-24 10:07:49 +0900818static inline bool is_slab_pfmemalloc(struct page *page)
Mel Gorman072bb0a2012-07-31 16:43:58 -0700819{
Mel Gorman072bb0a2012-07-31 16:43:58 -0700820 return PageSlabPfmemalloc(page);
821}
822
823/* Clears pfmemalloc_active if no slabs have pfmalloc set */
824static void recheck_pfmemalloc_active(struct kmem_cache *cachep,
825 struct array_cache *ac)
826{
Christoph Lameter18bf8542014-08-06 16:04:11 -0700827 struct kmem_cache_node *n = get_node(cachep, numa_mem_id());
Joonsoo Kim8456a642013-10-24 10:07:49 +0900828 struct page *page;
Mel Gorman072bb0a2012-07-31 16:43:58 -0700829 unsigned long flags;
830
831 if (!pfmemalloc_active)
832 return;
833
Christoph Lameterce8eb6c2013-01-10 19:14:19 +0000834 spin_lock_irqsave(&n->list_lock, flags);
Joonsoo Kim8456a642013-10-24 10:07:49 +0900835 list_for_each_entry(page, &n->slabs_full, lru)
836 if (is_slab_pfmemalloc(page))
Mel Gorman072bb0a2012-07-31 16:43:58 -0700837 goto out;
838
Joonsoo Kim8456a642013-10-24 10:07:49 +0900839 list_for_each_entry(page, &n->slabs_partial, lru)
840 if (is_slab_pfmemalloc(page))
Mel Gorman072bb0a2012-07-31 16:43:58 -0700841 goto out;
842
Joonsoo Kim8456a642013-10-24 10:07:49 +0900843 list_for_each_entry(page, &n->slabs_free, lru)
844 if (is_slab_pfmemalloc(page))
Mel Gorman072bb0a2012-07-31 16:43:58 -0700845 goto out;
846
847 pfmemalloc_active = false;
848out:
Christoph Lameterce8eb6c2013-01-10 19:14:19 +0000849 spin_unlock_irqrestore(&n->list_lock, flags);
Mel Gorman072bb0a2012-07-31 16:43:58 -0700850}
851
Mel Gorman381760e2012-07-31 16:44:30 -0700852static void *__ac_get_obj(struct kmem_cache *cachep, struct array_cache *ac,
Mel Gorman072bb0a2012-07-31 16:43:58 -0700853 gfp_t flags, bool force_refill)
854{
855 int i;
856 void *objp = ac->entry[--ac->avail];
857
858 /* Ensure the caller is allowed to use objects from PFMEMALLOC slab */
859 if (unlikely(is_obj_pfmemalloc(objp))) {
Christoph Lameterce8eb6c2013-01-10 19:14:19 +0000860 struct kmem_cache_node *n;
Mel Gorman072bb0a2012-07-31 16:43:58 -0700861
862 if (gfp_pfmemalloc_allowed(flags)) {
863 clear_obj_pfmemalloc(&objp);
864 return objp;
865 }
866
867 /* The caller cannot use PFMEMALLOC objects, find another one */
Joonsoo Kimd014dc22012-09-17 14:09:06 -0700868 for (i = 0; i < ac->avail; i++) {
Mel Gorman072bb0a2012-07-31 16:43:58 -0700869 /* If a !PFMEMALLOC object is found, swap them */
870 if (!is_obj_pfmemalloc(ac->entry[i])) {
871 objp = ac->entry[i];
872 ac->entry[i] = ac->entry[ac->avail];
873 ac->entry[ac->avail] = objp;
874 return objp;
875 }
876 }
877
878 /*
879 * If there are empty slabs on the slabs_free list and we are
880 * being forced to refill the cache, mark this one !pfmemalloc.
881 */
Christoph Lameter18bf8542014-08-06 16:04:11 -0700882 n = get_node(cachep, numa_mem_id());
Christoph Lameterce8eb6c2013-01-10 19:14:19 +0000883 if (!list_empty(&n->slabs_free) && force_refill) {
Joonsoo Kim8456a642013-10-24 10:07:49 +0900884 struct page *page = virt_to_head_page(objp);
Joonsoo Kim7ecccf92013-10-24 10:07:50 +0900885 ClearPageSlabPfmemalloc(page);
Mel Gorman072bb0a2012-07-31 16:43:58 -0700886 clear_obj_pfmemalloc(&objp);
887 recheck_pfmemalloc_active(cachep, ac);
888 return objp;
889 }
890
891 /* No !PFMEMALLOC objects available */
892 ac->avail++;
893 objp = NULL;
894 }
895
896 return objp;
897}
898
Mel Gorman381760e2012-07-31 16:44:30 -0700899static inline void *ac_get_obj(struct kmem_cache *cachep,
900 struct array_cache *ac, gfp_t flags, bool force_refill)
901{
902 void *objp;
903
904 if (unlikely(sk_memalloc_socks()))
905 objp = __ac_get_obj(cachep, ac, flags, force_refill);
906 else
907 objp = ac->entry[--ac->avail];
908
909 return objp;
910}
911
912static void *__ac_put_obj(struct kmem_cache *cachep, struct array_cache *ac,
Mel Gorman072bb0a2012-07-31 16:43:58 -0700913 void *objp)
914{
915 if (unlikely(pfmemalloc_active)) {
916 /* Some pfmemalloc slabs exist, check if this is one */
Mel Gorman30c29be2012-09-17 14:09:03 -0700917 struct page *page = virt_to_head_page(objp);
Mel Gorman072bb0a2012-07-31 16:43:58 -0700918 if (PageSlabPfmemalloc(page))
919 set_obj_pfmemalloc(&objp);
920 }
921
Mel Gorman381760e2012-07-31 16:44:30 -0700922 return objp;
923}
924
925static inline void ac_put_obj(struct kmem_cache *cachep, struct array_cache *ac,
926 void *objp)
927{
928 if (unlikely(sk_memalloc_socks()))
929 objp = __ac_put_obj(cachep, ac, objp);
930
Mel Gorman072bb0a2012-07-31 16:43:58 -0700931 ac->entry[ac->avail++] = objp;
932}
933
Christoph Lameter3ded1752006-03-25 03:06:44 -0800934/*
935 * Transfer objects in one arraycache to another.
936 * Locking must be handled by the caller.
937 *
938 * Return the number of entries transferred.
939 */
940static int transfer_objects(struct array_cache *to,
941 struct array_cache *from, unsigned int max)
942{
943 /* Figure out how many entries to transfer */
Hagen Paul Pfeifer732eacc2010-10-26 14:22:23 -0700944 int nr = min3(from->avail, max, to->limit - to->avail);
Christoph Lameter3ded1752006-03-25 03:06:44 -0800945
946 if (!nr)
947 return 0;
948
949 memcpy(to->entry + to->avail, from->entry + from->avail -nr,
950 sizeof(void *) *nr);
951
952 from->avail -= nr;
953 to->avail += nr;
Christoph Lameter3ded1752006-03-25 03:06:44 -0800954 return nr;
955}
956
Christoph Lameter765c4502006-09-27 01:50:08 -0700957#ifndef CONFIG_NUMA
958
959#define drain_alien_cache(cachep, alien) do { } while (0)
Christoph Lameterce8eb6c2013-01-10 19:14:19 +0000960#define reap_alien(cachep, n) do { } while (0)
Christoph Lameter765c4502006-09-27 01:50:08 -0700961
Pekka Enberg83b519e2009-06-10 19:40:04 +0300962static inline struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp)
Christoph Lameter765c4502006-09-27 01:50:08 -0700963{
964 return (struct array_cache **)BAD_ALIEN_MAGIC;
965}
966
967static inline void free_alien_cache(struct array_cache **ac_ptr)
968{
969}
970
971static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
972{
973 return 0;
974}
975
976static inline void *alternate_node_alloc(struct kmem_cache *cachep,
977 gfp_t flags)
978{
979 return NULL;
980}
981
Christoph Hellwig8b98c162006-12-06 20:32:30 -0800982static inline void *____cache_alloc_node(struct kmem_cache *cachep,
Christoph Lameter765c4502006-09-27 01:50:08 -0700983 gfp_t flags, int nodeid)
984{
985 return NULL;
986}
987
988#else /* CONFIG_NUMA */
989
Christoph Hellwig8b98c162006-12-06 20:32:30 -0800990static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int);
Paul Jacksonc61afb12006-03-24 03:16:08 -0800991static void *alternate_node_alloc(struct kmem_cache *, gfp_t);
Christoph Lameterdc85da12006-01-18 17:42:36 -0800992
Pekka Enberg83b519e2009-06-10 19:40:04 +0300993static struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp)
Christoph Lametere498be72005-09-09 13:03:32 -0700994{
995 struct array_cache **ac_ptr;
Christoph Lameter8ef82862007-02-20 13:57:52 -0800996 int memsize = sizeof(void *) * nr_node_ids;
Christoph Lametere498be72005-09-09 13:03:32 -0700997 int i;
998
999 if (limit > 1)
1000 limit = 12;
Haicheng Lif3186a92010-01-06 15:25:23 +08001001 ac_ptr = kzalloc_node(memsize, gfp, node);
Christoph Lametere498be72005-09-09 13:03:32 -07001002 if (ac_ptr) {
1003 for_each_node(i) {
Haicheng Lif3186a92010-01-06 15:25:23 +08001004 if (i == node || !node_online(i))
Christoph Lametere498be72005-09-09 13:03:32 -07001005 continue;
Pekka Enberg83b519e2009-06-10 19:40:04 +03001006 ac_ptr[i] = alloc_arraycache(node, limit, 0xbaadf00d, gfp);
Christoph Lametere498be72005-09-09 13:03:32 -07001007 if (!ac_ptr[i]) {
Akinobu Mitacc550de2007-11-14 16:58:35 -08001008 for (i--; i >= 0; i--)
Christoph Lametere498be72005-09-09 13:03:32 -07001009 kfree(ac_ptr[i]);
1010 kfree(ac_ptr);
1011 return NULL;
1012 }
1013 }
1014 }
1015 return ac_ptr;
1016}
1017
Pekka Enberg5295a742006-02-01 03:05:48 -08001018static void free_alien_cache(struct array_cache **ac_ptr)
Christoph Lametere498be72005-09-09 13:03:32 -07001019{
1020 int i;
1021
1022 if (!ac_ptr)
1023 return;
Christoph Lametere498be72005-09-09 13:03:32 -07001024 for_each_node(i)
Pekka Enbergb28a02d2006-01-08 01:00:37 -08001025 kfree(ac_ptr[i]);
Christoph Lametere498be72005-09-09 13:03:32 -07001026 kfree(ac_ptr);
1027}
1028
Pekka Enberg343e0d72006-02-01 03:05:50 -08001029static void __drain_alien_cache(struct kmem_cache *cachep,
Pekka Enberg5295a742006-02-01 03:05:48 -08001030 struct array_cache *ac, int node)
Christoph Lametere498be72005-09-09 13:03:32 -07001031{
Christoph Lameter18bf8542014-08-06 16:04:11 -07001032 struct kmem_cache_node *n = get_node(cachep, node);
Christoph Lametere498be72005-09-09 13:03:32 -07001033
1034 if (ac->avail) {
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00001035 spin_lock(&n->list_lock);
Christoph Lametere00946f2006-03-25 03:06:45 -08001036 /*
1037 * Stuff objects into the remote nodes shared array first.
1038 * That way we could avoid the overhead of putting the objects
1039 * into the free lists and getting them back later.
1040 */
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00001041 if (n->shared)
1042 transfer_objects(n->shared, ac, ac->limit);
Christoph Lametere00946f2006-03-25 03:06:45 -08001043
Christoph Lameterff694162005-09-22 21:44:02 -07001044 free_block(cachep, ac->entry, ac->avail, node);
Christoph Lametere498be72005-09-09 13:03:32 -07001045 ac->avail = 0;
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00001046 spin_unlock(&n->list_lock);
Christoph Lametere498be72005-09-09 13:03:32 -07001047 }
1048}
1049
Christoph Lameter8fce4d82006-03-09 17:33:54 -08001050/*
1051 * Called from cache_reap() to regularly drain alien caches round robin.
1052 */
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00001053static void reap_alien(struct kmem_cache *cachep, struct kmem_cache_node *n)
Christoph Lameter8fce4d82006-03-09 17:33:54 -08001054{
Christoph Lameter909ea962010-12-08 16:22:55 +01001055 int node = __this_cpu_read(slab_reap_node);
Christoph Lameter8fce4d82006-03-09 17:33:54 -08001056
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00001057 if (n->alien) {
1058 struct array_cache *ac = n->alien[node];
Christoph Lametere00946f2006-03-25 03:06:45 -08001059
1060 if (ac && ac->avail && spin_trylock_irq(&ac->lock)) {
Christoph Lameter8fce4d82006-03-09 17:33:54 -08001061 __drain_alien_cache(cachep, ac, node);
1062 spin_unlock_irq(&ac->lock);
1063 }
1064 }
1065}
1066
Andrew Mortona737b3e2006-03-22 00:08:11 -08001067static void drain_alien_cache(struct kmem_cache *cachep,
1068 struct array_cache **alien)
Christoph Lametere498be72005-09-09 13:03:32 -07001069{
Pekka Enbergb28a02d2006-01-08 01:00:37 -08001070 int i = 0;
Christoph Lametere498be72005-09-09 13:03:32 -07001071 struct array_cache *ac;
1072 unsigned long flags;
1073
1074 for_each_online_node(i) {
Ravikiran G Thirumalai4484ebf2006-02-04 23:27:59 -08001075 ac = alien[i];
Christoph Lametere498be72005-09-09 13:03:32 -07001076 if (ac) {
1077 spin_lock_irqsave(&ac->lock, flags);
1078 __drain_alien_cache(cachep, ac, i);
1079 spin_unlock_irqrestore(&ac->lock, flags);
1080 }
1081 }
1082}
Pekka Enberg729bd0b2006-06-23 02:03:05 -07001083
Ingo Molnar873623d2006-07-13 14:44:38 +02001084static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
Pekka Enberg729bd0b2006-06-23 02:03:05 -07001085{
Joonsoo Kim1ea991b2013-10-24 10:07:40 +09001086 int nodeid = page_to_nid(virt_to_page(objp));
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00001087 struct kmem_cache_node *n;
Pekka Enberg729bd0b2006-06-23 02:03:05 -07001088 struct array_cache *alien = NULL;
Pekka Enberg1ca4cb22006-10-06 00:43:52 -07001089 int node;
1090
Lee Schermerhorn7d6e6d02010-05-26 14:45:03 -07001091 node = numa_mem_id();
Pekka Enberg729bd0b2006-06-23 02:03:05 -07001092
1093 /*
1094 * Make sure we are not freeing a object from another node to the array
1095 * cache on this cpu.
1096 */
Joonsoo Kim1ea991b2013-10-24 10:07:40 +09001097 if (likely(nodeid == node))
Pekka Enberg729bd0b2006-06-23 02:03:05 -07001098 return 0;
1099
Christoph Lameter18bf8542014-08-06 16:04:11 -07001100 n = get_node(cachep, node);
Pekka Enberg729bd0b2006-06-23 02:03:05 -07001101 STATS_INC_NODEFREES(cachep);
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00001102 if (n->alien && n->alien[nodeid]) {
1103 alien = n->alien[nodeid];
Ingo Molnar873623d2006-07-13 14:44:38 +02001104 spin_lock(&alien->lock);
Pekka Enberg729bd0b2006-06-23 02:03:05 -07001105 if (unlikely(alien->avail == alien->limit)) {
1106 STATS_INC_ACOVERFLOW(cachep);
1107 __drain_alien_cache(cachep, alien, nodeid);
1108 }
Mel Gorman072bb0a2012-07-31 16:43:58 -07001109 ac_put_obj(cachep, alien, objp);
Pekka Enberg729bd0b2006-06-23 02:03:05 -07001110 spin_unlock(&alien->lock);
1111 } else {
Christoph Lameter18bf8542014-08-06 16:04:11 -07001112 n = get_node(cachep, nodeid);
1113 spin_lock(&n->list_lock);
Pekka Enberg729bd0b2006-06-23 02:03:05 -07001114 free_block(cachep, &objp, 1, nodeid);
Christoph Lameter18bf8542014-08-06 16:04:11 -07001115 spin_unlock(&n->list_lock);
Pekka Enberg729bd0b2006-06-23 02:03:05 -07001116 }
1117 return 1;
1118}
Christoph Lametere498be72005-09-09 13:03:32 -07001119#endif
1120
David Rientjes8f9f8d92010-03-27 19:40:47 -07001121/*
Christoph Lameter6a673682013-01-10 19:14:19 +00001122 * Allocates and initializes node for a node on each slab cache, used for
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00001123 * either memory or cpu hotplug. If memory is being hot-added, the kmem_cache_node
David Rientjes8f9f8d92010-03-27 19:40:47 -07001124 * will be allocated off-node since memory is not yet online for the new node.
Christoph Lameter6a673682013-01-10 19:14:19 +00001125 * When hotplugging memory or a cpu, existing node are not replaced if
David Rientjes8f9f8d92010-03-27 19:40:47 -07001126 * already in use.
1127 *
Christoph Lameter18004c52012-07-06 15:25:12 -05001128 * Must hold slab_mutex.
David Rientjes8f9f8d92010-03-27 19:40:47 -07001129 */
Christoph Lameter6a673682013-01-10 19:14:19 +00001130static int init_cache_node_node(int node)
David Rientjes8f9f8d92010-03-27 19:40:47 -07001131{
1132 struct kmem_cache *cachep;
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00001133 struct kmem_cache_node *n;
Christoph Lameter6744f082013-01-10 19:12:17 +00001134 const int memsize = sizeof(struct kmem_cache_node);
David Rientjes8f9f8d92010-03-27 19:40:47 -07001135
Christoph Lameter18004c52012-07-06 15:25:12 -05001136 list_for_each_entry(cachep, &slab_caches, list) {
David Rientjes8f9f8d92010-03-27 19:40:47 -07001137 /*
Jianyu Zhan5f0985b2014-03-30 17:02:20 +08001138 * Set up the kmem_cache_node for cpu before we can
David Rientjes8f9f8d92010-03-27 19:40:47 -07001139 * begin anything. Make sure some other cpu on this
1140 * node has not already allocated this
1141 */
Christoph Lameter18bf8542014-08-06 16:04:11 -07001142 n = get_node(cachep, node);
1143 if (!n) {
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00001144 n = kmalloc_node(memsize, GFP_KERNEL, node);
1145 if (!n)
David Rientjes8f9f8d92010-03-27 19:40:47 -07001146 return -ENOMEM;
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00001147 kmem_cache_node_init(n);
Jianyu Zhan5f0985b2014-03-30 17:02:20 +08001148 n->next_reap = jiffies + REAPTIMEOUT_NODE +
1149 ((unsigned long)cachep) % REAPTIMEOUT_NODE;
David Rientjes8f9f8d92010-03-27 19:40:47 -07001150
1151 /*
Jianyu Zhan5f0985b2014-03-30 17:02:20 +08001152 * The kmem_cache_nodes don't come and go as CPUs
1153 * come and go. slab_mutex is sufficient
David Rientjes8f9f8d92010-03-27 19:40:47 -07001154 * protection here.
1155 */
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00001156 cachep->node[node] = n;
David Rientjes8f9f8d92010-03-27 19:40:47 -07001157 }
1158
Christoph Lameter18bf8542014-08-06 16:04:11 -07001159 spin_lock_irq(&n->list_lock);
1160 n->free_limit =
David Rientjes8f9f8d92010-03-27 19:40:47 -07001161 (1 + nr_cpus_node(node)) *
1162 cachep->batchcount + cachep->num;
Christoph Lameter18bf8542014-08-06 16:04:11 -07001163 spin_unlock_irq(&n->list_lock);
David Rientjes8f9f8d92010-03-27 19:40:47 -07001164 }
1165 return 0;
1166}
1167
Wanpeng Li0fa81032013-07-04 08:33:22 +08001168static inline int slabs_tofree(struct kmem_cache *cachep,
1169 struct kmem_cache_node *n)
1170{
1171 return (n->free_objects + cachep->num - 1) / cachep->num;
1172}
1173
Paul Gortmaker0db06282013-06-19 14:53:51 -04001174static void cpuup_canceled(long cpu)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001175{
Akinobu Mitafbf1e472007-10-18 03:05:09 -07001176 struct kmem_cache *cachep;
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00001177 struct kmem_cache_node *n = NULL;
Lee Schermerhorn7d6e6d02010-05-26 14:45:03 -07001178 int node = cpu_to_mem(cpu);
Rusty Russella70f7302009-03-13 14:49:46 +10301179 const struct cpumask *mask = cpumask_of_node(node);
Akinobu Mitafbf1e472007-10-18 03:05:09 -07001180
Christoph Lameter18004c52012-07-06 15:25:12 -05001181 list_for_each_entry(cachep, &slab_caches, list) {
Akinobu Mitafbf1e472007-10-18 03:05:09 -07001182 struct array_cache *nc;
1183 struct array_cache *shared;
1184 struct array_cache **alien;
Akinobu Mitafbf1e472007-10-18 03:05:09 -07001185
Akinobu Mitafbf1e472007-10-18 03:05:09 -07001186 /* cpu is dead; no one can alloc from it. */
1187 nc = cachep->array[cpu];
1188 cachep->array[cpu] = NULL;
Christoph Lameter18bf8542014-08-06 16:04:11 -07001189 n = get_node(cachep, node);
Akinobu Mitafbf1e472007-10-18 03:05:09 -07001190
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00001191 if (!n)
Akinobu Mitafbf1e472007-10-18 03:05:09 -07001192 goto free_array_cache;
1193
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00001194 spin_lock_irq(&n->list_lock);
Akinobu Mitafbf1e472007-10-18 03:05:09 -07001195
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00001196 /* Free limit for this kmem_cache_node */
1197 n->free_limit -= cachep->batchcount;
Akinobu Mitafbf1e472007-10-18 03:05:09 -07001198 if (nc)
1199 free_block(cachep, nc->entry, nc->avail, node);
1200
Rusty Russell58463c12009-12-17 11:43:12 -06001201 if (!cpumask_empty(mask)) {
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00001202 spin_unlock_irq(&n->list_lock);
Akinobu Mitafbf1e472007-10-18 03:05:09 -07001203 goto free_array_cache;
1204 }
1205
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00001206 shared = n->shared;
Akinobu Mitafbf1e472007-10-18 03:05:09 -07001207 if (shared) {
1208 free_block(cachep, shared->entry,
1209 shared->avail, node);
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00001210 n->shared = NULL;
Akinobu Mitafbf1e472007-10-18 03:05:09 -07001211 }
1212
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00001213 alien = n->alien;
1214 n->alien = NULL;
Akinobu Mitafbf1e472007-10-18 03:05:09 -07001215
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00001216 spin_unlock_irq(&n->list_lock);
Akinobu Mitafbf1e472007-10-18 03:05:09 -07001217
1218 kfree(shared);
1219 if (alien) {
1220 drain_alien_cache(cachep, alien);
1221 free_alien_cache(alien);
1222 }
1223free_array_cache:
1224 kfree(nc);
1225 }
1226 /*
1227 * In the previous loop, all the objects were freed to
1228 * the respective cache's slabs, now we can go ahead and
1229 * shrink each nodelist to its limit.
1230 */
Christoph Lameter18004c52012-07-06 15:25:12 -05001231 list_for_each_entry(cachep, &slab_caches, list) {
Christoph Lameter18bf8542014-08-06 16:04:11 -07001232 n = get_node(cachep, node);
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00001233 if (!n)
Akinobu Mitafbf1e472007-10-18 03:05:09 -07001234 continue;
Wanpeng Li0fa81032013-07-04 08:33:22 +08001235 drain_freelist(cachep, n, slabs_tofree(cachep, n));
Akinobu Mitafbf1e472007-10-18 03:05:09 -07001236 }
1237}
1238
Paul Gortmaker0db06282013-06-19 14:53:51 -04001239static int cpuup_prepare(long cpu)
Akinobu Mitafbf1e472007-10-18 03:05:09 -07001240{
Pekka Enberg343e0d72006-02-01 03:05:50 -08001241 struct kmem_cache *cachep;
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00001242 struct kmem_cache_node *n = NULL;
Lee Schermerhorn7d6e6d02010-05-26 14:45:03 -07001243 int node = cpu_to_mem(cpu);
David Rientjes8f9f8d92010-03-27 19:40:47 -07001244 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001245
Akinobu Mitafbf1e472007-10-18 03:05:09 -07001246 /*
1247 * We need to do this right in the beginning since
1248 * alloc_arraycache's are going to use this list.
1249 * kmalloc_node allows us to add the slab to the right
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00001250 * kmem_cache_node and not this cpu's kmem_cache_node
Akinobu Mitafbf1e472007-10-18 03:05:09 -07001251 */
Christoph Lameter6a673682013-01-10 19:14:19 +00001252 err = init_cache_node_node(node);
David Rientjes8f9f8d92010-03-27 19:40:47 -07001253 if (err < 0)
1254 goto bad;
Akinobu Mitafbf1e472007-10-18 03:05:09 -07001255
1256 /*
1257 * Now we can go ahead with allocating the shared arrays and
1258 * array caches
1259 */
Christoph Lameter18004c52012-07-06 15:25:12 -05001260 list_for_each_entry(cachep, &slab_caches, list) {
Akinobu Mitafbf1e472007-10-18 03:05:09 -07001261 struct array_cache *nc;
1262 struct array_cache *shared = NULL;
1263 struct array_cache **alien = NULL;
1264
1265 nc = alloc_arraycache(node, cachep->limit,
Pekka Enberg83b519e2009-06-10 19:40:04 +03001266 cachep->batchcount, GFP_KERNEL);
Akinobu Mitafbf1e472007-10-18 03:05:09 -07001267 if (!nc)
1268 goto bad;
1269 if (cachep->shared) {
1270 shared = alloc_arraycache(node,
1271 cachep->shared * cachep->batchcount,
Pekka Enberg83b519e2009-06-10 19:40:04 +03001272 0xbaadf00d, GFP_KERNEL);
Akinobu Mita12d00f62007-10-18 03:05:11 -07001273 if (!shared) {
1274 kfree(nc);
Akinobu Mitafbf1e472007-10-18 03:05:09 -07001275 goto bad;
Akinobu Mita12d00f62007-10-18 03:05:11 -07001276 }
Akinobu Mitafbf1e472007-10-18 03:05:09 -07001277 }
1278 if (use_alien_caches) {
Pekka Enberg83b519e2009-06-10 19:40:04 +03001279 alien = alloc_alien_cache(node, cachep->limit, GFP_KERNEL);
Akinobu Mita12d00f62007-10-18 03:05:11 -07001280 if (!alien) {
1281 kfree(shared);
1282 kfree(nc);
Akinobu Mitafbf1e472007-10-18 03:05:09 -07001283 goto bad;
Akinobu Mita12d00f62007-10-18 03:05:11 -07001284 }
Akinobu Mitafbf1e472007-10-18 03:05:09 -07001285 }
1286 cachep->array[cpu] = nc;
Christoph Lameter18bf8542014-08-06 16:04:11 -07001287 n = get_node(cachep, node);
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00001288 BUG_ON(!n);
Akinobu Mitafbf1e472007-10-18 03:05:09 -07001289
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00001290 spin_lock_irq(&n->list_lock);
1291 if (!n->shared) {
Akinobu Mitafbf1e472007-10-18 03:05:09 -07001292 /*
1293 * We are serialised from CPU_DEAD or
1294 * CPU_UP_CANCELLED by the cpucontrol lock
1295 */
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00001296 n->shared = shared;
Akinobu Mitafbf1e472007-10-18 03:05:09 -07001297 shared = NULL;
1298 }
1299#ifdef CONFIG_NUMA
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00001300 if (!n->alien) {
1301 n->alien = alien;
Akinobu Mitafbf1e472007-10-18 03:05:09 -07001302 alien = NULL;
1303 }
1304#endif
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00001305 spin_unlock_irq(&n->list_lock);
Akinobu Mitafbf1e472007-10-18 03:05:09 -07001306 kfree(shared);
1307 free_alien_cache(alien);
Peter Zijlstra83835b32011-07-22 15:26:05 +02001308 if (cachep->flags & SLAB_DEBUG_OBJECTS)
Christoph Lameter18bf8542014-08-06 16:04:11 -07001309 slab_set_debugobj_lock_classes_node(cachep, n);
Glauber Costa6ccfb5b2012-12-18 14:22:31 -08001310 else if (!OFF_SLAB(cachep) &&
1311 !(cachep->flags & SLAB_DESTROY_BY_RCU))
Christoph Lameter18bf8542014-08-06 16:04:11 -07001312 on_slab_lock_classes_node(cachep, n);
Akinobu Mitafbf1e472007-10-18 03:05:09 -07001313 }
Pekka Enbergce79ddc2009-11-23 22:01:15 +02001314 init_node_lock_keys(node);
1315
Akinobu Mitafbf1e472007-10-18 03:05:09 -07001316 return 0;
1317bad:
Akinobu Mita12d00f62007-10-18 03:05:11 -07001318 cpuup_canceled(cpu);
Akinobu Mitafbf1e472007-10-18 03:05:09 -07001319 return -ENOMEM;
1320}
1321
Paul Gortmaker0db06282013-06-19 14:53:51 -04001322static int cpuup_callback(struct notifier_block *nfb,
Akinobu Mitafbf1e472007-10-18 03:05:09 -07001323 unsigned long action, void *hcpu)
1324{
1325 long cpu = (long)hcpu;
1326 int err = 0;
1327
Linus Torvalds1da177e2005-04-16 15:20:36 -07001328 switch (action) {
Heiko Carstens38c3bd92007-05-09 02:34:05 -07001329 case CPU_UP_PREPARE:
Rafael J. Wysocki8bb78442007-05-09 02:35:10 -07001330 case CPU_UP_PREPARE_FROZEN:
Christoph Lameter18004c52012-07-06 15:25:12 -05001331 mutex_lock(&slab_mutex);
Akinobu Mitafbf1e472007-10-18 03:05:09 -07001332 err = cpuup_prepare(cpu);
Christoph Lameter18004c52012-07-06 15:25:12 -05001333 mutex_unlock(&slab_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001334 break;
1335 case CPU_ONLINE:
Rafael J. Wysocki8bb78442007-05-09 02:35:10 -07001336 case CPU_ONLINE_FROZEN:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001337 start_cpu_timer(cpu);
1338 break;
1339#ifdef CONFIG_HOTPLUG_CPU
Christoph Lameter5830c592007-05-09 02:34:22 -07001340 case CPU_DOWN_PREPARE:
Rafael J. Wysocki8bb78442007-05-09 02:35:10 -07001341 case CPU_DOWN_PREPARE_FROZEN:
Christoph Lameter5830c592007-05-09 02:34:22 -07001342 /*
Christoph Lameter18004c52012-07-06 15:25:12 -05001343 * Shutdown cache reaper. Note that the slab_mutex is
Christoph Lameter5830c592007-05-09 02:34:22 -07001344 * held so that if cache_reap() is invoked it cannot do
1345 * anything expensive but will only modify reap_work
1346 * and reschedule the timer.
1347 */
Tejun Heoafe2c512010-12-14 16:21:17 +01001348 cancel_delayed_work_sync(&per_cpu(slab_reap_work, cpu));
Christoph Lameter5830c592007-05-09 02:34:22 -07001349 /* Now the cache_reaper is guaranteed to be not running. */
Tejun Heo1871e522009-10-29 22:34:13 +09001350 per_cpu(slab_reap_work, cpu).work.func = NULL;
Christoph Lameter5830c592007-05-09 02:34:22 -07001351 break;
1352 case CPU_DOWN_FAILED:
Rafael J. Wysocki8bb78442007-05-09 02:35:10 -07001353 case CPU_DOWN_FAILED_FROZEN:
Christoph Lameter5830c592007-05-09 02:34:22 -07001354 start_cpu_timer(cpu);
1355 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001356 case CPU_DEAD:
Rafael J. Wysocki8bb78442007-05-09 02:35:10 -07001357 case CPU_DEAD_FROZEN:
Ravikiran G Thirumalai4484ebf2006-02-04 23:27:59 -08001358 /*
1359 * Even if all the cpus of a node are down, we don't free the
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00001360 * kmem_cache_node of any cache. This to avoid a race between
Ravikiran G Thirumalai4484ebf2006-02-04 23:27:59 -08001361 * cpu_down, and a kmalloc allocation from another cpu for
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00001362 * memory from the node of the cpu going down. The node
Ravikiran G Thirumalai4484ebf2006-02-04 23:27:59 -08001363 * structure is usually allocated from kmem_cache_create() and
1364 * gets destroyed at kmem_cache_destroy().
1365 */
Simon Arlott183ff222007-10-20 01:27:18 +02001366 /* fall through */
Ravikiran G Thirumalai8f5be202006-12-06 20:32:14 -08001367#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001368 case CPU_UP_CANCELED:
Rafael J. Wysocki8bb78442007-05-09 02:35:10 -07001369 case CPU_UP_CANCELED_FROZEN:
Christoph Lameter18004c52012-07-06 15:25:12 -05001370 mutex_lock(&slab_mutex);
Akinobu Mitafbf1e472007-10-18 03:05:09 -07001371 cpuup_canceled(cpu);
Christoph Lameter18004c52012-07-06 15:25:12 -05001372 mutex_unlock(&slab_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001373 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001374 }
Akinobu Mitaeac40682010-05-26 14:43:32 -07001375 return notifier_from_errno(err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001376}
1377
Paul Gortmaker0db06282013-06-19 14:53:51 -04001378static struct notifier_block cpucache_notifier = {
Chandra Seetharaman74b85f32006-06-27 02:54:09 -07001379 &cpuup_callback, NULL, 0
1380};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001381
David Rientjes8f9f8d92010-03-27 19:40:47 -07001382#if defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG)
1383/*
1384 * Drains freelist for a node on each slab cache, used for memory hot-remove.
1385 * Returns -EBUSY if all objects cannot be drained so that the node is not
1386 * removed.
1387 *
Christoph Lameter18004c52012-07-06 15:25:12 -05001388 * Must hold slab_mutex.
David Rientjes8f9f8d92010-03-27 19:40:47 -07001389 */
Christoph Lameter6a673682013-01-10 19:14:19 +00001390static int __meminit drain_cache_node_node(int node)
David Rientjes8f9f8d92010-03-27 19:40:47 -07001391{
1392 struct kmem_cache *cachep;
1393 int ret = 0;
1394
Christoph Lameter18004c52012-07-06 15:25:12 -05001395 list_for_each_entry(cachep, &slab_caches, list) {
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00001396 struct kmem_cache_node *n;
David Rientjes8f9f8d92010-03-27 19:40:47 -07001397
Christoph Lameter18bf8542014-08-06 16:04:11 -07001398 n = get_node(cachep, node);
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00001399 if (!n)
David Rientjes8f9f8d92010-03-27 19:40:47 -07001400 continue;
1401
Wanpeng Li0fa81032013-07-04 08:33:22 +08001402 drain_freelist(cachep, n, slabs_tofree(cachep, n));
David Rientjes8f9f8d92010-03-27 19:40:47 -07001403
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00001404 if (!list_empty(&n->slabs_full) ||
1405 !list_empty(&n->slabs_partial)) {
David Rientjes8f9f8d92010-03-27 19:40:47 -07001406 ret = -EBUSY;
1407 break;
1408 }
1409 }
1410 return ret;
1411}
1412
1413static int __meminit slab_memory_callback(struct notifier_block *self,
1414 unsigned long action, void *arg)
1415{
1416 struct memory_notify *mnb = arg;
1417 int ret = 0;
1418 int nid;
1419
1420 nid = mnb->status_change_nid;
1421 if (nid < 0)
1422 goto out;
1423
1424 switch (action) {
1425 case MEM_GOING_ONLINE:
Christoph Lameter18004c52012-07-06 15:25:12 -05001426 mutex_lock(&slab_mutex);
Christoph Lameter6a673682013-01-10 19:14:19 +00001427 ret = init_cache_node_node(nid);
Christoph Lameter18004c52012-07-06 15:25:12 -05001428 mutex_unlock(&slab_mutex);
David Rientjes8f9f8d92010-03-27 19:40:47 -07001429 break;
1430 case MEM_GOING_OFFLINE:
Christoph Lameter18004c52012-07-06 15:25:12 -05001431 mutex_lock(&slab_mutex);
Christoph Lameter6a673682013-01-10 19:14:19 +00001432 ret = drain_cache_node_node(nid);
Christoph Lameter18004c52012-07-06 15:25:12 -05001433 mutex_unlock(&slab_mutex);
David Rientjes8f9f8d92010-03-27 19:40:47 -07001434 break;
1435 case MEM_ONLINE:
1436 case MEM_OFFLINE:
1437 case MEM_CANCEL_ONLINE:
1438 case MEM_CANCEL_OFFLINE:
1439 break;
1440 }
1441out:
Prarit Bhargava5fda1bd2011-03-22 16:30:49 -07001442 return notifier_from_errno(ret);
David Rientjes8f9f8d92010-03-27 19:40:47 -07001443}
1444#endif /* CONFIG_NUMA && CONFIG_MEMORY_HOTPLUG */
1445
Christoph Lametere498be72005-09-09 13:03:32 -07001446/*
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00001447 * swap the static kmem_cache_node with kmalloced memory
Christoph Lametere498be72005-09-09 13:03:32 -07001448 */
Christoph Lameter6744f082013-01-10 19:12:17 +00001449static void __init init_list(struct kmem_cache *cachep, struct kmem_cache_node *list,
David Rientjes8f9f8d92010-03-27 19:40:47 -07001450 int nodeid)
Christoph Lametere498be72005-09-09 13:03:32 -07001451{
Christoph Lameter6744f082013-01-10 19:12:17 +00001452 struct kmem_cache_node *ptr;
Christoph Lametere498be72005-09-09 13:03:32 -07001453
Christoph Lameter6744f082013-01-10 19:12:17 +00001454 ptr = kmalloc_node(sizeof(struct kmem_cache_node), GFP_NOWAIT, nodeid);
Christoph Lametere498be72005-09-09 13:03:32 -07001455 BUG_ON(!ptr);
1456
Christoph Lameter6744f082013-01-10 19:12:17 +00001457 memcpy(ptr, list, sizeof(struct kmem_cache_node));
Ingo Molnar2b2d5492006-07-03 00:25:28 -07001458 /*
1459 * Do not assume that spinlocks can be initialized via memcpy:
1460 */
1461 spin_lock_init(&ptr->list_lock);
1462
Christoph Lametere498be72005-09-09 13:03:32 -07001463 MAKE_ALL_LISTS(cachep, ptr, nodeid);
Christoph Lameter6a673682013-01-10 19:14:19 +00001464 cachep->node[nodeid] = ptr;
Christoph Lametere498be72005-09-09 13:03:32 -07001465}
1466
Andrew Mortona737b3e2006-03-22 00:08:11 -08001467/*
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00001468 * For setting up all the kmem_cache_node for cache whose buffer_size is same as
1469 * size of kmem_cache_node.
Pekka Enberg556a1692008-01-25 08:20:51 +02001470 */
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00001471static void __init set_up_node(struct kmem_cache *cachep, int index)
Pekka Enberg556a1692008-01-25 08:20:51 +02001472{
1473 int node;
1474
1475 for_each_online_node(node) {
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00001476 cachep->node[node] = &init_kmem_cache_node[index + node];
Christoph Lameter6a673682013-01-10 19:14:19 +00001477 cachep->node[node]->next_reap = jiffies +
Jianyu Zhan5f0985b2014-03-30 17:02:20 +08001478 REAPTIMEOUT_NODE +
1479 ((unsigned long)cachep) % REAPTIMEOUT_NODE;
Pekka Enberg556a1692008-01-25 08:20:51 +02001480 }
1481}
1482
1483/*
Christoph Lameter3c583462012-11-28 16:23:01 +00001484 * The memory after the last cpu cache pointer is used for the
Christoph Lameter6a673682013-01-10 19:14:19 +00001485 * the node pointer.
Christoph Lameter3c583462012-11-28 16:23:01 +00001486 */
Christoph Lameter6a673682013-01-10 19:14:19 +00001487static void setup_node_pointer(struct kmem_cache *cachep)
Christoph Lameter3c583462012-11-28 16:23:01 +00001488{
Christoph Lameter6a673682013-01-10 19:14:19 +00001489 cachep->node = (struct kmem_cache_node **)&cachep->array[nr_cpu_ids];
Christoph Lameter3c583462012-11-28 16:23:01 +00001490}
1491
1492/*
Andrew Mortona737b3e2006-03-22 00:08:11 -08001493 * Initialisation. Called after the page allocator have been initialised and
1494 * before smp_init().
Linus Torvalds1da177e2005-04-16 15:20:36 -07001495 */
1496void __init kmem_cache_init(void)
1497{
Christoph Lametere498be72005-09-09 13:03:32 -07001498 int i;
1499
Joonsoo Kim68126702013-10-24 10:07:42 +09001500 BUILD_BUG_ON(sizeof(((struct page *)NULL)->lru) <
1501 sizeof(struct rcu_head));
Christoph Lameter9b030cb2012-09-05 00:20:33 +00001502 kmem_cache = &kmem_cache_boot;
Christoph Lameter6a673682013-01-10 19:14:19 +00001503 setup_node_pointer(kmem_cache);
Christoph Lameter9b030cb2012-09-05 00:20:33 +00001504
Mel Gormanb6e68bc2009-06-16 15:32:16 -07001505 if (num_possible_nodes() == 1)
Siddha, Suresh B62918a02007-05-02 19:27:18 +02001506 use_alien_caches = 0;
1507
Christoph Lameter3c583462012-11-28 16:23:01 +00001508 for (i = 0; i < NUM_INIT_LISTS; i++)
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00001509 kmem_cache_node_init(&init_kmem_cache_node[i]);
Christoph Lameter3c583462012-11-28 16:23:01 +00001510
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00001511 set_up_node(kmem_cache, CACHE_CACHE);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001512
1513 /*
1514 * Fragmentation resistance on low memory - only use bigger
David Rientjes3df1ccc2011-10-18 22:09:28 -07001515 * page orders on machines with more than 32MB of memory if
1516 * not overridden on the command line.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001517 */
David Rientjes3df1ccc2011-10-18 22:09:28 -07001518 if (!slab_max_order_set && totalram_pages > (32 << 20) >> PAGE_SHIFT)
David Rientjes543585c2011-10-18 22:09:24 -07001519 slab_max_order = SLAB_MAX_ORDER_HI;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001520
Linus Torvalds1da177e2005-04-16 15:20:36 -07001521 /* Bootstrap is tricky, because several objects are allocated
1522 * from caches that do not exist yet:
Christoph Lameter9b030cb2012-09-05 00:20:33 +00001523 * 1) initialize the kmem_cache cache: it contains the struct
1524 * kmem_cache structures of all caches, except kmem_cache itself:
1525 * kmem_cache is statically allocated.
Christoph Lametere498be72005-09-09 13:03:32 -07001526 * Initially an __init data area is used for the head array and the
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00001527 * kmem_cache_node structures, it's replaced with a kmalloc allocated
Christoph Lametere498be72005-09-09 13:03:32 -07001528 * array at the end of the bootstrap.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001529 * 2) Create the first kmalloc cache.
Pekka Enberg343e0d72006-02-01 03:05:50 -08001530 * The struct kmem_cache for the new cache is allocated normally.
Christoph Lametere498be72005-09-09 13:03:32 -07001531 * An __init data area is used for the head array.
1532 * 3) Create the remaining kmalloc caches, with minimally sized
1533 * head arrays.
Christoph Lameter9b030cb2012-09-05 00:20:33 +00001534 * 4) Replace the __init data head arrays for kmem_cache and the first
Linus Torvalds1da177e2005-04-16 15:20:36 -07001535 * kmalloc cache with kmalloc allocated arrays.
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00001536 * 5) Replace the __init data for kmem_cache_node for kmem_cache and
Christoph Lametere498be72005-09-09 13:03:32 -07001537 * the other cache's with kmalloc allocated memory.
1538 * 6) Resize the head arrays of the kmalloc caches to their final sizes.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001539 */
1540
Christoph Lameter9b030cb2012-09-05 00:20:33 +00001541 /* 1) create the kmem_cache */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001542
Eric Dumazet8da34302007-05-06 14:49:29 -07001543 /*
Eric Dumazetb56efcf2011-07-20 19:04:23 +02001544 * struct kmem_cache size depends on nr_node_ids & nr_cpu_ids
Eric Dumazet8da34302007-05-06 14:49:29 -07001545 */
Christoph Lameter2f9baa92012-11-28 16:23:09 +00001546 create_boot_cache(kmem_cache, "kmem_cache",
1547 offsetof(struct kmem_cache, array[nr_cpu_ids]) +
Christoph Lameter6744f082013-01-10 19:12:17 +00001548 nr_node_ids * sizeof(struct kmem_cache_node *),
Christoph Lameter2f9baa92012-11-28 16:23:09 +00001549 SLAB_HWCACHE_ALIGN);
1550 list_add(&kmem_cache->list, &slab_caches);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001551
1552 /* 2+3) create the kmalloc caches */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001553
Andrew Mortona737b3e2006-03-22 00:08:11 -08001554 /*
1555 * Initialize the caches that provide memory for the array cache and the
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00001556 * kmem_cache_node structures first. Without this, further allocations will
Andrew Mortona737b3e2006-03-22 00:08:11 -08001557 * bug.
Christoph Lametere498be72005-09-09 13:03:32 -07001558 */
1559
Christoph Lametere3366012013-01-10 19:14:18 +00001560 kmalloc_caches[INDEX_AC] = create_kmalloc_cache("kmalloc-ac",
1561 kmalloc_size(INDEX_AC), ARCH_KMALLOC_FLAGS);
Christoph Lametere498be72005-09-09 13:03:32 -07001562
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00001563 if (INDEX_AC != INDEX_NODE)
1564 kmalloc_caches[INDEX_NODE] =
1565 create_kmalloc_cache("kmalloc-node",
1566 kmalloc_size(INDEX_NODE), ARCH_KMALLOC_FLAGS);
Christoph Lametere498be72005-09-09 13:03:32 -07001567
Ingo Molnare0a42722006-06-23 02:03:46 -07001568 slab_early_init = 0;
1569
Linus Torvalds1da177e2005-04-16 15:20:36 -07001570 /* 4) Replace the bootstrap head arrays */
1571 {
Ingo Molnar2b2d5492006-07-03 00:25:28 -07001572 struct array_cache *ptr;
Christoph Lametere498be72005-09-09 13:03:32 -07001573
Pekka Enberg83b519e2009-06-10 19:40:04 +03001574 ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);
Christoph Lametere498be72005-09-09 13:03:32 -07001575
Christoph Lameter9b030cb2012-09-05 00:20:33 +00001576 memcpy(ptr, cpu_cache_get(kmem_cache),
Pekka Enbergb28a02d2006-01-08 01:00:37 -08001577 sizeof(struct arraycache_init));
Ingo Molnar2b2d5492006-07-03 00:25:28 -07001578 /*
1579 * Do not assume that spinlocks can be initialized via memcpy:
1580 */
1581 spin_lock_init(&ptr->lock);
1582
Christoph Lameter9b030cb2012-09-05 00:20:33 +00001583 kmem_cache->array[smp_processor_id()] = ptr;
Christoph Lametere498be72005-09-09 13:03:32 -07001584
Pekka Enberg83b519e2009-06-10 19:40:04 +03001585 ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);
Christoph Lametere498be72005-09-09 13:03:32 -07001586
Christoph Lametere3366012013-01-10 19:14:18 +00001587 BUG_ON(cpu_cache_get(kmalloc_caches[INDEX_AC])
Pekka Enbergb28a02d2006-01-08 01:00:37 -08001588 != &initarray_generic.cache);
Christoph Lametere3366012013-01-10 19:14:18 +00001589 memcpy(ptr, cpu_cache_get(kmalloc_caches[INDEX_AC]),
Pekka Enbergb28a02d2006-01-08 01:00:37 -08001590 sizeof(struct arraycache_init));
Ingo Molnar2b2d5492006-07-03 00:25:28 -07001591 /*
1592 * Do not assume that spinlocks can be initialized via memcpy:
1593 */
1594 spin_lock_init(&ptr->lock);
1595
Christoph Lametere3366012013-01-10 19:14:18 +00001596 kmalloc_caches[INDEX_AC]->array[smp_processor_id()] = ptr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001597 }
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00001598 /* 5) Replace the bootstrap kmem_cache_node */
Christoph Lametere498be72005-09-09 13:03:32 -07001599 {
Pekka Enberg1ca4cb22006-10-06 00:43:52 -07001600 int nid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001601
Mel Gorman9c09a952008-01-24 05:49:54 -08001602 for_each_online_node(nid) {
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00001603 init_list(kmem_cache, &init_kmem_cache_node[CACHE_CACHE + nid], nid);
Pekka Enberg556a1692008-01-25 08:20:51 +02001604
Christoph Lametere3366012013-01-10 19:14:18 +00001605 init_list(kmalloc_caches[INDEX_AC],
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00001606 &init_kmem_cache_node[SIZE_AC + nid], nid);
Christoph Lametere498be72005-09-09 13:03:32 -07001607
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00001608 if (INDEX_AC != INDEX_NODE) {
1609 init_list(kmalloc_caches[INDEX_NODE],
1610 &init_kmem_cache_node[SIZE_NODE + nid], nid);
Christoph Lametere498be72005-09-09 13:03:32 -07001611 }
1612 }
1613 }
1614
Christoph Lameterf97d5f62013-01-10 19:12:17 +00001615 create_kmalloc_caches(ARCH_KMALLOC_FLAGS);
Pekka Enberg8429db52009-06-12 15:58:59 +03001616}
Ravikiran G Thirumalai056c6242006-09-25 23:31:38 -07001617
Pekka Enberg8429db52009-06-12 15:58:59 +03001618void __init kmem_cache_init_late(void)
1619{
1620 struct kmem_cache *cachep;
1621
Christoph Lameter97d06602012-07-06 15:25:11 -05001622 slab_state = UP;
Peter Zijlstra52cef182011-11-28 21:12:40 +01001623
Pekka Enberg8429db52009-06-12 15:58:59 +03001624 /* 6) resize the head arrays to their final sizes */
Christoph Lameter18004c52012-07-06 15:25:12 -05001625 mutex_lock(&slab_mutex);
1626 list_for_each_entry(cachep, &slab_caches, list)
Pekka Enberg8429db52009-06-12 15:58:59 +03001627 if (enable_cpucache(cachep, GFP_NOWAIT))
1628 BUG();
Christoph Lameter18004c52012-07-06 15:25:12 -05001629 mutex_unlock(&slab_mutex);
Ravikiran G Thirumalai056c6242006-09-25 23:31:38 -07001630
Michael Wang947ca182012-09-05 10:33:18 +08001631 /* Annotate slab for lockdep -- annotate the malloc caches */
1632 init_lock_keys();
1633
Christoph Lameter97d06602012-07-06 15:25:11 -05001634 /* Done! */
1635 slab_state = FULL;
1636
Andrew Mortona737b3e2006-03-22 00:08:11 -08001637 /*
1638 * Register a cpu startup notifier callback that initializes
1639 * cpu_cache_get for all new cpus
Linus Torvalds1da177e2005-04-16 15:20:36 -07001640 */
1641 register_cpu_notifier(&cpucache_notifier);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001642
David Rientjes8f9f8d92010-03-27 19:40:47 -07001643#ifdef CONFIG_NUMA
1644 /*
1645 * Register a memory hotplug callback that initializes and frees
Christoph Lameter6a673682013-01-10 19:14:19 +00001646 * node.
David Rientjes8f9f8d92010-03-27 19:40:47 -07001647 */
1648 hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);
1649#endif
1650
Andrew Mortona737b3e2006-03-22 00:08:11 -08001651 /*
1652 * The reap timers are started later, with a module init call: That part
1653 * of the kernel is not yet operational.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001654 */
1655}
1656
1657static int __init cpucache_init(void)
1658{
1659 int cpu;
1660
Andrew Mortona737b3e2006-03-22 00:08:11 -08001661 /*
1662 * Register the timers that return unneeded pages to the page allocator
Linus Torvalds1da177e2005-04-16 15:20:36 -07001663 */
Christoph Lametere498be72005-09-09 13:03:32 -07001664 for_each_online_cpu(cpu)
Andrew Mortona737b3e2006-03-22 00:08:11 -08001665 start_cpu_timer(cpu);
Glauber Costaa164f8962012-06-21 00:59:18 +04001666
1667 /* Done! */
Christoph Lameter97d06602012-07-06 15:25:11 -05001668 slab_state = FULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001669 return 0;
1670}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001671__initcall(cpucache_init);
1672
Rafael Aquini8bdec192012-03-09 17:27:27 -03001673static noinline void
1674slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
1675{
David Rientjes9a02d692014-06-04 16:06:36 -07001676#if DEBUG
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00001677 struct kmem_cache_node *n;
Joonsoo Kim8456a642013-10-24 10:07:49 +09001678 struct page *page;
Rafael Aquini8bdec192012-03-09 17:27:27 -03001679 unsigned long flags;
1680 int node;
David Rientjes9a02d692014-06-04 16:06:36 -07001681 static DEFINE_RATELIMIT_STATE(slab_oom_rs, DEFAULT_RATELIMIT_INTERVAL,
1682 DEFAULT_RATELIMIT_BURST);
1683
1684 if ((gfpflags & __GFP_NOWARN) || !__ratelimit(&slab_oom_rs))
1685 return;
Rafael Aquini8bdec192012-03-09 17:27:27 -03001686
1687 printk(KERN_WARNING
1688 "SLAB: Unable to allocate memory on node %d (gfp=0x%x)\n",
1689 nodeid, gfpflags);
1690 printk(KERN_WARNING " cache: %s, object size: %d, order: %d\n",
Christoph Lameter3b0efdf2012-06-13 10:24:57 -05001691 cachep->name, cachep->size, cachep->gfporder);
Rafael Aquini8bdec192012-03-09 17:27:27 -03001692
Christoph Lameter18bf8542014-08-06 16:04:11 -07001693 for_each_kmem_cache_node(cachep, node, n) {
Rafael Aquini8bdec192012-03-09 17:27:27 -03001694 unsigned long active_objs = 0, num_objs = 0, free_objects = 0;
1695 unsigned long active_slabs = 0, num_slabs = 0;
1696
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00001697 spin_lock_irqsave(&n->list_lock, flags);
Joonsoo Kim8456a642013-10-24 10:07:49 +09001698 list_for_each_entry(page, &n->slabs_full, lru) {
Rafael Aquini8bdec192012-03-09 17:27:27 -03001699 active_objs += cachep->num;
1700 active_slabs++;
1701 }
Joonsoo Kim8456a642013-10-24 10:07:49 +09001702 list_for_each_entry(page, &n->slabs_partial, lru) {
1703 active_objs += page->active;
Rafael Aquini8bdec192012-03-09 17:27:27 -03001704 active_slabs++;
1705 }
Joonsoo Kim8456a642013-10-24 10:07:49 +09001706 list_for_each_entry(page, &n->slabs_free, lru)
Rafael Aquini8bdec192012-03-09 17:27:27 -03001707 num_slabs++;
1708
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00001709 free_objects += n->free_objects;
1710 spin_unlock_irqrestore(&n->list_lock, flags);
Rafael Aquini8bdec192012-03-09 17:27:27 -03001711
1712 num_slabs += active_slabs;
1713 num_objs = num_slabs * cachep->num;
1714 printk(KERN_WARNING
1715 " node %d: slabs: %ld/%ld, objs: %ld/%ld, free: %ld\n",
1716 node, active_slabs, num_slabs, active_objs, num_objs,
1717 free_objects);
1718 }
David Rientjes9a02d692014-06-04 16:06:36 -07001719#endif
Rafael Aquini8bdec192012-03-09 17:27:27 -03001720}
1721
Linus Torvalds1da177e2005-04-16 15:20:36 -07001722/*
1723 * Interface to system's page allocator. No need to hold the cache-lock.
1724 *
1725 * If we requested dmaable memory, we will get it. Even if we
1726 * did not request dmaable memory, we might get it, but that
1727 * would be relatively rare and ignorable.
1728 */
Joonsoo Kim0c3aa832013-10-24 10:07:38 +09001729static struct page *kmem_getpages(struct kmem_cache *cachep, gfp_t flags,
1730 int nodeid)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001731{
1732 struct page *page;
Christoph Hellwige1b6aa62006-06-23 02:03:17 -07001733 int nr_pages;
Christoph Lameter765c4502006-09-27 01:50:08 -07001734
Glauber Costaa618e892012-06-14 16:17:21 +04001735 flags |= cachep->allocflags;
Mel Gormane12ba742007-10-16 01:25:52 -07001736 if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
1737 flags |= __GFP_RECLAIMABLE;
Christoph Hellwige1b6aa62006-06-23 02:03:17 -07001738
Vladimir Davydov5dfb4172014-06-04 16:06:38 -07001739 if (memcg_charge_slab(cachep, flags, cachep->gfporder))
1740 return NULL;
1741
Linus Torvalds517d0862009-06-16 19:50:13 -07001742 page = alloc_pages_exact_node(nodeid, flags | __GFP_NOTRACK, cachep->gfporder);
Rafael Aquini8bdec192012-03-09 17:27:27 -03001743 if (!page) {
Vladimir Davydov5dfb4172014-06-04 16:06:38 -07001744 memcg_uncharge_slab(cachep, cachep->gfporder);
David Rientjes9a02d692014-06-04 16:06:36 -07001745 slab_out_of_memory(cachep, flags, nodeid);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001746 return NULL;
Rafael Aquini8bdec192012-03-09 17:27:27 -03001747 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001748
Mel Gormanb37f1dd2012-07-31 16:44:03 -07001749 /* Record if ALLOC_NO_WATERMARKS was set when allocating the slab */
Mel Gorman072bb0a2012-07-31 16:43:58 -07001750 if (unlikely(page->pfmemalloc))
1751 pfmemalloc_active = true;
1752
Christoph Hellwige1b6aa62006-06-23 02:03:17 -07001753 nr_pages = (1 << cachep->gfporder);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001754 if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
Christoph Lameter972d1a72006-09-25 23:31:51 -07001755 add_zone_page_state(page_zone(page),
1756 NR_SLAB_RECLAIMABLE, nr_pages);
1757 else
1758 add_zone_page_state(page_zone(page),
1759 NR_SLAB_UNRECLAIMABLE, nr_pages);
Joonsoo Kima57a4982013-10-24 10:07:44 +09001760 __SetPageSlab(page);
1761 if (page->pfmemalloc)
1762 SetPageSlabPfmemalloc(page);
Mel Gorman072bb0a2012-07-31 16:43:58 -07001763
Vegard Nossumb1eeab62008-11-25 16:55:53 +01001764 if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) {
1765 kmemcheck_alloc_shadow(page, cachep->gfporder, flags, nodeid);
1766
1767 if (cachep->ctor)
1768 kmemcheck_mark_uninitialized_pages(page, nr_pages);
1769 else
1770 kmemcheck_mark_unallocated_pages(page, nr_pages);
1771 }
Pekka Enbergc175eea2008-05-09 20:35:53 +02001772
Joonsoo Kim0c3aa832013-10-24 10:07:38 +09001773 return page;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001774}
1775
1776/*
1777 * Interface to system's page release.
1778 */
Joonsoo Kim0c3aa832013-10-24 10:07:38 +09001779static void kmem_freepages(struct kmem_cache *cachep, struct page *page)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001780{
Joonsoo Kima57a4982013-10-24 10:07:44 +09001781 const unsigned long nr_freed = (1 << cachep->gfporder);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001782
Vegard Nossumb1eeab62008-11-25 16:55:53 +01001783 kmemcheck_free_shadow(page, cachep->gfporder);
Pekka Enbergc175eea2008-05-09 20:35:53 +02001784
Christoph Lameter972d1a72006-09-25 23:31:51 -07001785 if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
1786 sub_zone_page_state(page_zone(page),
1787 NR_SLAB_RECLAIMABLE, nr_freed);
1788 else
1789 sub_zone_page_state(page_zone(page),
1790 NR_SLAB_UNRECLAIMABLE, nr_freed);
Joonsoo Kim73293c22013-10-24 10:07:37 +09001791
Joonsoo Kima57a4982013-10-24 10:07:44 +09001792 BUG_ON(!PageSlab(page));
Joonsoo Kim73293c22013-10-24 10:07:37 +09001793 __ClearPageSlabPfmemalloc(page);
Joonsoo Kima57a4982013-10-24 10:07:44 +09001794 __ClearPageSlab(page);
Joonsoo Kim8456a642013-10-24 10:07:49 +09001795 page_mapcount_reset(page);
1796 page->mapping = NULL;
Glauber Costa1f458cb2012-12-18 14:22:50 -08001797
Linus Torvalds1da177e2005-04-16 15:20:36 -07001798 if (current->reclaim_state)
1799 current->reclaim_state->reclaimed_slab += nr_freed;
Vladimir Davydov5dfb4172014-06-04 16:06:38 -07001800 __free_pages(page, cachep->gfporder);
1801 memcg_uncharge_slab(cachep, cachep->gfporder);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001802}
1803
1804static void kmem_rcu_free(struct rcu_head *head)
1805{
Joonsoo Kim68126702013-10-24 10:07:42 +09001806 struct kmem_cache *cachep;
1807 struct page *page;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001808
Joonsoo Kim68126702013-10-24 10:07:42 +09001809 page = container_of(head, struct page, rcu_head);
1810 cachep = page->slab_cache;
1811
1812 kmem_freepages(cachep, page);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001813}
1814
1815#if DEBUG
1816
1817#ifdef CONFIG_DEBUG_PAGEALLOC
Pekka Enberg343e0d72006-02-01 03:05:50 -08001818static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr,
Pekka Enbergb28a02d2006-01-08 01:00:37 -08001819 unsigned long caller)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001820{
Christoph Lameter8c138bc2012-06-13 10:24:58 -05001821 int size = cachep->object_size;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001822
Manfred Spraul3dafccf2006-02-01 03:05:42 -08001823 addr = (unsigned long *)&((char *)addr)[obj_offset(cachep)];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001824
Pekka Enbergb28a02d2006-01-08 01:00:37 -08001825 if (size < 5 * sizeof(unsigned long))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001826 return;
1827
Pekka Enbergb28a02d2006-01-08 01:00:37 -08001828 *addr++ = 0x12345678;
1829 *addr++ = caller;
1830 *addr++ = smp_processor_id();
1831 size -= 3 * sizeof(unsigned long);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001832 {
1833 unsigned long *sptr = &caller;
1834 unsigned long svalue;
1835
1836 while (!kstack_end(sptr)) {
1837 svalue = *sptr++;
1838 if (kernel_text_address(svalue)) {
Pekka Enbergb28a02d2006-01-08 01:00:37 -08001839 *addr++ = svalue;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001840 size -= sizeof(unsigned long);
1841 if (size <= sizeof(unsigned long))
1842 break;
1843 }
1844 }
1845
1846 }
Pekka Enbergb28a02d2006-01-08 01:00:37 -08001847 *addr++ = 0x87654321;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001848}
1849#endif
1850
Pekka Enberg343e0d72006-02-01 03:05:50 -08001851static void poison_obj(struct kmem_cache *cachep, void *addr, unsigned char val)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001852{
Christoph Lameter8c138bc2012-06-13 10:24:58 -05001853 int size = cachep->object_size;
Manfred Spraul3dafccf2006-02-01 03:05:42 -08001854 addr = &((char *)addr)[obj_offset(cachep)];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001855
1856 memset(addr, val, size);
Pekka Enbergb28a02d2006-01-08 01:00:37 -08001857 *(unsigned char *)(addr + size - 1) = POISON_END;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001858}
1859
1860static void dump_line(char *data, int offset, int limit)
1861{
1862 int i;
Dave Jonesaa83aa42006-09-29 01:59:51 -07001863 unsigned char error = 0;
1864 int bad_count = 0;
1865
Sebastian Andrzej Siewiorfdde6ab2011-07-29 18:22:13 +02001866 printk(KERN_ERR "%03x: ", offset);
Dave Jonesaa83aa42006-09-29 01:59:51 -07001867 for (i = 0; i < limit; i++) {
1868 if (data[offset + i] != POISON_FREE) {
1869 error = data[offset + i];
1870 bad_count++;
1871 }
Dave Jonesaa83aa42006-09-29 01:59:51 -07001872 }
Sebastian Andrzej Siewiorfdde6ab2011-07-29 18:22:13 +02001873 print_hex_dump(KERN_CONT, "", 0, 16, 1,
1874 &data[offset], limit, 1);
Dave Jonesaa83aa42006-09-29 01:59:51 -07001875
1876 if (bad_count == 1) {
1877 error ^= POISON_FREE;
1878 if (!(error & (error - 1))) {
1879 printk(KERN_ERR "Single bit error detected. Probably "
1880 "bad RAM.\n");
1881#ifdef CONFIG_X86
1882 printk(KERN_ERR "Run memtest86+ or a similar memory "
1883 "test tool.\n");
1884#else
1885 printk(KERN_ERR "Run a memory test tool.\n");
1886#endif
1887 }
1888 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001889}
1890#endif
1891
1892#if DEBUG
1893
Pekka Enberg343e0d72006-02-01 03:05:50 -08001894static void print_objinfo(struct kmem_cache *cachep, void *objp, int lines)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001895{
1896 int i, size;
1897 char *realobj;
1898
1899 if (cachep->flags & SLAB_RED_ZONE) {
David Woodhouseb46b8f12007-05-08 00:22:59 -07001900 printk(KERN_ERR "Redzone: 0x%llx/0x%llx.\n",
Andrew Mortona737b3e2006-03-22 00:08:11 -08001901 *dbg_redzone1(cachep, objp),
1902 *dbg_redzone2(cachep, objp));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001903 }
1904
1905 if (cachep->flags & SLAB_STORE_USER) {
Joe Perches071361d2012-12-12 10:19:12 -08001906 printk(KERN_ERR "Last user: [<%p>](%pSR)\n",
1907 *dbg_userword(cachep, objp),
1908 *dbg_userword(cachep, objp));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001909 }
Manfred Spraul3dafccf2006-02-01 03:05:42 -08001910 realobj = (char *)objp + obj_offset(cachep);
Christoph Lameter8c138bc2012-06-13 10:24:58 -05001911 size = cachep->object_size;
Pekka Enbergb28a02d2006-01-08 01:00:37 -08001912 for (i = 0; i < size && lines; i += 16, lines--) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001913 int limit;
1914 limit = 16;
Pekka Enbergb28a02d2006-01-08 01:00:37 -08001915 if (i + limit > size)
1916 limit = size - i;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001917 dump_line(realobj, i, limit);
1918 }
1919}
1920
Pekka Enberg343e0d72006-02-01 03:05:50 -08001921static void check_poison_obj(struct kmem_cache *cachep, void *objp)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001922{
1923 char *realobj;
1924 int size, i;
1925 int lines = 0;
1926
Manfred Spraul3dafccf2006-02-01 03:05:42 -08001927 realobj = (char *)objp + obj_offset(cachep);
Christoph Lameter8c138bc2012-06-13 10:24:58 -05001928 size = cachep->object_size;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001929
Pekka Enbergb28a02d2006-01-08 01:00:37 -08001930 for (i = 0; i < size; i++) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001931 char exp = POISON_FREE;
Pekka Enbergb28a02d2006-01-08 01:00:37 -08001932 if (i == size - 1)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001933 exp = POISON_END;
1934 if (realobj[i] != exp) {
1935 int limit;
1936 /* Mismatch ! */
1937 /* Print header */
1938 if (lines == 0) {
Pekka Enbergb28a02d2006-01-08 01:00:37 -08001939 printk(KERN_ERR
Dave Jonesface37f2011-11-15 15:03:52 -08001940 "Slab corruption (%s): %s start=%p, len=%d\n",
1941 print_tainted(), cachep->name, realobj, size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001942 print_objinfo(cachep, objp, 0);
1943 }
1944 /* Hexdump the affected line */
Pekka Enbergb28a02d2006-01-08 01:00:37 -08001945 i = (i / 16) * 16;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001946 limit = 16;
Pekka Enbergb28a02d2006-01-08 01:00:37 -08001947 if (i + limit > size)
1948 limit = size - i;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001949 dump_line(realobj, i, limit);
1950 i += 16;
1951 lines++;
1952 /* Limit to 5 lines */
1953 if (lines > 5)
1954 break;
1955 }
1956 }
1957 if (lines != 0) {
1958 /* Print some data about the neighboring objects, if they
1959 * exist:
1960 */
Joonsoo Kim8456a642013-10-24 10:07:49 +09001961 struct page *page = virt_to_head_page(objp);
Pekka Enberg8fea4e92006-03-22 00:08:10 -08001962 unsigned int objnr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001963
Joonsoo Kim8456a642013-10-24 10:07:49 +09001964 objnr = obj_to_index(cachep, page, objp);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001965 if (objnr) {
Joonsoo Kim8456a642013-10-24 10:07:49 +09001966 objp = index_to_obj(cachep, page, objnr - 1);
Manfred Spraul3dafccf2006-02-01 03:05:42 -08001967 realobj = (char *)objp + obj_offset(cachep);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001968 printk(KERN_ERR "Prev obj: start=%p, len=%d\n",
Pekka Enbergb28a02d2006-01-08 01:00:37 -08001969 realobj, size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001970 print_objinfo(cachep, objp, 2);
1971 }
Pekka Enbergb28a02d2006-01-08 01:00:37 -08001972 if (objnr + 1 < cachep->num) {
Joonsoo Kim8456a642013-10-24 10:07:49 +09001973 objp = index_to_obj(cachep, page, objnr + 1);
Manfred Spraul3dafccf2006-02-01 03:05:42 -08001974 realobj = (char *)objp + obj_offset(cachep);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001975 printk(KERN_ERR "Next obj: start=%p, len=%d\n",
Pekka Enbergb28a02d2006-01-08 01:00:37 -08001976 realobj, size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001977 print_objinfo(cachep, objp, 2);
1978 }
1979 }
1980}
1981#endif
1982
#if DEBUG
/*
 * Run the debug checks on every object of a slab that is about to be
 * destroyed: poison verification for SLAB_POISON caches and red zone
 * verification for SLAB_RED_ZONE caches.
 */
static void slab_destroy_debugcheck(struct kmem_cache *cachep,
						struct page *page)
{
	int idx;

	for (idx = 0; idx < cachep->num; idx++) {
		void *obj = index_to_obj(cachep, page, idx);

		if (cachep->flags & SLAB_POISON) {
#ifdef CONFIG_DEBUG_PAGEALLOC
			/*
			 * Off-slab caches whose object size is a whole number
			 * of pages go through kernel_map_pages() instead of
			 * the byte-wise poison check.
			 */
			if (!(cachep->size % PAGE_SIZE) && OFF_SLAB(cachep))
				kernel_map_pages(virt_to_page(obj),
						cachep->size / PAGE_SIZE, 1);
			else
				check_poison_obj(cachep, obj);
#else
			check_poison_obj(cachep, obj);
#endif
		}

		if (!(cachep->flags & SLAB_RED_ZONE))
			continue;

		/* Both guard words of a freed object must read RED_INACTIVE. */
		if (*dbg_redzone1(cachep, obj) != RED_INACTIVE)
			slab_error(cachep,
				   "start of a freed object was overwritten");
		if (*dbg_redzone2(cachep, obj) != RED_INACTIVE)
			slab_error(cachep,
				   "end of a freed object was overwritten");
	}
}
#else
/* Without DEBUG there is nothing to verify. */
static void slab_destroy_debugcheck(struct kmem_cache *cachep,
						struct page *page)
{
}
#endif
2019
Randy Dunlap911851e2006-03-22 00:08:14 -08002020/**
2021 * slab_destroy - destroy and release all objects in a slab
2022 * @cachep: cache pointer being destroyed
Masanari Iidacb8ee1a2014-01-28 02:57:08 +09002023 * @page: page pointer being destroyed
Randy Dunlap911851e2006-03-22 00:08:14 -08002024 *
Matthew Dobson12dd36f2006-02-01 03:05:46 -08002025 * Destroy all the objs in a slab, and release the mem back to the system.
Andrew Mortona737b3e2006-03-22 00:08:11 -08002026 * Before calling the slab must have been unlinked from the cache. The
2027 * cache-lock is not held/needed.
Matthew Dobson12dd36f2006-02-01 03:05:46 -08002028 */
Joonsoo Kim8456a642013-10-24 10:07:49 +09002029static void slab_destroy(struct kmem_cache *cachep, struct page *page)
Matthew Dobson12dd36f2006-02-01 03:05:46 -08002030{
Joonsoo Kim7e007352013-10-30 19:04:01 +09002031 void *freelist;
Matthew Dobson12dd36f2006-02-01 03:05:46 -08002032
Joonsoo Kim8456a642013-10-24 10:07:49 +09002033 freelist = page->freelist;
2034 slab_destroy_debugcheck(cachep, page);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002035 if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) {
Joonsoo Kim68126702013-10-24 10:07:42 +09002036 struct rcu_head *head;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002037
Joonsoo Kim68126702013-10-24 10:07:42 +09002038 /*
2039 * RCU free overloads the RCU head over the LRU.
2040 * slab_page has been overloeaded over the LRU,
2041 * however it is not used from now on so that
2042 * we can use it safely.
2043 */
2044 head = (void *)&page->rcu_head;
2045 call_rcu(head, kmem_rcu_free);
2046
Linus Torvalds1da177e2005-04-16 15:20:36 -07002047 } else {
Joonsoo Kim0c3aa832013-10-24 10:07:38 +09002048 kmem_freepages(cachep, page);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002049 }
Joonsoo Kim68126702013-10-24 10:07:42 +09002050
2051 /*
Joonsoo Kim8456a642013-10-24 10:07:49 +09002052 * From now on, we don't use freelist
Joonsoo Kim68126702013-10-24 10:07:42 +09002053 * although actual page can be freed in rcu context
2054 */
2055 if (OFF_SLAB(cachep))
Joonsoo Kim8456a642013-10-24 10:07:49 +09002056 kmem_cache_free(cachep->freelist_cache, freelist);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002057}
2058
2059/**
Randy.Dunlapa70773d2006-02-01 03:05:52 -08002060 * calculate_slab_order - calculate size (page order) of slabs
2061 * @cachep: pointer to the cache that is being created
2062 * @size: size of objects to be created in this cache.
2063 * @align: required alignment for the objects.
2064 * @flags: slab allocation flags
2065 *
2066 * Also calculates the number of objects per slab.
Pekka Enberg4d268eb2006-01-08 01:00:36 -08002067 *
2068 * This could be made much more intelligent. For now, try to avoid using
2069 * high order pages for slabs. When the gfp() functions are more friendly
2070 * towards high-order requests, this should be changed.
2071 */
Andrew Mortona737b3e2006-03-22 00:08:11 -08002072static size_t calculate_slab_order(struct kmem_cache *cachep,
Randy Dunlapee13d782006-02-01 03:05:53 -08002073 size_t size, size_t align, unsigned long flags)
Pekka Enberg4d268eb2006-01-08 01:00:36 -08002074{
Ingo Molnarb1ab41c2006-06-02 15:44:58 +02002075 unsigned long offslab_limit;
Pekka Enberg4d268eb2006-01-08 01:00:36 -08002076 size_t left_over = 0;
Linus Torvalds9888e6f2006-03-06 17:44:43 -08002077 int gfporder;
Pekka Enberg4d268eb2006-01-08 01:00:36 -08002078
Christoph Lameter0aa817f2007-05-16 22:11:01 -07002079 for (gfporder = 0; gfporder <= KMALLOC_MAX_ORDER; gfporder++) {
Pekka Enberg4d268eb2006-01-08 01:00:36 -08002080 unsigned int num;
2081 size_t remainder;
2082
Linus Torvalds9888e6f2006-03-06 17:44:43 -08002083 cache_estimate(gfporder, size, align, flags, &remainder, &num);
Pekka Enberg4d268eb2006-01-08 01:00:36 -08002084 if (!num)
2085 continue;
Linus Torvalds9888e6f2006-03-06 17:44:43 -08002086
Joonsoo Kimf315e3f2013-12-02 17:49:41 +09002087 /* Can't handle number of objects more than SLAB_OBJ_MAX_NUM */
2088 if (num > SLAB_OBJ_MAX_NUM)
2089 break;
2090
Ingo Molnarb1ab41c2006-06-02 15:44:58 +02002091 if (flags & CFLGS_OFF_SLAB) {
Joonsoo Kim03787302014-06-23 13:22:06 -07002092 size_t freelist_size_per_obj = sizeof(freelist_idx_t);
Ingo Molnarb1ab41c2006-06-02 15:44:58 +02002093 /*
2094 * Max number of objs-per-slab for caches which
2095 * use off-slab slabs. Needed to avoid a possible
2096 * looping condition in cache_grow().
2097 */
Joonsoo Kim03787302014-06-23 13:22:06 -07002098 if (IS_ENABLED(CONFIG_DEBUG_SLAB_LEAK))
2099 freelist_size_per_obj += sizeof(char);
Joonsoo Kim8456a642013-10-24 10:07:49 +09002100 offslab_limit = size;
Joonsoo Kim03787302014-06-23 13:22:06 -07002101 offslab_limit /= freelist_size_per_obj;
Ingo Molnarb1ab41c2006-06-02 15:44:58 +02002102
2103 if (num > offslab_limit)
2104 break;
2105 }
Pekka Enberg4d268eb2006-01-08 01:00:36 -08002106
Linus Torvalds9888e6f2006-03-06 17:44:43 -08002107 /* Found something acceptable - save it away */
Pekka Enberg4d268eb2006-01-08 01:00:36 -08002108 cachep->num = num;
Linus Torvalds9888e6f2006-03-06 17:44:43 -08002109 cachep->gfporder = gfporder;
Pekka Enberg4d268eb2006-01-08 01:00:36 -08002110 left_over = remainder;
2111
2112 /*
Linus Torvaldsf78bb8a2006-03-08 10:33:05 -08002113 * A VFS-reclaimable slab tends to have most allocations
2114 * as GFP_NOFS and we really don't want to have to be allocating
2115 * higher-order pages when we are unable to shrink dcache.
2116 */
2117 if (flags & SLAB_RECLAIM_ACCOUNT)
2118 break;
2119
2120 /*
Pekka Enberg4d268eb2006-01-08 01:00:36 -08002121 * Large number of objects is good, but very large slabs are
2122 * currently bad for the gfp()s.
2123 */
David Rientjes543585c2011-10-18 22:09:24 -07002124 if (gfporder >= slab_max_order)
Pekka Enberg4d268eb2006-01-08 01:00:36 -08002125 break;
2126
Linus Torvalds9888e6f2006-03-06 17:44:43 -08002127 /*
2128 * Acceptable internal fragmentation?
2129 */
Andrew Mortona737b3e2006-03-22 00:08:11 -08002130 if (left_over * 8 <= (PAGE_SIZE << gfporder))
Pekka Enberg4d268eb2006-01-08 01:00:36 -08002131 break;
2132 }
2133 return left_over;
2134}
2135
Pekka Enberg83b519e2009-06-10 19:40:04 +03002136static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
Pekka Enbergf30cf7d2006-03-22 00:08:11 -08002137{
Christoph Lameter97d06602012-07-06 15:25:11 -05002138 if (slab_state >= FULL)
Pekka Enberg83b519e2009-06-10 19:40:04 +03002139 return enable_cpucache(cachep, gfp);
Christoph Lameter2ed3a4e2006-09-25 23:31:38 -07002140
Christoph Lameter97d06602012-07-06 15:25:11 -05002141 if (slab_state == DOWN) {
Pekka Enbergf30cf7d2006-03-22 00:08:11 -08002142 /*
Christoph Lameter2f9baa92012-11-28 16:23:09 +00002143 * Note: Creation of first cache (kmem_cache).
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00002144 * The setup_node is taken care
Christoph Lameter2f9baa92012-11-28 16:23:09 +00002145 * of by the caller of __kmem_cache_create
2146 */
2147 cachep->array[smp_processor_id()] = &initarray_generic.cache;
2148 slab_state = PARTIAL;
2149 } else if (slab_state == PARTIAL) {
2150 /*
2151 * Note: the second kmem_cache_create must create the cache
Pekka Enbergf30cf7d2006-03-22 00:08:11 -08002152 * that's used by kmalloc(24), otherwise the creation of
2153 * further caches will BUG().
2154 */
2155 cachep->array[smp_processor_id()] = &initarray_generic.cache;
2156
2157 /*
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00002158 * If the cache that's used by kmalloc(sizeof(kmem_cache_node)) is
2159 * the second cache, then we need to set up all its node/,
Pekka Enbergf30cf7d2006-03-22 00:08:11 -08002160 * otherwise the creation of further caches will BUG().
2161 */
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00002162 set_up_node(cachep, SIZE_AC);
2163 if (INDEX_AC == INDEX_NODE)
2164 slab_state = PARTIAL_NODE;
Pekka Enbergf30cf7d2006-03-22 00:08:11 -08002165 else
Christoph Lameter97d06602012-07-06 15:25:11 -05002166 slab_state = PARTIAL_ARRAYCACHE;
Pekka Enbergf30cf7d2006-03-22 00:08:11 -08002167 } else {
Christoph Lameter2f9baa92012-11-28 16:23:09 +00002168 /* Remaining boot caches */
Pekka Enbergf30cf7d2006-03-22 00:08:11 -08002169 cachep->array[smp_processor_id()] =
Pekka Enberg83b519e2009-06-10 19:40:04 +03002170 kmalloc(sizeof(struct arraycache_init), gfp);
Pekka Enbergf30cf7d2006-03-22 00:08:11 -08002171
Christoph Lameter97d06602012-07-06 15:25:11 -05002172 if (slab_state == PARTIAL_ARRAYCACHE) {
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00002173 set_up_node(cachep, SIZE_NODE);
2174 slab_state = PARTIAL_NODE;
Pekka Enbergf30cf7d2006-03-22 00:08:11 -08002175 } else {
2176 int node;
Pekka Enberg556a1692008-01-25 08:20:51 +02002177 for_each_online_node(node) {
Christoph Lameter6a673682013-01-10 19:14:19 +00002178 cachep->node[node] =
Christoph Lameter6744f082013-01-10 19:12:17 +00002179 kmalloc_node(sizeof(struct kmem_cache_node),
Pekka Enbergeb91f1d2009-06-12 14:56:09 +03002180 gfp, node);
Christoph Lameter6a673682013-01-10 19:14:19 +00002181 BUG_ON(!cachep->node[node]);
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00002182 kmem_cache_node_init(cachep->node[node]);
Pekka Enbergf30cf7d2006-03-22 00:08:11 -08002183 }
2184 }
2185 }
Christoph Lameter6a673682013-01-10 19:14:19 +00002186 cachep->node[numa_mem_id()]->next_reap =
Jianyu Zhan5f0985b2014-03-30 17:02:20 +08002187 jiffies + REAPTIMEOUT_NODE +
2188 ((unsigned long)cachep) % REAPTIMEOUT_NODE;
Pekka Enbergf30cf7d2006-03-22 00:08:11 -08002189
2190 cpu_cache_get(cachep)->avail = 0;
2191 cpu_cache_get(cachep)->limit = BOOT_CPUCACHE_ENTRIES;
2192 cpu_cache_get(cachep)->batchcount = 1;
2193 cpu_cache_get(cachep)->touched = 0;
2194 cachep->batchcount = 1;
2195 cachep->limit = BOOT_CPUCACHE_ENTRIES;
Christoph Lameter2ed3a4e2006-09-25 23:31:38 -07002196 return 0;
Pekka Enbergf30cf7d2006-03-22 00:08:11 -08002197}
2198
Pekka Enberg4d268eb2006-01-08 01:00:36 -08002199/**
Christoph Lameter039363f2012-07-06 15:25:10 -05002200 * __kmem_cache_create - Create a cache.
Randy Dunlapa755b762012-11-06 17:10:10 -08002201 * @cachep: cache management descriptor
Linus Torvalds1da177e2005-04-16 15:20:36 -07002202 * @flags: SLAB flags
Linus Torvalds1da177e2005-04-16 15:20:36 -07002203 *
2204 * Returns a ptr to the cache on success, NULL on failure.
2205 * Cannot be called within a int, but can be interrupted.
Paul Mundt20c2df82007-07-20 10:11:58 +09002206 * The @ctor is run when new pages are allocated by the cache.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002207 *
Linus Torvalds1da177e2005-04-16 15:20:36 -07002208 * The flags are
2209 *
2210 * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
2211 * to catch references to uninitialised memory.
2212 *
2213 * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
2214 * for buffer overruns.
2215 *
Linus Torvalds1da177e2005-04-16 15:20:36 -07002216 * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
2217 * cacheline. This can be beneficial if you're counting cycles as closely
2218 * as davem.
2219 */
Christoph Lameter278b1bb2012-09-05 00:20:34 +00002220int
Christoph Lameter8a13a4c2012-09-04 23:18:33 +00002221__kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002222{
Joonsoo Kim8456a642013-10-24 10:07:49 +09002223 size_t left_over, freelist_size, ralign;
Pekka Enberg83b519e2009-06-10 19:40:04 +03002224 gfp_t gfp;
Christoph Lameter278b1bb2012-09-05 00:20:34 +00002225 int err;
Christoph Lameter8a13a4c2012-09-04 23:18:33 +00002226 size_t size = cachep->size;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002227
Linus Torvalds1da177e2005-04-16 15:20:36 -07002228#if DEBUG
Linus Torvalds1da177e2005-04-16 15:20:36 -07002229#if FORCED_DEBUG
2230 /*
2231 * Enable redzoning and last user accounting, except for caches with
2232 * large objects, if the increased size would increase the object size
2233 * above the next power of two: caches with object sizes just above a
2234 * power of two have a significant amount of internal fragmentation.
2235 */
David Woodhouse87a927c2007-07-04 21:26:44 -04002236 if (size < 4096 || fls(size - 1) == fls(size-1 + REDZONE_ALIGN +
2237 2 * sizeof(unsigned long long)))
Pekka Enbergb28a02d2006-01-08 01:00:37 -08002238 flags |= SLAB_RED_ZONE | SLAB_STORE_USER;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002239 if (!(flags & SLAB_DESTROY_BY_RCU))
2240 flags |= SLAB_POISON;
2241#endif
2242 if (flags & SLAB_DESTROY_BY_RCU)
2243 BUG_ON(flags & SLAB_POISON);
2244#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07002245
Andrew Mortona737b3e2006-03-22 00:08:11 -08002246 /*
2247 * Check that size is in terms of words. This is needed to avoid
Linus Torvalds1da177e2005-04-16 15:20:36 -07002248 * unaligned accesses for some archs when redzoning is used, and makes
2249 * sure any on-slab bufctl's are also correctly aligned.
2250 */
Pekka Enbergb28a02d2006-01-08 01:00:37 -08002251 if (size & (BYTES_PER_WORD - 1)) {
2252 size += (BYTES_PER_WORD - 1);
2253 size &= ~(BYTES_PER_WORD - 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002254 }
2255
Pekka Enbergca5f9702006-09-25 23:31:25 -07002256 /*
David Woodhouse87a927c2007-07-04 21:26:44 -04002257 * Redzoning and user store require word alignment or possibly larger.
2258 * Note this will be overridden by architecture or caller mandated
2259 * alignment if either is greater than BYTES_PER_WORD.
Pekka Enbergca5f9702006-09-25 23:31:25 -07002260 */
David Woodhouse87a927c2007-07-04 21:26:44 -04002261 if (flags & SLAB_STORE_USER)
2262 ralign = BYTES_PER_WORD;
2263
2264 if (flags & SLAB_RED_ZONE) {
2265 ralign = REDZONE_ALIGN;
2266 /* If redzoning, ensure that the second redzone is suitably
2267 * aligned, by adjusting the object size accordingly. */
2268 size += REDZONE_ALIGN - 1;
2269 size &= ~(REDZONE_ALIGN - 1);
2270 }
Pekka Enbergca5f9702006-09-25 23:31:25 -07002271
Kevin Hilmana44b56d2006-12-06 20:32:11 -08002272 /* 3) caller mandated alignment */
Christoph Lameter8a13a4c2012-09-04 23:18:33 +00002273 if (ralign < cachep->align) {
2274 ralign = cachep->align;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002275 }
Pekka Enberg3ff84a72011-02-14 17:46:21 +02002276 /* disable debug if necessary */
2277 if (ralign > __alignof__(unsigned long long))
Kevin Hilmana44b56d2006-12-06 20:32:11 -08002278 flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
Andrew Mortona737b3e2006-03-22 00:08:11 -08002279 /*
Pekka Enbergca5f9702006-09-25 23:31:25 -07002280 * 4) Store it.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002281 */
Christoph Lameter8a13a4c2012-09-04 23:18:33 +00002282 cachep->align = ralign;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002283
Pekka Enberg83b519e2009-06-10 19:40:04 +03002284 if (slab_is_available())
2285 gfp = GFP_KERNEL;
2286 else
2287 gfp = GFP_NOWAIT;
2288
Christoph Lameter6a673682013-01-10 19:14:19 +00002289 setup_node_pointer(cachep);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002290#if DEBUG
Linus Torvalds1da177e2005-04-16 15:20:36 -07002291
Pekka Enbergca5f9702006-09-25 23:31:25 -07002292 /*
2293 * Both debugging options require word-alignment which is calculated
2294 * into align above.
2295 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002296 if (flags & SLAB_RED_ZONE) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002297 /* add space for red zone words */
Pekka Enberg3ff84a72011-02-14 17:46:21 +02002298 cachep->obj_offset += sizeof(unsigned long long);
2299 size += 2 * sizeof(unsigned long long);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002300 }
2301 if (flags & SLAB_STORE_USER) {
Pekka Enbergca5f9702006-09-25 23:31:25 -07002302 /* user store requires one word storage behind the end of
David Woodhouse87a927c2007-07-04 21:26:44 -04002303 * the real object. But if the second red zone needs to be
2304 * aligned to 64 bits, we must allow that much space.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002305 */
David Woodhouse87a927c2007-07-04 21:26:44 -04002306 if (flags & SLAB_RED_ZONE)
2307 size += REDZONE_ALIGN;
2308 else
2309 size += BYTES_PER_WORD;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002310 }
2311#if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC)
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00002312 if (size >= kmalloc_size(INDEX_NODE + 1)
Tetsuo Handa608da7e2012-09-30 17:28:25 +09002313 && cachep->object_size > cache_line_size()
2314 && ALIGN(size, cachep->align) < PAGE_SIZE) {
2315 cachep->obj_offset += PAGE_SIZE - ALIGN(size, cachep->align);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002316 size = PAGE_SIZE;
2317 }
2318#endif
2319#endif
2320
Ingo Molnare0a42722006-06-23 02:03:46 -07002321 /*
2322 * Determine if the slab management is 'on' or 'off' slab.
2323 * (bootstrapping cannot cope with offslab caches so don't do
Catalin Marinase7cb55b2009-10-28 13:33:08 +00002324 * it too early on. Always use on-slab management when
2325 * SLAB_NOLEAKTRACE to avoid recursive calls into kmemleak)
Ingo Molnare0a42722006-06-23 02:03:46 -07002326 */
Joonsoo Kim8fc9cf42013-12-02 17:49:43 +09002327 if ((size >= (PAGE_SIZE >> 5)) && !slab_early_init &&
Catalin Marinase7cb55b2009-10-28 13:33:08 +00002328 !(flags & SLAB_NOLEAKTRACE))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002329 /*
2330 * Size is large, assume best to place the slab management obj
2331 * off-slab (should allow better packing of objs).
2332 */
2333 flags |= CFLGS_OFF_SLAB;
2334
Christoph Lameter8a13a4c2012-09-04 23:18:33 +00002335 size = ALIGN(size, cachep->align);
Joonsoo Kimf315e3f2013-12-02 17:49:41 +09002336 /*
2337 * We should restrict the number of objects in a slab to implement
2338 * byte sized index. Refer comment on SLAB_OBJ_MIN_SIZE definition.
2339 */
2340 if (FREELIST_BYTE_INDEX && size < SLAB_OBJ_MIN_SIZE)
2341 size = ALIGN(SLAB_OBJ_MIN_SIZE, cachep->align);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002342
Christoph Lameter8a13a4c2012-09-04 23:18:33 +00002343 left_over = calculate_slab_order(cachep, size, cachep->align, flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002344
Christoph Lameter8a13a4c2012-09-04 23:18:33 +00002345 if (!cachep->num)
Christoph Lameter278b1bb2012-09-05 00:20:34 +00002346 return -E2BIG;
Christoph Lameter8a13a4c2012-09-04 23:18:33 +00002347
Joonsoo Kim03787302014-06-23 13:22:06 -07002348 freelist_size = calculate_freelist_size(cachep->num, cachep->align);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002349
2350 /*
2351 * If the slab has been placed off-slab, and we have enough space then
2352 * move it on-slab. This is at the expense of any extra colouring.
2353 */
Joonsoo Kim8456a642013-10-24 10:07:49 +09002354 if (flags & CFLGS_OFF_SLAB && left_over >= freelist_size) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002355 flags &= ~CFLGS_OFF_SLAB;
Joonsoo Kim8456a642013-10-24 10:07:49 +09002356 left_over -= freelist_size;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002357 }
2358
2359 if (flags & CFLGS_OFF_SLAB) {
2360 /* really off slab. No need for manual alignment */
Joonsoo Kim03787302014-06-23 13:22:06 -07002361 freelist_size = calculate_freelist_size(cachep->num, 0);
Ron Lee67461362009-05-22 04:58:22 +09302362
2363#ifdef CONFIG_PAGE_POISONING
2364 /* If we're going to use the generic kernel_map_pages()
2365 * poisoning, then it's going to smash the contents of
2366 * the redzone and userword anyhow, so switch them off.
2367 */
2368 if (size % PAGE_SIZE == 0 && flags & SLAB_POISON)
2369 flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
2370#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07002371 }
2372
2373 cachep->colour_off = cache_line_size();
2374 /* Offset must be a multiple of the alignment. */
Christoph Lameter8a13a4c2012-09-04 23:18:33 +00002375 if (cachep->colour_off < cachep->align)
2376 cachep->colour_off = cachep->align;
Pekka Enbergb28a02d2006-01-08 01:00:37 -08002377 cachep->colour = left_over / cachep->colour_off;
Joonsoo Kim8456a642013-10-24 10:07:49 +09002378 cachep->freelist_size = freelist_size;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002379 cachep->flags = flags;
Joonsoo Kima57a4982013-10-24 10:07:44 +09002380 cachep->allocflags = __GFP_COMP;
Christoph Lameter4b51d662007-02-10 01:43:10 -08002381 if (CONFIG_ZONE_DMA_FLAG && (flags & SLAB_CACHE_DMA))
Glauber Costaa618e892012-06-14 16:17:21 +04002382 cachep->allocflags |= GFP_DMA;
Christoph Lameter3b0efdf2012-06-13 10:24:57 -05002383 cachep->size = size;
Eric Dumazet6a2d7a92006-12-13 00:34:27 -08002384 cachep->reciprocal_buffer_size = reciprocal_value(size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002385
Ravikiran G Thirumalaie5ac9c52006-09-25 23:31:34 -07002386 if (flags & CFLGS_OFF_SLAB) {
Joonsoo Kim8456a642013-10-24 10:07:49 +09002387 cachep->freelist_cache = kmalloc_slab(freelist_size, 0u);
Ravikiran G Thirumalaie5ac9c52006-09-25 23:31:34 -07002388 /*
Jianyu Zhan5f0985b2014-03-30 17:02:20 +08002389 * This is a possibility for one of the kmalloc_{dma,}_caches.
Ravikiran G Thirumalaie5ac9c52006-09-25 23:31:34 -07002390 * But since we go off slab only for object size greater than
Jianyu Zhan5f0985b2014-03-30 17:02:20 +08002391 * PAGE_SIZE/8, and kmalloc_{dma,}_caches get created
2392 * in ascending order,this should not happen at all.
Ravikiran G Thirumalaie5ac9c52006-09-25 23:31:34 -07002393 * But leave a BUG_ON for some lucky dude.
2394 */
Joonsoo Kim8456a642013-10-24 10:07:49 +09002395 BUG_ON(ZERO_OR_NULL_PTR(cachep->freelist_cache));
Ravikiran G Thirumalaie5ac9c52006-09-25 23:31:34 -07002396 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002397
Christoph Lameter278b1bb2012-09-05 00:20:34 +00002398 err = setup_cpu_cache(cachep, gfp);
2399 if (err) {
Christoph Lameter12c36672012-09-04 23:38:33 +00002400 __kmem_cache_shutdown(cachep);
Christoph Lameter278b1bb2012-09-05 00:20:34 +00002401 return err;
Christoph Lameter2ed3a4e2006-09-25 23:31:38 -07002402 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002403
Peter Zijlstra83835b32011-07-22 15:26:05 +02002404 if (flags & SLAB_DEBUG_OBJECTS) {
2405 /*
2406 * Would deadlock through slab_destroy()->call_rcu()->
2407 * debug_object_activate()->kmem_cache_alloc().
2408 */
2409 WARN_ON_ONCE(flags & SLAB_DESTROY_BY_RCU);
2410
2411 slab_set_debugobj_lock_classes(cachep);
Glauber Costa6ccfb5b2012-12-18 14:22:31 -08002412 } else if (!OFF_SLAB(cachep) && !(flags & SLAB_DESTROY_BY_RCU))
2413 on_slab_lock_classes(cachep);
Peter Zijlstra83835b32011-07-22 15:26:05 +02002414
Christoph Lameter278b1bb2012-09-05 00:20:34 +00002415 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002416}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002417
#if DEBUG
/* BUG unless local interrupts are currently disabled. */
static void check_irq_off(void)
{
	BUG_ON(!irqs_disabled());
}

/* BUG if local interrupts are currently disabled. */
static void check_irq_on(void)
{
	BUG_ON(irqs_disabled());
}

/*
 * On SMP builds, check that interrupts are off and that the list_lock of
 * the node returned by numa_mem_id() is held.  Does nothing otherwise.
 */
static void check_spinlock_acquired(struct kmem_cache *cachep)
{
#ifdef CONFIG_SMP
	struct kmem_cache_node *local;

	check_irq_off();
	local = get_node(cachep, numa_mem_id());
	assert_spin_locked(&local->list_lock);
#endif
}

/*
 * Same as check_spinlock_acquired(), but for the list_lock of the
 * explicitly given @node.
 */
static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node)
{
#ifdef CONFIG_SMP
	struct kmem_cache_node *target;

	check_irq_off();
	target = get_node(cachep, node);
	assert_spin_locked(&target->list_lock);
#endif
}

#else
/* Non-debug builds: all of the checks above compile away. */
#define check_irq_off()				do { } while (0)
#define check_irq_on()				do { } while (0)
#define check_spinlock_acquired(x)		do { } while (0)
#define check_spinlock_acquired_node(x, y)	do { } while (0)
#endif
2451
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00002452static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *n,
Christoph Lameteraab22072006-03-22 00:09:06 -08002453 struct array_cache *ac,
2454 int force, int node);
2455
Linus Torvalds1da177e2005-04-16 15:20:36 -07002456static void do_drain(void *arg)
2457{
Andrew Mortona737b3e2006-03-22 00:08:11 -08002458 struct kmem_cache *cachep = arg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002459 struct array_cache *ac;
Lee Schermerhorn7d6e6d02010-05-26 14:45:03 -07002460 int node = numa_mem_id();
Christoph Lameter18bf8542014-08-06 16:04:11 -07002461 struct kmem_cache_node *n;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002462
2463 check_irq_off();
Pekka Enberg9a2dba42006-02-01 03:05:49 -08002464 ac = cpu_cache_get(cachep);
Christoph Lameter18bf8542014-08-06 16:04:11 -07002465 n = get_node(cachep, node);
2466 spin_lock(&n->list_lock);
Christoph Lameterff694162005-09-22 21:44:02 -07002467 free_block(cachep, ac->entry, ac->avail, node);
Christoph Lameter18bf8542014-08-06 16:04:11 -07002468 spin_unlock(&n->list_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002469 ac->avail = 0;
2470}
2471
Pekka Enberg343e0d72006-02-01 03:05:50 -08002472static void drain_cpu_caches(struct kmem_cache *cachep)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002473{
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00002474 struct kmem_cache_node *n;
Christoph Lametere498be72005-09-09 13:03:32 -07002475 int node;
2476
Jens Axboe15c8b6c2008-05-09 09:39:44 +02002477 on_each_cpu(do_drain, cachep, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002478 check_irq_on();
Christoph Lameter18bf8542014-08-06 16:04:11 -07002479 for_each_kmem_cache_node(cachep, node, n)
2480 if (n->alien)
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00002481 drain_alien_cache(cachep, n->alien);
Roland Dreiera4523a82006-05-15 11:41:00 -07002482
Christoph Lameter18bf8542014-08-06 16:04:11 -07002483 for_each_kmem_cache_node(cachep, node, n)
2484 drain_array(cachep, n, n->shared, 1, node);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002485}
2486
Christoph Lametered11d9e2006-06-30 01:55:45 -07002487/*
2488 * Remove slabs from the list of free slabs.
2489 * Specify the number of slabs to drain in tofree.
2490 *
2491 * Returns the actual number of slabs released.
2492 */
2493static int drain_freelist(struct kmem_cache *cache,
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00002494 struct kmem_cache_node *n, int tofree)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002495{
Christoph Lametered11d9e2006-06-30 01:55:45 -07002496 struct list_head *p;
2497 int nr_freed;
Joonsoo Kim8456a642013-10-24 10:07:49 +09002498 struct page *page;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002499
Christoph Lametered11d9e2006-06-30 01:55:45 -07002500 nr_freed = 0;
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00002501 while (nr_freed < tofree && !list_empty(&n->slabs_free)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002502
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00002503 spin_lock_irq(&n->list_lock);
2504 p = n->slabs_free.prev;
2505 if (p == &n->slabs_free) {
2506 spin_unlock_irq(&n->list_lock);
Christoph Lametered11d9e2006-06-30 01:55:45 -07002507 goto out;
2508 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002509
Joonsoo Kim8456a642013-10-24 10:07:49 +09002510 page = list_entry(p, struct page, lru);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002511#if DEBUG
Joonsoo Kim8456a642013-10-24 10:07:49 +09002512 BUG_ON(page->active);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002513#endif
Joonsoo Kim8456a642013-10-24 10:07:49 +09002514 list_del(&page->lru);
Christoph Lametered11d9e2006-06-30 01:55:45 -07002515 /*
2516 * Safe to drop the lock. The slab is no longer linked
2517 * to the cache.
2518 */
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00002519 n->free_objects -= cache->num;
2520 spin_unlock_irq(&n->list_lock);
Joonsoo Kim8456a642013-10-24 10:07:49 +09002521 slab_destroy(cache, page);
Christoph Lametered11d9e2006-06-30 01:55:45 -07002522 nr_freed++;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002523 }
Christoph Lametered11d9e2006-06-30 01:55:45 -07002524out:
2525 return nr_freed;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002526}
2527
Vladimir Davydov03afc0e2014-06-04 16:07:20 -07002528int __kmem_cache_shrink(struct kmem_cache *cachep)
Christoph Lametere498be72005-09-09 13:03:32 -07002529{
Christoph Lameter18bf8542014-08-06 16:04:11 -07002530 int ret = 0;
2531 int node;
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00002532 struct kmem_cache_node *n;
Christoph Lametere498be72005-09-09 13:03:32 -07002533
2534 drain_cpu_caches(cachep);
2535
2536 check_irq_on();
Christoph Lameter18bf8542014-08-06 16:04:11 -07002537 for_each_kmem_cache_node(cachep, node, n) {
Wanpeng Li0fa81032013-07-04 08:33:22 +08002538 drain_freelist(cachep, n, slabs_tofree(cachep, n));
Christoph Lametered11d9e2006-06-30 01:55:45 -07002539
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00002540 ret += !list_empty(&n->slabs_full) ||
2541 !list_empty(&n->slabs_partial);
Christoph Lametere498be72005-09-09 13:03:32 -07002542 }
2543 return (ret ? 1 : 0);
2544}
2545
Christoph Lameter945cf2b2012-09-04 23:18:33 +00002546int __kmem_cache_shutdown(struct kmem_cache *cachep)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002547{
Christoph Lameter12c36672012-09-04 23:38:33 +00002548 int i;
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00002549 struct kmem_cache_node *n;
Vladimir Davydov03afc0e2014-06-04 16:07:20 -07002550 int rc = __kmem_cache_shrink(cachep);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002551
Christoph Lameter12c36672012-09-04 23:38:33 +00002552 if (rc)
2553 return rc;
2554
2555 for_each_online_cpu(i)
2556 kfree(cachep->array[i]);
2557
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00002558 /* NUMA: free the node structures */
Christoph Lameter18bf8542014-08-06 16:04:11 -07002559 for_each_kmem_cache_node(cachep, i, n) {
2560 kfree(n->shared);
2561 free_alien_cache(n->alien);
2562 kfree(n);
2563 cachep->node[i] = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002564 }
Christoph Lameter12c36672012-09-04 23:38:33 +00002565 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002566}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002567
Ravikiran G Thirumalaie5ac9c52006-09-25 23:31:34 -07002568/*
2569 * Get the memory for a slab management obj.
Jianyu Zhan5f0985b2014-03-30 17:02:20 +08002570 *
2571 * For a slab cache when the slab descriptor is off-slab, the
2572 * slab descriptor can't come from the same cache which is being created,
2573 * Because if it is the case, that means we defer the creation of
2574 * the kmalloc_{dma,}_cache of size sizeof(slab descriptor) to this point.
2575 * And we eventually call down to __kmem_cache_create(), which
2576 * in turn looks up in the kmalloc_{dma,}_caches for the disired-size one.
2577 * This is a "chicken-and-egg" problem.
2578 *
2579 * So the off-slab slab descriptor shall come from the kmalloc_{dma,}_caches,
2580 * which are all initialized during kmem_cache_init().
Ravikiran G Thirumalaie5ac9c52006-09-25 23:31:34 -07002581 */
Joonsoo Kim7e007352013-10-30 19:04:01 +09002582static void *alloc_slabmgmt(struct kmem_cache *cachep,
Joonsoo Kim0c3aa832013-10-24 10:07:38 +09002583 struct page *page, int colour_off,
2584 gfp_t local_flags, int nodeid)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002585{
Joonsoo Kim7e007352013-10-30 19:04:01 +09002586 void *freelist;
Joonsoo Kim0c3aa832013-10-24 10:07:38 +09002587 void *addr = page_address(page);
Pekka Enbergb28a02d2006-01-08 01:00:37 -08002588
Linus Torvalds1da177e2005-04-16 15:20:36 -07002589 if (OFF_SLAB(cachep)) {
2590 /* Slab management obj is off-slab. */
Joonsoo Kim8456a642013-10-24 10:07:49 +09002591 freelist = kmem_cache_alloc_node(cachep->freelist_cache,
Pekka Enberg8759ec52008-11-26 10:01:31 +02002592 local_flags, nodeid);
Joonsoo Kim8456a642013-10-24 10:07:49 +09002593 if (!freelist)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002594 return NULL;
2595 } else {
Joonsoo Kim8456a642013-10-24 10:07:49 +09002596 freelist = addr + colour_off;
2597 colour_off += cachep->freelist_size;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002598 }
Joonsoo Kim8456a642013-10-24 10:07:49 +09002599 page->active = 0;
2600 page->s_mem = addr + colour_off;
2601 return freelist;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002602}
2603
Joonsoo Kim7cc689732014-04-18 16:24:09 +09002604static inline freelist_idx_t get_free_obj(struct page *page, unsigned int idx)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002605{
Joonsoo Kima41adfa2013-12-02 17:49:42 +09002606 return ((freelist_idx_t *)page->freelist)[idx];
Joonsoo Kime5c58df2013-12-02 17:49:40 +09002607}
2608
2609static inline void set_free_obj(struct page *page,
Joonsoo Kim7cc689732014-04-18 16:24:09 +09002610 unsigned int idx, freelist_idx_t val)
Joonsoo Kime5c58df2013-12-02 17:49:40 +09002611{
Joonsoo Kima41adfa2013-12-02 17:49:42 +09002612 ((freelist_idx_t *)(page->freelist))[idx] = val;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002613}
2614
Pekka Enberg343e0d72006-02-01 03:05:50 -08002615static void cache_init_objs(struct kmem_cache *cachep,
Joonsoo Kim8456a642013-10-24 10:07:49 +09002616 struct page *page)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002617{
2618 int i;
2619
2620 for (i = 0; i < cachep->num; i++) {
Joonsoo Kim8456a642013-10-24 10:07:49 +09002621 void *objp = index_to_obj(cachep, page, i);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002622#if DEBUG
2623 /* need to poison the objs? */
2624 if (cachep->flags & SLAB_POISON)
2625 poison_obj(cachep, objp, POISON_FREE);
2626 if (cachep->flags & SLAB_STORE_USER)
2627 *dbg_userword(cachep, objp) = NULL;
2628
2629 if (cachep->flags & SLAB_RED_ZONE) {
2630 *dbg_redzone1(cachep, objp) = RED_INACTIVE;
2631 *dbg_redzone2(cachep, objp) = RED_INACTIVE;
2632 }
2633 /*
Andrew Mortona737b3e2006-03-22 00:08:11 -08002634 * Constructors are not allowed to allocate memory from the same
2635 * cache which they are a constructor for. Otherwise, deadlock.
2636 * They must also be threaded.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002637 */
2638 if (cachep->ctor && !(cachep->flags & SLAB_POISON))
Alexey Dobriyan51cc5062008-07-25 19:45:34 -07002639 cachep->ctor(objp + obj_offset(cachep));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002640
2641 if (cachep->flags & SLAB_RED_ZONE) {
2642 if (*dbg_redzone2(cachep, objp) != RED_INACTIVE)
2643 slab_error(cachep, "constructor overwrote the"
Pekka Enbergb28a02d2006-01-08 01:00:37 -08002644 " end of an object");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002645 if (*dbg_redzone1(cachep, objp) != RED_INACTIVE)
2646 slab_error(cachep, "constructor overwrote the"
Pekka Enbergb28a02d2006-01-08 01:00:37 -08002647 " start of an object");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002648 }
Christoph Lameter3b0efdf2012-06-13 10:24:57 -05002649 if ((cachep->size % PAGE_SIZE) == 0 &&
Andrew Mortona737b3e2006-03-22 00:08:11 -08002650 OFF_SLAB(cachep) && cachep->flags & SLAB_POISON)
Pekka Enbergb28a02d2006-01-08 01:00:37 -08002651 kernel_map_pages(virt_to_page(objp),
Christoph Lameter3b0efdf2012-06-13 10:24:57 -05002652 cachep->size / PAGE_SIZE, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002653#else
2654 if (cachep->ctor)
Alexey Dobriyan51cc5062008-07-25 19:45:34 -07002655 cachep->ctor(objp);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002656#endif
Joonsoo Kim03787302014-06-23 13:22:06 -07002657 set_obj_status(page, i, OBJECT_FREE);
Joonsoo Kime5c58df2013-12-02 17:49:40 +09002658 set_free_obj(page, i, i);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002659 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002660}
2661
Pekka Enberg343e0d72006-02-01 03:05:50 -08002662static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002663{
Christoph Lameter4b51d662007-02-10 01:43:10 -08002664 if (CONFIG_ZONE_DMA_FLAG) {
2665 if (flags & GFP_DMA)
Glauber Costaa618e892012-06-14 16:17:21 +04002666 BUG_ON(!(cachep->allocflags & GFP_DMA));
Christoph Lameter4b51d662007-02-10 01:43:10 -08002667 else
Glauber Costaa618e892012-06-14 16:17:21 +04002668 BUG_ON(cachep->allocflags & GFP_DMA);
Christoph Lameter4b51d662007-02-10 01:43:10 -08002669 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002670}
2671
Joonsoo Kim8456a642013-10-24 10:07:49 +09002672static void *slab_get_obj(struct kmem_cache *cachep, struct page *page,
Andrew Mortona737b3e2006-03-22 00:08:11 -08002673 int nodeid)
Matthew Dobson78d382d2006-02-01 03:05:47 -08002674{
Joonsoo Kimb1cb0982013-10-24 10:07:45 +09002675 void *objp;
Matthew Dobson78d382d2006-02-01 03:05:47 -08002676
Joonsoo Kime5c58df2013-12-02 17:49:40 +09002677 objp = index_to_obj(cachep, page, get_free_obj(page, page->active));
Joonsoo Kim8456a642013-10-24 10:07:49 +09002678 page->active++;
Matthew Dobson78d382d2006-02-01 03:05:47 -08002679#if DEBUG
Joonsoo Kim1ea991b2013-10-24 10:07:40 +09002680 WARN_ON(page_to_nid(virt_to_page(objp)) != nodeid);
Matthew Dobson78d382d2006-02-01 03:05:47 -08002681#endif
Matthew Dobson78d382d2006-02-01 03:05:47 -08002682
2683 return objp;
2684}
2685
Joonsoo Kim8456a642013-10-24 10:07:49 +09002686static void slab_put_obj(struct kmem_cache *cachep, struct page *page,
Andrew Mortona737b3e2006-03-22 00:08:11 -08002687 void *objp, int nodeid)
Matthew Dobson78d382d2006-02-01 03:05:47 -08002688{
Joonsoo Kim8456a642013-10-24 10:07:49 +09002689 unsigned int objnr = obj_to_index(cachep, page, objp);
Matthew Dobson78d382d2006-02-01 03:05:47 -08002690#if DEBUG
Joonsoo Kim16025172013-10-24 10:07:46 +09002691 unsigned int i;
Matthew Dobson78d382d2006-02-01 03:05:47 -08002692
Matthew Dobson78d382d2006-02-01 03:05:47 -08002693 /* Verify that the slab belongs to the intended node */
Joonsoo Kim1ea991b2013-10-24 10:07:40 +09002694 WARN_ON(page_to_nid(virt_to_page(objp)) != nodeid);
Matthew Dobson78d382d2006-02-01 03:05:47 -08002695
Joonsoo Kimb1cb0982013-10-24 10:07:45 +09002696 /* Verify double free bug */
Joonsoo Kim8456a642013-10-24 10:07:49 +09002697 for (i = page->active; i < cachep->num; i++) {
Joonsoo Kime5c58df2013-12-02 17:49:40 +09002698 if (get_free_obj(page, i) == objnr) {
Joonsoo Kimb1cb0982013-10-24 10:07:45 +09002699 printk(KERN_ERR "slab: double free detected in cache "
2700 "'%s', objp %p\n", cachep->name, objp);
2701 BUG();
2702 }
Matthew Dobson78d382d2006-02-01 03:05:47 -08002703 }
2704#endif
Joonsoo Kim8456a642013-10-24 10:07:49 +09002705 page->active--;
Joonsoo Kime5c58df2013-12-02 17:49:40 +09002706 set_free_obj(page, page->active, objnr);
Matthew Dobson78d382d2006-02-01 03:05:47 -08002707}
2708
Pekka Enberg47768742006-06-23 02:03:07 -07002709/*
2710 * Map pages beginning at addr to the given cache and slab. This is required
2711 * for the slab allocator to be able to lookup the cache and slab of a
Nick Pigginccd35fb2011-01-07 17:49:17 +11002712 * virtual address for kfree, ksize, and slab debugging.
Pekka Enberg47768742006-06-23 02:03:07 -07002713 */
Joonsoo Kim8456a642013-10-24 10:07:49 +09002714static void slab_map_pages(struct kmem_cache *cache, struct page *page,
Joonsoo Kim7e007352013-10-30 19:04:01 +09002715 void *freelist)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002716{
Joonsoo Kima57a4982013-10-24 10:07:44 +09002717 page->slab_cache = cache;
Joonsoo Kim8456a642013-10-24 10:07:49 +09002718 page->freelist = freelist;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002719}
2720
2721/*
2722 * Grow (by 1) the number of slabs within a cache. This is called by
2723 * kmem_cache_alloc() when there are no active objs left in a cache.
2724 */
Christoph Lameter3c517a62006-12-06 20:33:29 -08002725static int cache_grow(struct kmem_cache *cachep,
Joonsoo Kim0c3aa832013-10-24 10:07:38 +09002726 gfp_t flags, int nodeid, struct page *page)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002727{
Joonsoo Kim7e007352013-10-30 19:04:01 +09002728 void *freelist;
Pekka Enbergb28a02d2006-01-08 01:00:37 -08002729 size_t offset;
2730 gfp_t local_flags;
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00002731 struct kmem_cache_node *n;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002732
Andrew Mortona737b3e2006-03-22 00:08:11 -08002733 /*
2734 * Be lazy and only check for valid flags here, keeping it out of the
2735 * critical path in kmem_cache_alloc().
Linus Torvalds1da177e2005-04-16 15:20:36 -07002736 */
Christoph Lameter6cb06222007-10-16 01:25:41 -07002737 BUG_ON(flags & GFP_SLAB_BUG_MASK);
2738 local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002739
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00002740 /* Take the node list lock to change the colour_next on this node */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002741 check_irq_off();
Christoph Lameter18bf8542014-08-06 16:04:11 -07002742 n = get_node(cachep, nodeid);
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00002743 spin_lock(&n->list_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002744
2745 /* Get colour for the slab, and cal the next value. */
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00002746 offset = n->colour_next;
2747 n->colour_next++;
2748 if (n->colour_next >= cachep->colour)
2749 n->colour_next = 0;
2750 spin_unlock(&n->list_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002751
Ravikiran G Thirumalai2e1217c2006-02-04 23:27:56 -08002752 offset *= cachep->colour_off;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002753
2754 if (local_flags & __GFP_WAIT)
2755 local_irq_enable();
2756
2757 /*
2758 * The test for missing atomic flag is performed here, rather than
2759 * the more obvious place, simply to reduce the critical path length
2760 * in kmem_cache_alloc(). If a caller is seriously mis-behaving they
2761 * will eventually be caught here (where it matters).
2762 */
2763 kmem_flagcheck(cachep, flags);
2764
Andrew Mortona737b3e2006-03-22 00:08:11 -08002765 /*
2766 * Get mem for the objs. Attempt to allocate a physical page from
2767 * 'nodeid'.
Christoph Lametere498be72005-09-09 13:03:32 -07002768 */
Joonsoo Kim0c3aa832013-10-24 10:07:38 +09002769 if (!page)
2770 page = kmem_getpages(cachep, local_flags, nodeid);
2771 if (!page)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002772 goto failed;
2773
2774 /* Get slab management. */
Joonsoo Kim8456a642013-10-24 10:07:49 +09002775 freelist = alloc_slabmgmt(cachep, page, offset,
Christoph Lameter6cb06222007-10-16 01:25:41 -07002776 local_flags & ~GFP_CONSTRAINT_MASK, nodeid);
Joonsoo Kim8456a642013-10-24 10:07:49 +09002777 if (!freelist)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002778 goto opps1;
2779
Joonsoo Kim8456a642013-10-24 10:07:49 +09002780 slab_map_pages(cachep, page, freelist);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002781
Joonsoo Kim8456a642013-10-24 10:07:49 +09002782 cache_init_objs(cachep, page);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002783
2784 if (local_flags & __GFP_WAIT)
2785 local_irq_disable();
2786 check_irq_off();
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00002787 spin_lock(&n->list_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002788
2789 /* Make slab active. */
Joonsoo Kim8456a642013-10-24 10:07:49 +09002790 list_add_tail(&page->lru, &(n->slabs_free));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002791 STATS_INC_GROWN(cachep);
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00002792 n->free_objects += cachep->num;
2793 spin_unlock(&n->list_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002794 return 1;
Andrew Mortona737b3e2006-03-22 00:08:11 -08002795opps1:
Joonsoo Kim0c3aa832013-10-24 10:07:38 +09002796 kmem_freepages(cachep, page);
Andrew Mortona737b3e2006-03-22 00:08:11 -08002797failed:
Linus Torvalds1da177e2005-04-16 15:20:36 -07002798 if (local_flags & __GFP_WAIT)
2799 local_irq_disable();
2800 return 0;
2801}
2802
2803#if DEBUG
2804
2805/*
2806 * Perform extra freeing checks:
2807 * - detect bad pointers.
2808 * - POISON/RED_ZONE checking
Linus Torvalds1da177e2005-04-16 15:20:36 -07002809 */
2810static void kfree_debugcheck(const void *objp)
2811{
Linus Torvalds1da177e2005-04-16 15:20:36 -07002812 if (!virt_addr_valid(objp)) {
2813 printk(KERN_ERR "kfree_debugcheck: out of range ptr %lxh.\n",
Pekka Enbergb28a02d2006-01-08 01:00:37 -08002814 (unsigned long)objp);
2815 BUG();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002816 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002817}
2818
Pekka Enberg58ce1fd2006-06-23 02:03:24 -07002819static inline void verify_redzone_free(struct kmem_cache *cache, void *obj)
2820{
David Woodhouseb46b8f12007-05-08 00:22:59 -07002821 unsigned long long redzone1, redzone2;
Pekka Enberg58ce1fd2006-06-23 02:03:24 -07002822
2823 redzone1 = *dbg_redzone1(cache, obj);
2824 redzone2 = *dbg_redzone2(cache, obj);
2825
2826 /*
2827 * Redzone is ok.
2828 */
2829 if (redzone1 == RED_ACTIVE && redzone2 == RED_ACTIVE)
2830 return;
2831
2832 if (redzone1 == RED_INACTIVE && redzone2 == RED_INACTIVE)
2833 slab_error(cache, "double free detected");
2834 else
2835 slab_error(cache, "memory outside object was overwritten");
2836
David Woodhouseb46b8f12007-05-08 00:22:59 -07002837 printk(KERN_ERR "%p: redzone 1:0x%llx, redzone 2:0x%llx.\n",
Pekka Enberg58ce1fd2006-06-23 02:03:24 -07002838 obj, redzone1, redzone2);
2839}
2840
Pekka Enberg343e0d72006-02-01 03:05:50 -08002841static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
Ezequiel Garcia7c0cb9c2012-09-08 17:47:55 -03002842 unsigned long caller)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002843{
Linus Torvalds1da177e2005-04-16 15:20:36 -07002844 unsigned int objnr;
Joonsoo Kim8456a642013-10-24 10:07:49 +09002845 struct page *page;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002846
Matthew Wilcox80cbd912007-11-29 12:05:13 -07002847 BUG_ON(virt_to_cache(objp) != cachep);
2848
Manfred Spraul3dafccf2006-02-01 03:05:42 -08002849 objp -= obj_offset(cachep);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002850 kfree_debugcheck(objp);
Christoph Lameterb49af682007-05-06 14:49:41 -07002851 page = virt_to_head_page(objp);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002852
Linus Torvalds1da177e2005-04-16 15:20:36 -07002853 if (cachep->flags & SLAB_RED_ZONE) {
Pekka Enberg58ce1fd2006-06-23 02:03:24 -07002854 verify_redzone_free(cachep, objp);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002855 *dbg_redzone1(cachep, objp) = RED_INACTIVE;
2856 *dbg_redzone2(cachep, objp) = RED_INACTIVE;
2857 }
2858 if (cachep->flags & SLAB_STORE_USER)
Ezequiel Garcia7c0cb9c2012-09-08 17:47:55 -03002859 *dbg_userword(cachep, objp) = (void *)caller;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002860
Joonsoo Kim8456a642013-10-24 10:07:49 +09002861 objnr = obj_to_index(cachep, page, objp);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002862
2863 BUG_ON(objnr >= cachep->num);
Joonsoo Kim8456a642013-10-24 10:07:49 +09002864 BUG_ON(objp != index_to_obj(cachep, page, objnr));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002865
Joonsoo Kim03787302014-06-23 13:22:06 -07002866 set_obj_status(page, objnr, OBJECT_FREE);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002867 if (cachep->flags & SLAB_POISON) {
2868#ifdef CONFIG_DEBUG_PAGEALLOC
Christoph Lameter3b0efdf2012-06-13 10:24:57 -05002869 if ((cachep->size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) {
Ezequiel Garcia7c0cb9c2012-09-08 17:47:55 -03002870 store_stackinfo(cachep, objp, caller);
Pekka Enbergb28a02d2006-01-08 01:00:37 -08002871 kernel_map_pages(virt_to_page(objp),
Christoph Lameter3b0efdf2012-06-13 10:24:57 -05002872 cachep->size / PAGE_SIZE, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002873 } else {
2874 poison_obj(cachep, objp, POISON_FREE);
2875 }
2876#else
2877 poison_obj(cachep, objp, POISON_FREE);
2878#endif
2879 }
2880 return objp;
2881}
2882
Linus Torvalds1da177e2005-04-16 15:20:36 -07002883#else
/* Non-DEBUG builds: the free-time checks compile away. */
#define kfree_debugcheck(objp)				do { } while (0)
#define cache_free_debugcheck(cachep, objp, caller)	(objp)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002886#endif
2887
Mel Gorman072bb0a2012-07-31 16:43:58 -07002888static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags,
2889 bool force_refill)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002890{
2891 int batchcount;
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00002892 struct kmem_cache_node *n;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002893 struct array_cache *ac;
Pekka Enberg1ca4cb22006-10-06 00:43:52 -07002894 int node;
2895
Joe Korty6d2144d2008-03-05 15:04:59 -08002896 check_irq_off();
Lee Schermerhorn7d6e6d02010-05-26 14:45:03 -07002897 node = numa_mem_id();
Mel Gorman072bb0a2012-07-31 16:43:58 -07002898 if (unlikely(force_refill))
2899 goto force_grow;
2900retry:
Joe Korty6d2144d2008-03-05 15:04:59 -08002901 ac = cpu_cache_get(cachep);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002902 batchcount = ac->batchcount;
2903 if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
Andrew Mortona737b3e2006-03-22 00:08:11 -08002904 /*
2905 * If there was little recent activity on this cache, then
2906 * perform only a partial refill. Otherwise we could generate
2907 * refill bouncing.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002908 */
2909 batchcount = BATCHREFILL_LIMIT;
2910 }
Christoph Lameter18bf8542014-08-06 16:04:11 -07002911 n = get_node(cachep, node);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002912
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00002913 BUG_ON(ac->avail > 0 || !n);
2914 spin_lock(&n->list_lock);
Christoph Lametere498be72005-09-09 13:03:32 -07002915
Christoph Lameter3ded1752006-03-25 03:06:44 -08002916 /* See if we can refill from the shared array */
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00002917 if (n->shared && transfer_objects(ac, n->shared, batchcount)) {
2918 n->shared->touched = 1;
Christoph Lameter3ded1752006-03-25 03:06:44 -08002919 goto alloc_done;
Nick Piggin44b57f12010-01-27 22:27:40 +11002920 }
Christoph Lameter3ded1752006-03-25 03:06:44 -08002921
Linus Torvalds1da177e2005-04-16 15:20:36 -07002922 while (batchcount > 0) {
2923 struct list_head *entry;
Joonsoo Kim8456a642013-10-24 10:07:49 +09002924 struct page *page;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002925 /* Get slab alloc is to come from. */
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00002926 entry = n->slabs_partial.next;
2927 if (entry == &n->slabs_partial) {
2928 n->free_touched = 1;
2929 entry = n->slabs_free.next;
2930 if (entry == &n->slabs_free)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002931 goto must_grow;
2932 }
2933
Joonsoo Kim8456a642013-10-24 10:07:49 +09002934 page = list_entry(entry, struct page, lru);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002935 check_spinlock_acquired(cachep);
Pekka Enberg714b81712007-05-06 14:49:03 -07002936
2937 /*
2938 * The slab was either on partial or free list so
2939 * there must be at least one object available for
2940 * allocation.
2941 */
Joonsoo Kim8456a642013-10-24 10:07:49 +09002942 BUG_ON(page->active >= cachep->num);
Pekka Enberg714b81712007-05-06 14:49:03 -07002943
Joonsoo Kim8456a642013-10-24 10:07:49 +09002944 while (page->active < cachep->num && batchcount--) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002945 STATS_INC_ALLOCED(cachep);
2946 STATS_INC_ACTIVE(cachep);
2947 STATS_SET_HIGH(cachep);
2948
Joonsoo Kim8456a642013-10-24 10:07:49 +09002949 ac_put_obj(cachep, ac, slab_get_obj(cachep, page,
Mel Gorman072bb0a2012-07-31 16:43:58 -07002950 node));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002951 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002952
2953 /* move slabp to correct slabp list: */
Joonsoo Kim8456a642013-10-24 10:07:49 +09002954 list_del(&page->lru);
2955 if (page->active == cachep->num)
Dave Hansen34bf6ef2014-04-08 13:44:27 -07002956 list_add(&page->lru, &n->slabs_full);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002957 else
Dave Hansen34bf6ef2014-04-08 13:44:27 -07002958 list_add(&page->lru, &n->slabs_partial);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002959 }
2960
Andrew Mortona737b3e2006-03-22 00:08:11 -08002961must_grow:
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00002962 n->free_objects -= ac->avail;
Andrew Mortona737b3e2006-03-22 00:08:11 -08002963alloc_done:
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00002964 spin_unlock(&n->list_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002965
2966 if (unlikely(!ac->avail)) {
2967 int x;
Mel Gorman072bb0a2012-07-31 16:43:58 -07002968force_grow:
Christoph Lameter3c517a62006-12-06 20:33:29 -08002969 x = cache_grow(cachep, flags | GFP_THISNODE, node, NULL);
Christoph Lametere498be72005-09-09 13:03:32 -07002970
Andrew Mortona737b3e2006-03-22 00:08:11 -08002971 /* cache_grow can reenable interrupts, then ac could change. */
Pekka Enberg9a2dba42006-02-01 03:05:49 -08002972 ac = cpu_cache_get(cachep);
David Rientjes51cd8e62012-08-28 19:57:21 -07002973 node = numa_mem_id();
Mel Gorman072bb0a2012-07-31 16:43:58 -07002974
2975 /* no objects in sight? abort */
2976 if (!x && (ac->avail == 0 || force_refill))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002977 return NULL;
2978
Andrew Mortona737b3e2006-03-22 00:08:11 -08002979 if (!ac->avail) /* objects refilled by interrupt? */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002980 goto retry;
2981 }
2982 ac->touched = 1;
Mel Gorman072bb0a2012-07-31 16:43:58 -07002983
2984 return ac_get_obj(cachep, ac, flags, force_refill);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002985}
2986
Andrew Mortona737b3e2006-03-22 00:08:11 -08002987static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep,
2988 gfp_t flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002989{
2990 might_sleep_if(flags & __GFP_WAIT);
2991#if DEBUG
2992 kmem_flagcheck(cachep, flags);
2993#endif
2994}
2995
2996#if DEBUG
Andrew Mortona737b3e2006-03-22 00:08:11 -08002997static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
Ezequiel Garcia7c0cb9c2012-09-08 17:47:55 -03002998 gfp_t flags, void *objp, unsigned long caller)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002999{
Joonsoo Kim03787302014-06-23 13:22:06 -07003000 struct page *page;
3001
Pekka Enbergb28a02d2006-01-08 01:00:37 -08003002 if (!objp)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003003 return objp;
Pekka Enbergb28a02d2006-01-08 01:00:37 -08003004 if (cachep->flags & SLAB_POISON) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003005#ifdef CONFIG_DEBUG_PAGEALLOC
Christoph Lameter3b0efdf2012-06-13 10:24:57 -05003006 if ((cachep->size % PAGE_SIZE) == 0 && OFF_SLAB(cachep))
Pekka Enbergb28a02d2006-01-08 01:00:37 -08003007 kernel_map_pages(virt_to_page(objp),
Christoph Lameter3b0efdf2012-06-13 10:24:57 -05003008 cachep->size / PAGE_SIZE, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003009 else
3010 check_poison_obj(cachep, objp);
3011#else
3012 check_poison_obj(cachep, objp);
3013#endif
3014 poison_obj(cachep, objp, POISON_INUSE);
3015 }
3016 if (cachep->flags & SLAB_STORE_USER)
Ezequiel Garcia7c0cb9c2012-09-08 17:47:55 -03003017 *dbg_userword(cachep, objp) = (void *)caller;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003018
3019 if (cachep->flags & SLAB_RED_ZONE) {
Andrew Mortona737b3e2006-03-22 00:08:11 -08003020 if (*dbg_redzone1(cachep, objp) != RED_INACTIVE ||
3021 *dbg_redzone2(cachep, objp) != RED_INACTIVE) {
3022 slab_error(cachep, "double free, or memory outside"
3023 " object was overwritten");
Pekka Enbergb28a02d2006-01-08 01:00:37 -08003024 printk(KERN_ERR
David Woodhouseb46b8f12007-05-08 00:22:59 -07003025 "%p: redzone 1:0x%llx, redzone 2:0x%llx\n",
Andrew Mortona737b3e2006-03-22 00:08:11 -08003026 objp, *dbg_redzone1(cachep, objp),
3027 *dbg_redzone2(cachep, objp));
Linus Torvalds1da177e2005-04-16 15:20:36 -07003028 }
3029 *dbg_redzone1(cachep, objp) = RED_ACTIVE;
3030 *dbg_redzone2(cachep, objp) = RED_ACTIVE;
3031 }
Joonsoo Kim03787302014-06-23 13:22:06 -07003032
3033 page = virt_to_head_page(objp);
3034 set_obj_status(page, obj_to_index(cachep, page, objp), OBJECT_ACTIVE);
Manfred Spraul3dafccf2006-02-01 03:05:42 -08003035 objp += obj_offset(cachep);
Christoph Lameter4f104932007-05-06 14:50:17 -07003036 if (cachep->ctor && cachep->flags & SLAB_POISON)
Alexey Dobriyan51cc5062008-07-25 19:45:34 -07003037 cachep->ctor(objp);
Tetsuo Handa7ea466f2011-07-21 09:42:45 +09003038 if (ARCH_SLAB_MINALIGN &&
3039 ((unsigned long)objp & (ARCH_SLAB_MINALIGN-1))) {
Kevin Hilmana44b56d2006-12-06 20:32:11 -08003040 printk(KERN_ERR "0x%p: not aligned to ARCH_SLAB_MINALIGN=%d\n",
Hugh Dickinsc2251502011-07-11 13:35:08 -07003041 objp, (int)ARCH_SLAB_MINALIGN);
Kevin Hilmana44b56d2006-12-06 20:32:11 -08003042 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07003043 return objp;
3044}
3045#else
/* Non-DEBUG builds: the object pointer passes through unchanged. */
#define cache_alloc_debugcheck_after(cachep, flags, objp, caller)	(objp)
3047#endif
3048
Akinobu Mita773ff602008-12-23 19:37:01 +09003049static bool slab_should_failslab(struct kmem_cache *cachep, gfp_t flags)
Akinobu Mita8a8b6502006-12-08 02:39:44 -08003050{
Joonsoo Kim8a9c61d2014-08-06 16:04:20 -07003051 if (unlikely(cachep == kmem_cache))
Akinobu Mita773ff602008-12-23 19:37:01 +09003052 return false;
Akinobu Mita8a8b6502006-12-08 02:39:44 -08003053
Christoph Lameter8c138bc2012-06-13 10:24:58 -05003054 return should_failslab(cachep->object_size, flags, cachep->flags);
Akinobu Mita8a8b6502006-12-08 02:39:44 -08003055}
3056
Pekka Enberg343e0d72006-02-01 03:05:50 -08003057static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003058{
Pekka Enbergb28a02d2006-01-08 01:00:37 -08003059 void *objp;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003060 struct array_cache *ac;
Mel Gorman072bb0a2012-07-31 16:43:58 -07003061 bool force_refill = false;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003062
Alok N Kataria5c382302005-09-27 21:45:46 -07003063 check_irq_off();
Akinobu Mita8a8b6502006-12-08 02:39:44 -08003064
Pekka Enberg9a2dba42006-02-01 03:05:49 -08003065 ac = cpu_cache_get(cachep);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003066 if (likely(ac->avail)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003067 ac->touched = 1;
Mel Gorman072bb0a2012-07-31 16:43:58 -07003068 objp = ac_get_obj(cachep, ac, flags, false);
3069
J. R. Okajimaddbf2e82009-12-02 16:55:50 +09003070 /*
Mel Gorman072bb0a2012-07-31 16:43:58 -07003071 * Allow for the possibility all avail objects are not allowed
3072 * by the current flags
J. R. Okajimaddbf2e82009-12-02 16:55:50 +09003073 */
Mel Gorman072bb0a2012-07-31 16:43:58 -07003074 if (objp) {
3075 STATS_INC_ALLOCHIT(cachep);
3076 goto out;
3077 }
3078 force_refill = true;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003079 }
Mel Gorman072bb0a2012-07-31 16:43:58 -07003080
3081 STATS_INC_ALLOCMISS(cachep);
3082 objp = cache_alloc_refill(cachep, flags, force_refill);
3083 /*
3084 * the 'ac' may be updated by cache_alloc_refill(),
3085 * and kmemleak_erase() requires its correct value.
3086 */
3087 ac = cpu_cache_get(cachep);
3088
3089out:
Catalin Marinasd5cff632009-06-11 13:22:40 +01003090 /*
3091 * To avoid a false negative, if an object that is in one of the
3092 * per-CPU caches is leaked, we need to make sure kmemleak doesn't
3093 * treat the array pointers as a reference to the object.
3094 */
J. R. Okajimaf3d8b532009-12-02 16:55:49 +09003095 if (objp)
3096 kmemleak_erase(&ac->entry[ac->avail]);
Alok N Kataria5c382302005-09-27 21:45:46 -07003097 return objp;
3098}
3099
Christoph Lametere498be72005-09-09 13:03:32 -07003100#ifdef CONFIG_NUMA
3101/*
David Rientjesf0432d12014-04-07 15:37:30 -07003102 * Try allocating on another node if PF_SPREAD_SLAB is a mempolicy is set.
Paul Jacksonc61afb12006-03-24 03:16:08 -08003103 *
3104 * If we are in_interrupt, then process context, including cpusets and
3105 * mempolicy, may not apply and should not be used for allocation policy.
3106 */
3107static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags)
3108{
3109 int nid_alloc, nid_here;
3110
Christoph Lameter765c4502006-09-27 01:50:08 -07003111 if (in_interrupt() || (flags & __GFP_THISNODE))
Paul Jacksonc61afb12006-03-24 03:16:08 -08003112 return NULL;
Lee Schermerhorn7d6e6d02010-05-26 14:45:03 -07003113 nid_alloc = nid_here = numa_mem_id();
Paul Jacksonc61afb12006-03-24 03:16:08 -08003114 if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD))
Jack Steiner6adef3e2010-05-26 14:42:49 -07003115 nid_alloc = cpuset_slab_spread_node();
Paul Jacksonc61afb12006-03-24 03:16:08 -08003116 else if (current->mempolicy)
David Rientjes2a389612014-04-07 15:37:29 -07003117 nid_alloc = mempolicy_slab_node();
Paul Jacksonc61afb12006-03-24 03:16:08 -08003118 if (nid_alloc != nid_here)
Christoph Hellwig8b98c162006-12-06 20:32:30 -08003119 return ____cache_alloc_node(cachep, flags, nid_alloc);
Paul Jacksonc61afb12006-03-24 03:16:08 -08003120 return NULL;
3121}
3122
3123/*
Christoph Lameter765c4502006-09-27 01:50:08 -07003124 * Fallback function if there was no memory available and no objects on a
Christoph Lameter3c517a62006-12-06 20:33:29 -08003125 * certain node and fall back is permitted. First we scan all the
 * available nodes for available objects. If that fails then we
Christoph Lameter3c517a62006-12-06 20:33:29 -08003127 * perform an allocation without specifying a node. This allows the page
3128 * allocator to do its reclaim / fallback magic. We then insert the
3129 * slab into the proper nodelist and then allocate from it.
Christoph Lameter765c4502006-09-27 01:50:08 -07003130 */
Pekka Enberg8c8cc2c2007-02-10 01:42:53 -08003131static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
Christoph Lameter765c4502006-09-27 01:50:08 -07003132{
Pekka Enberg8c8cc2c2007-02-10 01:42:53 -08003133 struct zonelist *zonelist;
3134 gfp_t local_flags;
Mel Gormandd1a2392008-04-28 02:12:17 -07003135 struct zoneref *z;
Mel Gorman54a6eb52008-04-28 02:12:16 -07003136 struct zone *zone;
3137 enum zone_type high_zoneidx = gfp_zone(flags);
Christoph Lameter765c4502006-09-27 01:50:08 -07003138 void *obj = NULL;
Christoph Lameter3c517a62006-12-06 20:33:29 -08003139 int nid;
Mel Gormancc9a6c82012-03-21 16:34:11 -07003140 unsigned int cpuset_mems_cookie;
Pekka Enberg8c8cc2c2007-02-10 01:42:53 -08003141
3142 if (flags & __GFP_THISNODE)
3143 return NULL;
3144
Christoph Lameter6cb06222007-10-16 01:25:41 -07003145 local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);
Christoph Lameter765c4502006-09-27 01:50:08 -07003146
Mel Gormancc9a6c82012-03-21 16:34:11 -07003147retry_cpuset:
Mel Gormand26914d2014-04-03 14:47:24 -07003148 cpuset_mems_cookie = read_mems_allowed_begin();
David Rientjes2a389612014-04-07 15:37:29 -07003149 zonelist = node_zonelist(mempolicy_slab_node(), flags);
Mel Gormancc9a6c82012-03-21 16:34:11 -07003150
Christoph Lameter3c517a62006-12-06 20:33:29 -08003151retry:
3152 /*
3153 * Look through allowed nodes for objects available
3154 * from existing per node queues.
3155 */
Mel Gorman54a6eb52008-04-28 02:12:16 -07003156 for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
3157 nid = zone_to_nid(zone);
Christoph Lameteraedb0eb2006-10-21 10:24:16 -07003158
Mel Gorman54a6eb52008-04-28 02:12:16 -07003159 if (cpuset_zone_allowed_hardwall(zone, flags) &&
Christoph Lameter18bf8542014-08-06 16:04:11 -07003160 get_node(cache, nid) &&
3161 get_node(cache, nid)->free_objects) {
Christoph Lameter3c517a62006-12-06 20:33:29 -08003162 obj = ____cache_alloc_node(cache,
3163 flags | GFP_THISNODE, nid);
Christoph Lameter481c5342008-06-21 16:46:35 -07003164 if (obj)
3165 break;
3166 }
Christoph Lameter3c517a62006-12-06 20:33:29 -08003167 }
3168
Christoph Lametercfce6602007-05-06 14:50:17 -07003169 if (!obj) {
Christoph Lameter3c517a62006-12-06 20:33:29 -08003170 /*
3171 * This allocation will be performed within the constraints
3172 * of the current cpuset / memory policy requirements.
3173 * We may trigger various forms of reclaim on the allowed
3174 * set and go into memory reserves if necessary.
3175 */
Joonsoo Kim0c3aa832013-10-24 10:07:38 +09003176 struct page *page;
3177
Christoph Lameterdd47ea72006-12-13 00:34:11 -08003178 if (local_flags & __GFP_WAIT)
3179 local_irq_enable();
3180 kmem_flagcheck(cache, flags);
Joonsoo Kim0c3aa832013-10-24 10:07:38 +09003181 page = kmem_getpages(cache, local_flags, numa_mem_id());
Christoph Lameterdd47ea72006-12-13 00:34:11 -08003182 if (local_flags & __GFP_WAIT)
3183 local_irq_disable();
Joonsoo Kim0c3aa832013-10-24 10:07:38 +09003184 if (page) {
Christoph Lameter3c517a62006-12-06 20:33:29 -08003185 /*
3186 * Insert into the appropriate per node queues
3187 */
Joonsoo Kim0c3aa832013-10-24 10:07:38 +09003188 nid = page_to_nid(page);
3189 if (cache_grow(cache, flags, nid, page)) {
Christoph Lameter3c517a62006-12-06 20:33:29 -08003190 obj = ____cache_alloc_node(cache,
3191 flags | GFP_THISNODE, nid);
3192 if (!obj)
3193 /*
3194 * Another processor may allocate the
3195 * objects in the slab since we are
3196 * not holding any locks.
3197 */
3198 goto retry;
3199 } else {
Hugh Dickinsb6a60452007-01-05 16:36:36 -08003200 /* cache_grow already freed obj */
Christoph Lameter3c517a62006-12-06 20:33:29 -08003201 obj = NULL;
3202 }
3203 }
Christoph Lameteraedb0eb2006-10-21 10:24:16 -07003204 }
Mel Gormancc9a6c82012-03-21 16:34:11 -07003205
Mel Gormand26914d2014-04-03 14:47:24 -07003206 if (unlikely(!obj && read_mems_allowed_retry(cpuset_mems_cookie)))
Mel Gormancc9a6c82012-03-21 16:34:11 -07003207 goto retry_cpuset;
Christoph Lameter765c4502006-09-27 01:50:08 -07003208 return obj;
3209}
3210
3211/*
 * An interface to enable slab creation on nodeid
Linus Torvalds1da177e2005-04-16 15:20:36 -07003213 */
Christoph Hellwig8b98c162006-12-06 20:32:30 -08003214static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
Andrew Mortona737b3e2006-03-22 00:08:11 -08003215 int nodeid)
Christoph Lametere498be72005-09-09 13:03:32 -07003216{
3217 struct list_head *entry;
Joonsoo Kim8456a642013-10-24 10:07:49 +09003218 struct page *page;
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00003219 struct kmem_cache_node *n;
Pekka Enbergb28a02d2006-01-08 01:00:37 -08003220 void *obj;
Pekka Enbergb28a02d2006-01-08 01:00:37 -08003221 int x;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003222
Aaron Tomlin14e50c62013-04-26 16:15:34 +01003223 VM_BUG_ON(nodeid > num_online_nodes());
Christoph Lameter18bf8542014-08-06 16:04:11 -07003224 n = get_node(cachep, nodeid);
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00003225 BUG_ON(!n);
Christoph Lametere498be72005-09-09 13:03:32 -07003226
Andrew Mortona737b3e2006-03-22 00:08:11 -08003227retry:
Ravikiran G Thirumalaica3b9b92006-02-04 23:27:58 -08003228 check_irq_off();
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00003229 spin_lock(&n->list_lock);
3230 entry = n->slabs_partial.next;
3231 if (entry == &n->slabs_partial) {
3232 n->free_touched = 1;
3233 entry = n->slabs_free.next;
3234 if (entry == &n->slabs_free)
Pekka Enbergb28a02d2006-01-08 01:00:37 -08003235 goto must_grow;
3236 }
Christoph Lametere498be72005-09-09 13:03:32 -07003237
Joonsoo Kim8456a642013-10-24 10:07:49 +09003238 page = list_entry(entry, struct page, lru);
Pekka Enbergb28a02d2006-01-08 01:00:37 -08003239 check_spinlock_acquired_node(cachep, nodeid);
Christoph Lametere498be72005-09-09 13:03:32 -07003240
Pekka Enbergb28a02d2006-01-08 01:00:37 -08003241 STATS_INC_NODEALLOCS(cachep);
3242 STATS_INC_ACTIVE(cachep);
3243 STATS_SET_HIGH(cachep);
Christoph Lametere498be72005-09-09 13:03:32 -07003244
Joonsoo Kim8456a642013-10-24 10:07:49 +09003245 BUG_ON(page->active == cachep->num);
Christoph Lametere498be72005-09-09 13:03:32 -07003246
Joonsoo Kim8456a642013-10-24 10:07:49 +09003247 obj = slab_get_obj(cachep, page, nodeid);
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00003248 n->free_objects--;
Pekka Enbergb28a02d2006-01-08 01:00:37 -08003249 /* move slabp to correct slabp list: */
Joonsoo Kim8456a642013-10-24 10:07:49 +09003250 list_del(&page->lru);
Christoph Lametere498be72005-09-09 13:03:32 -07003251
Joonsoo Kim8456a642013-10-24 10:07:49 +09003252 if (page->active == cachep->num)
3253 list_add(&page->lru, &n->slabs_full);
Andrew Mortona737b3e2006-03-22 00:08:11 -08003254 else
Joonsoo Kim8456a642013-10-24 10:07:49 +09003255 list_add(&page->lru, &n->slabs_partial);
Christoph Lametere498be72005-09-09 13:03:32 -07003256
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00003257 spin_unlock(&n->list_lock);
Pekka Enbergb28a02d2006-01-08 01:00:37 -08003258 goto done;
Christoph Lametere498be72005-09-09 13:03:32 -07003259
Andrew Mortona737b3e2006-03-22 00:08:11 -08003260must_grow:
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00003261 spin_unlock(&n->list_lock);
Christoph Lameter3c517a62006-12-06 20:33:29 -08003262 x = cache_grow(cachep, flags | GFP_THISNODE, nodeid, NULL);
Christoph Lameter765c4502006-09-27 01:50:08 -07003263 if (x)
3264 goto retry;
Christoph Lametere498be72005-09-09 13:03:32 -07003265
Pekka Enberg8c8cc2c2007-02-10 01:42:53 -08003266 return fallback_alloc(cachep, flags);
Christoph Lameter765c4502006-09-27 01:50:08 -07003267
Andrew Mortona737b3e2006-03-22 00:08:11 -08003268done:
Pekka Enbergb28a02d2006-01-08 01:00:37 -08003269 return obj;
Christoph Lametere498be72005-09-09 13:03:32 -07003270}
Pekka Enberg8c8cc2c2007-02-10 01:42:53 -08003271
Pekka Enberg8c8cc2c2007-02-10 01:42:53 -08003272static __always_inline void *
Ezequiel Garcia48356302012-09-08 17:47:57 -03003273slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
Ezequiel Garcia7c0cb9c2012-09-08 17:47:55 -03003274 unsigned long caller)
Pekka Enberg8c8cc2c2007-02-10 01:42:53 -08003275{
3276 unsigned long save_flags;
3277 void *ptr;
Lee Schermerhorn7d6e6d02010-05-26 14:45:03 -07003278 int slab_node = numa_mem_id();
Pekka Enberg8c8cc2c2007-02-10 01:42:53 -08003279
Benjamin Herrenschmidtdcce2842009-06-18 13:24:12 +10003280 flags &= gfp_allowed_mask;
Pekka Enberg7e85ee02009-06-12 14:03:06 +03003281
Nick Piggincf40bd12009-01-21 08:12:39 +01003282 lockdep_trace_alloc(flags);
3283
Akinobu Mita773ff602008-12-23 19:37:01 +09003284 if (slab_should_failslab(cachep, flags))
Akinobu Mita824ebef2007-05-06 14:49:58 -07003285 return NULL;
3286
Glauber Costad79923f2012-12-18 14:22:48 -08003287 cachep = memcg_kmem_get_cache(cachep, flags);
3288
Pekka Enberg8c8cc2c2007-02-10 01:42:53 -08003289 cache_alloc_debugcheck_before(cachep, flags);
3290 local_irq_save(save_flags);
3291
Andrew Mortoneacbbae2011-07-28 13:59:49 -07003292 if (nodeid == NUMA_NO_NODE)
Lee Schermerhorn7d6e6d02010-05-26 14:45:03 -07003293 nodeid = slab_node;
Pekka Enberg8c8cc2c2007-02-10 01:42:53 -08003294
Christoph Lameter18bf8542014-08-06 16:04:11 -07003295 if (unlikely(!get_node(cachep, nodeid))) {
Pekka Enberg8c8cc2c2007-02-10 01:42:53 -08003296 /* Node not bootstrapped yet */
3297 ptr = fallback_alloc(cachep, flags);
3298 goto out;
3299 }
3300
Lee Schermerhorn7d6e6d02010-05-26 14:45:03 -07003301 if (nodeid == slab_node) {
Pekka Enberg8c8cc2c2007-02-10 01:42:53 -08003302 /*
3303 * Use the locally cached objects if possible.
3304 * However ____cache_alloc does not allow fallback
3305 * to other nodes. It may fail while we still have
3306 * objects on other nodes available.
3307 */
3308 ptr = ____cache_alloc(cachep, flags);
3309 if (ptr)
3310 goto out;
3311 }
3312 /* ___cache_alloc_node can fall back to other nodes */
3313 ptr = ____cache_alloc_node(cachep, flags, nodeid);
3314 out:
3315 local_irq_restore(save_flags);
3316 ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller);
Christoph Lameter8c138bc2012-06-13 10:24:58 -05003317 kmemleak_alloc_recursive(ptr, cachep->object_size, 1, cachep->flags,
Catalin Marinasd5cff632009-06-11 13:22:40 +01003318 flags);
Pekka Enberg8c8cc2c2007-02-10 01:42:53 -08003319
Joe Perches5087c822013-09-10 17:02:51 -07003320 if (likely(ptr)) {
Christoph Lameter8c138bc2012-06-13 10:24:58 -05003321 kmemcheck_slab_alloc(cachep, flags, ptr, cachep->object_size);
Joe Perches5087c822013-09-10 17:02:51 -07003322 if (unlikely(flags & __GFP_ZERO))
3323 memset(ptr, 0, cachep->object_size);
3324 }
Christoph Lameterd07dbea2007-07-17 04:03:23 -07003325
Pekka Enberg8c8cc2c2007-02-10 01:42:53 -08003326 return ptr;
3327}
3328
3329static __always_inline void *
3330__do_cache_alloc(struct kmem_cache *cache, gfp_t flags)
3331{
3332 void *objp;
3333
David Rientjesf0432d12014-04-07 15:37:30 -07003334 if (current->mempolicy || unlikely(current->flags & PF_SPREAD_SLAB)) {
Pekka Enberg8c8cc2c2007-02-10 01:42:53 -08003335 objp = alternate_node_alloc(cache, flags);
3336 if (objp)
3337 goto out;
3338 }
3339 objp = ____cache_alloc(cache, flags);
3340
3341 /*
3342 * We may just have run out of memory on the local node.
3343 * ____cache_alloc_node() knows how to locate memory on other nodes
3344 */
Lee Schermerhorn7d6e6d02010-05-26 14:45:03 -07003345 if (!objp)
3346 objp = ____cache_alloc_node(cache, flags, numa_mem_id());
Pekka Enberg8c8cc2c2007-02-10 01:42:53 -08003347
3348 out:
3349 return objp;
3350}
3351#else
3352
3353static __always_inline void *
3354__do_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
3355{
3356 return ____cache_alloc(cachep, flags);
3357}
3358
3359#endif /* CONFIG_NUMA */
3360
3361static __always_inline void *
Ezequiel Garcia48356302012-09-08 17:47:57 -03003362slab_alloc(struct kmem_cache *cachep, gfp_t flags, unsigned long caller)
Pekka Enberg8c8cc2c2007-02-10 01:42:53 -08003363{
3364 unsigned long save_flags;
3365 void *objp;
3366
Benjamin Herrenschmidtdcce2842009-06-18 13:24:12 +10003367 flags &= gfp_allowed_mask;
Pekka Enberg7e85ee02009-06-12 14:03:06 +03003368
Nick Piggincf40bd12009-01-21 08:12:39 +01003369 lockdep_trace_alloc(flags);
3370
Akinobu Mita773ff602008-12-23 19:37:01 +09003371 if (slab_should_failslab(cachep, flags))
Akinobu Mita824ebef2007-05-06 14:49:58 -07003372 return NULL;
3373
Glauber Costad79923f2012-12-18 14:22:48 -08003374 cachep = memcg_kmem_get_cache(cachep, flags);
3375
Pekka Enberg8c8cc2c2007-02-10 01:42:53 -08003376 cache_alloc_debugcheck_before(cachep, flags);
3377 local_irq_save(save_flags);
3378 objp = __do_cache_alloc(cachep, flags);
3379 local_irq_restore(save_flags);
3380 objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller);
Christoph Lameter8c138bc2012-06-13 10:24:58 -05003381 kmemleak_alloc_recursive(objp, cachep->object_size, 1, cachep->flags,
Catalin Marinasd5cff632009-06-11 13:22:40 +01003382 flags);
Pekka Enberg8c8cc2c2007-02-10 01:42:53 -08003383 prefetchw(objp);
3384
Joe Perches5087c822013-09-10 17:02:51 -07003385 if (likely(objp)) {
Christoph Lameter8c138bc2012-06-13 10:24:58 -05003386 kmemcheck_slab_alloc(cachep, flags, objp, cachep->object_size);
Joe Perches5087c822013-09-10 17:02:51 -07003387 if (unlikely(flags & __GFP_ZERO))
3388 memset(objp, 0, cachep->object_size);
3389 }
Christoph Lameterd07dbea2007-07-17 04:03:23 -07003390
Pekka Enberg8c8cc2c2007-02-10 01:42:53 -08003391 return objp;
3392}
Christoph Lametere498be72005-09-09 13:03:32 -07003393
3394/*
Jianyu Zhan5f0985b2014-03-30 17:02:20 +08003395 * Caller needs to acquire correct kmem_cache_node's list_lock
Christoph Lametere498be72005-09-09 13:03:32 -07003396 */
Pekka Enberg343e0d72006-02-01 03:05:50 -08003397static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
Pekka Enbergb28a02d2006-01-08 01:00:37 -08003398 int node)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003399{
3400 int i;
Joonsoo Kim25c063f2014-08-06 16:04:22 -07003401 struct kmem_cache_node *n = get_node(cachep, node);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003402
3403 for (i = 0; i < nr_objects; i++) {
Mel Gorman072bb0a2012-07-31 16:43:58 -07003404 void *objp;
Joonsoo Kim8456a642013-10-24 10:07:49 +09003405 struct page *page;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003406
Mel Gorman072bb0a2012-07-31 16:43:58 -07003407 clear_obj_pfmemalloc(&objpp[i]);
3408 objp = objpp[i];
3409
Joonsoo Kim8456a642013-10-24 10:07:49 +09003410 page = virt_to_head_page(objp);
Joonsoo Kim8456a642013-10-24 10:07:49 +09003411 list_del(&page->lru);
Christoph Lameterff694162005-09-22 21:44:02 -07003412 check_spinlock_acquired_node(cachep, node);
Joonsoo Kim8456a642013-10-24 10:07:49 +09003413 slab_put_obj(cachep, page, objp, node);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003414 STATS_DEC_ACTIVE(cachep);
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00003415 n->free_objects++;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003416
3417 /* fixup slab chains */
Joonsoo Kim8456a642013-10-24 10:07:49 +09003418 if (page->active == 0) {
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00003419 if (n->free_objects > n->free_limit) {
3420 n->free_objects -= cachep->num;
Ravikiran G Thirumalaie5ac9c52006-09-25 23:31:34 -07003421 /* No need to drop any previously held
3422 * lock here, even if we have a off-slab slab
3423 * descriptor it is guaranteed to come from
3424 * a different cache, refer to comments before
3425 * alloc_slabmgmt.
3426 */
Joonsoo Kim8456a642013-10-24 10:07:49 +09003427 slab_destroy(cachep, page);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003428 } else {
Joonsoo Kim8456a642013-10-24 10:07:49 +09003429 list_add(&page->lru, &n->slabs_free);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003430 }
3431 } else {
3432 /* Unconditionally move a slab to the end of the
3433 * partial list on free - maximum time for the
3434 * other objects to be freed, too.
3435 */
Joonsoo Kim8456a642013-10-24 10:07:49 +09003436 list_add_tail(&page->lru, &n->slabs_partial);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003437 }
3438 }
3439}
3440
Pekka Enberg343e0d72006-02-01 03:05:50 -08003441static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003442{
3443 int batchcount;
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00003444 struct kmem_cache_node *n;
Lee Schermerhorn7d6e6d02010-05-26 14:45:03 -07003445 int node = numa_mem_id();
Linus Torvalds1da177e2005-04-16 15:20:36 -07003446
3447 batchcount = ac->batchcount;
3448#if DEBUG
3449 BUG_ON(!batchcount || batchcount > ac->avail);
3450#endif
3451 check_irq_off();
Christoph Lameter18bf8542014-08-06 16:04:11 -07003452 n = get_node(cachep, node);
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00003453 spin_lock(&n->list_lock);
3454 if (n->shared) {
3455 struct array_cache *shared_array = n->shared;
Pekka Enbergb28a02d2006-01-08 01:00:37 -08003456 int max = shared_array->limit - shared_array->avail;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003457 if (max) {
3458 if (batchcount > max)
3459 batchcount = max;
Christoph Lametere498be72005-09-09 13:03:32 -07003460 memcpy(&(shared_array->entry[shared_array->avail]),
Pekka Enbergb28a02d2006-01-08 01:00:37 -08003461 ac->entry, sizeof(void *) * batchcount);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003462 shared_array->avail += batchcount;
3463 goto free_done;
3464 }
3465 }
3466
Christoph Lameterff694162005-09-22 21:44:02 -07003467 free_block(cachep, ac->entry, batchcount, node);
Andrew Mortona737b3e2006-03-22 00:08:11 -08003468free_done:
Linus Torvalds1da177e2005-04-16 15:20:36 -07003469#if STATS
3470 {
3471 int i = 0;
3472 struct list_head *p;
3473
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00003474 p = n->slabs_free.next;
3475 while (p != &(n->slabs_free)) {
Joonsoo Kim8456a642013-10-24 10:07:49 +09003476 struct page *page;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003477
Joonsoo Kim8456a642013-10-24 10:07:49 +09003478 page = list_entry(p, struct page, lru);
3479 BUG_ON(page->active);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003480
3481 i++;
3482 p = p->next;
3483 }
3484 STATS_SET_FREEABLE(cachep, i);
3485 }
3486#endif
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00003487 spin_unlock(&n->list_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003488 ac->avail -= batchcount;
Andrew Mortona737b3e2006-03-22 00:08:11 -08003489 memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003490}
3491
3492/*
Andrew Mortona737b3e2006-03-22 00:08:11 -08003493 * Release an obj back to its cache. If the obj has a constructed state, it must
3494 * be in this state _before_ it is released. Called with disabled ints.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003495 */
Suleiman Souhlala947eb92011-06-02 00:16:42 -07003496static inline void __cache_free(struct kmem_cache *cachep, void *objp,
Ezequiel Garcia7c0cb9c2012-09-08 17:47:55 -03003497 unsigned long caller)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003498{
Pekka Enberg9a2dba42006-02-01 03:05:49 -08003499 struct array_cache *ac = cpu_cache_get(cachep);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003500
3501 check_irq_off();
Catalin Marinasd5cff632009-06-11 13:22:40 +01003502 kmemleak_free_recursive(objp, cachep->flags);
Suleiman Souhlala947eb92011-06-02 00:16:42 -07003503 objp = cache_free_debugcheck(cachep, objp, caller);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003504
Christoph Lameter8c138bc2012-06-13 10:24:58 -05003505 kmemcheck_slab_free(cachep, objp, cachep->object_size);
Pekka Enbergc175eea2008-05-09 20:35:53 +02003506
Siddha, Suresh B1807a1a2007-08-22 14:01:49 -07003507 /*
3508 * Skip calling cache_free_alien() when the platform is not numa.
3509 * This will avoid cache misses that happen while accessing slabp (which
3510 * is per page memory reference) to get nodeid. Instead use a global
3511 * variable to skip the call, which is mostly likely to be present in
3512 * the cache.
3513 */
Mel Gormanb6e68bc2009-06-16 15:32:16 -07003514 if (nr_online_nodes > 1 && cache_free_alien(cachep, objp))
Pekka Enberg729bd0b2006-06-23 02:03:05 -07003515 return;
Christoph Lametere498be72005-09-09 13:03:32 -07003516
Linus Torvalds1da177e2005-04-16 15:20:36 -07003517 if (likely(ac->avail < ac->limit)) {
3518 STATS_INC_FREEHIT(cachep);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003519 } else {
3520 STATS_INC_FREEMISS(cachep);
3521 cache_flusharray(cachep, ac);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003522 }
Zhao Jin42c8c992011-08-27 00:26:17 +08003523
Mel Gorman072bb0a2012-07-31 16:43:58 -07003524 ac_put_obj(cachep, ac, objp);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003525}
3526
3527/**
3528 * kmem_cache_alloc - Allocate an object
3529 * @cachep: The cache to allocate from.
3530 * @flags: See kmalloc().
3531 *
3532 * Allocate an object from this cache. The flags are only relevant
3533 * if the cache has no available objects.
3534 */
Pekka Enberg343e0d72006-02-01 03:05:50 -08003535void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003536{
Ezequiel Garcia48356302012-09-08 17:47:57 -03003537 void *ret = slab_alloc(cachep, flags, _RET_IP_);
Eduard - Gabriel Munteanu36555752008-08-10 20:14:05 +03003538
Eduard - Gabriel Munteanuca2b84c2009-03-23 15:12:24 +02003539 trace_kmem_cache_alloc(_RET_IP_, ret,
Christoph Lameter8c138bc2012-06-13 10:24:58 -05003540 cachep->object_size, cachep->size, flags);
Eduard - Gabriel Munteanu36555752008-08-10 20:14:05 +03003541
3542 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003543}
3544EXPORT_SYMBOL(kmem_cache_alloc);
3545
Li Zefan0f24f122009-12-11 15:45:30 +08003546#ifdef CONFIG_TRACING
Steven Rostedt85beb582010-11-24 16:23:34 -05003547void *
Ezequiel Garcia40521472012-09-08 17:47:56 -03003548kmem_cache_alloc_trace(struct kmem_cache *cachep, gfp_t flags, size_t size)
Eduard - Gabriel Munteanu36555752008-08-10 20:14:05 +03003549{
Steven Rostedt85beb582010-11-24 16:23:34 -05003550 void *ret;
3551
Ezequiel Garcia48356302012-09-08 17:47:57 -03003552 ret = slab_alloc(cachep, flags, _RET_IP_);
Steven Rostedt85beb582010-11-24 16:23:34 -05003553
3554 trace_kmalloc(_RET_IP_, ret,
Ezequiel Garciaff4fcd02012-09-08 17:47:52 -03003555 size, cachep->size, flags);
Steven Rostedt85beb582010-11-24 16:23:34 -05003556 return ret;
Eduard - Gabriel Munteanu36555752008-08-10 20:14:05 +03003557}
Steven Rostedt85beb582010-11-24 16:23:34 -05003558EXPORT_SYMBOL(kmem_cache_alloc_trace);
Eduard - Gabriel Munteanu36555752008-08-10 20:14:05 +03003559#endif
3560
Linus Torvalds1da177e2005-04-16 15:20:36 -07003561#ifdef CONFIG_NUMA
Zhouping Liud0d04b72013-05-16 11:36:23 +08003562/**
3563 * kmem_cache_alloc_node - Allocate an object on the specified node
3564 * @cachep: The cache to allocate from.
3565 * @flags: See kmalloc().
3566 * @nodeid: node number of the target node.
3567 *
3568 * Identical to kmem_cache_alloc but it will allocate memory on the given
3569 * node, which can improve the performance for cpu bound structures.
3570 *
3571 * Fallback to other node is possible if __GFP_THISNODE is not set.
3572 */
Christoph Hellwig8b98c162006-12-06 20:32:30 -08003573void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
3574{
Ezequiel Garcia48356302012-09-08 17:47:57 -03003575 void *ret = slab_alloc_node(cachep, flags, nodeid, _RET_IP_);
Eduard - Gabriel Munteanu36555752008-08-10 20:14:05 +03003576
Eduard - Gabriel Munteanuca2b84c2009-03-23 15:12:24 +02003577 trace_kmem_cache_alloc_node(_RET_IP_, ret,
Christoph Lameter8c138bc2012-06-13 10:24:58 -05003578 cachep->object_size, cachep->size,
Eduard - Gabriel Munteanuca2b84c2009-03-23 15:12:24 +02003579 flags, nodeid);
Eduard - Gabriel Munteanu36555752008-08-10 20:14:05 +03003580
3581 return ret;
Christoph Hellwig8b98c162006-12-06 20:32:30 -08003582}
Linus Torvalds1da177e2005-04-16 15:20:36 -07003583EXPORT_SYMBOL(kmem_cache_alloc_node);
3584
Li Zefan0f24f122009-12-11 15:45:30 +08003585#ifdef CONFIG_TRACING
Ezequiel Garcia40521472012-09-08 17:47:56 -03003586void *kmem_cache_alloc_node_trace(struct kmem_cache *cachep,
Steven Rostedt85beb582010-11-24 16:23:34 -05003587 gfp_t flags,
Ezequiel Garcia40521472012-09-08 17:47:56 -03003588 int nodeid,
3589 size_t size)
Eduard - Gabriel Munteanu36555752008-08-10 20:14:05 +03003590{
Steven Rostedt85beb582010-11-24 16:23:34 -05003591 void *ret;
3592
Ezequiel Garcia592f4142012-09-25 08:07:08 -03003593 ret = slab_alloc_node(cachep, flags, nodeid, _RET_IP_);
Ezequiel Garcia7c0cb9c2012-09-08 17:47:55 -03003594
Steven Rostedt85beb582010-11-24 16:23:34 -05003595 trace_kmalloc_node(_RET_IP_, ret,
Ezequiel Garciaff4fcd02012-09-08 17:47:52 -03003596 size, cachep->size,
Steven Rostedt85beb582010-11-24 16:23:34 -05003597 flags, nodeid);
3598 return ret;
Eduard - Gabriel Munteanu36555752008-08-10 20:14:05 +03003599}
Steven Rostedt85beb582010-11-24 16:23:34 -05003600EXPORT_SYMBOL(kmem_cache_alloc_node_trace);
Eduard - Gabriel Munteanu36555752008-08-10 20:14:05 +03003601#endif
3602
Christoph Hellwig8b98c162006-12-06 20:32:30 -08003603static __always_inline void *
Ezequiel Garcia7c0cb9c2012-09-08 17:47:55 -03003604__do_kmalloc_node(size_t size, gfp_t flags, int node, unsigned long caller)
Manfred Spraul97e2bde2005-05-01 08:58:38 -07003605{
Pekka Enberg343e0d72006-02-01 03:05:50 -08003606 struct kmem_cache *cachep;
Manfred Spraul97e2bde2005-05-01 08:58:38 -07003607
Christoph Lameter2c59dd62013-01-10 19:14:19 +00003608 cachep = kmalloc_slab(size, flags);
Christoph Lameter6cb8f912007-07-17 04:03:22 -07003609 if (unlikely(ZERO_OR_NULL_PTR(cachep)))
3610 return cachep;
Ezequiel Garcia40521472012-09-08 17:47:56 -03003611 return kmem_cache_alloc_node_trace(cachep, flags, node, size);
Manfred Spraul97e2bde2005-05-01 08:58:38 -07003612}
Christoph Hellwig8b98c162006-12-06 20:32:30 -08003613
Li Zefan0bb38a52009-12-11 15:45:50 +08003614#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_TRACING)
Christoph Hellwig8b98c162006-12-06 20:32:30 -08003615void *__kmalloc_node(size_t size, gfp_t flags, int node)
3616{
Ezequiel Garcia7c0cb9c2012-09-08 17:47:55 -03003617 return __do_kmalloc_node(size, flags, node, _RET_IP_);
Christoph Hellwig8b98c162006-12-06 20:32:30 -08003618}
Christoph Hellwigdbe5e692006-09-25 23:31:36 -07003619EXPORT_SYMBOL(__kmalloc_node);
Christoph Hellwig8b98c162006-12-06 20:32:30 -08003620
3621void *__kmalloc_node_track_caller(size_t size, gfp_t flags,
Eduard - Gabriel Munteanuce71e272008-08-19 20:43:25 +03003622 int node, unsigned long caller)
Christoph Hellwig8b98c162006-12-06 20:32:30 -08003623{
Ezequiel Garcia7c0cb9c2012-09-08 17:47:55 -03003624 return __do_kmalloc_node(size, flags, node, caller);
Christoph Hellwig8b98c162006-12-06 20:32:30 -08003625}
3626EXPORT_SYMBOL(__kmalloc_node_track_caller);
3627#else
3628void *__kmalloc_node(size_t size, gfp_t flags, int node)
3629{
Ezequiel Garcia7c0cb9c2012-09-08 17:47:55 -03003630 return __do_kmalloc_node(size, flags, node, 0);
Christoph Hellwig8b98c162006-12-06 20:32:30 -08003631}
3632EXPORT_SYMBOL(__kmalloc_node);
Li Zefan0bb38a52009-12-11 15:45:50 +08003633#endif /* CONFIG_DEBUG_SLAB || CONFIG_TRACING */
Christoph Hellwig8b98c162006-12-06 20:32:30 -08003634#endif /* CONFIG_NUMA */
Linus Torvalds1da177e2005-04-16 15:20:36 -07003635
3636/**
Paul Drynoff800590f2006-06-23 02:03:48 -07003637 * __do_kmalloc - allocate memory
Linus Torvalds1da177e2005-04-16 15:20:36 -07003638 * @size: how many bytes of memory are required.
Paul Drynoff800590f2006-06-23 02:03:48 -07003639 * @flags: the type of memory to allocate (see kmalloc).
Randy Dunlap911851e2006-03-22 00:08:14 -08003640 * @caller: function caller for debug tracking of the caller
Linus Torvalds1da177e2005-04-16 15:20:36 -07003641 */
Pekka Enberg7fd6b142006-02-01 03:05:52 -08003642static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
Ezequiel Garcia7c0cb9c2012-09-08 17:47:55 -03003643 unsigned long caller)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003644{
Pekka Enberg343e0d72006-02-01 03:05:50 -08003645 struct kmem_cache *cachep;
Eduard - Gabriel Munteanu36555752008-08-10 20:14:05 +03003646 void *ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003647
Christoph Lameter2c59dd62013-01-10 19:14:19 +00003648 cachep = kmalloc_slab(size, flags);
Linus Torvaldsa5c96d82007-07-19 13:17:15 -07003649 if (unlikely(ZERO_OR_NULL_PTR(cachep)))
3650 return cachep;
Ezequiel Garcia48356302012-09-08 17:47:57 -03003651 ret = slab_alloc(cachep, flags, caller);
Eduard - Gabriel Munteanu36555752008-08-10 20:14:05 +03003652
Ezequiel Garcia7c0cb9c2012-09-08 17:47:55 -03003653 trace_kmalloc(caller, ret,
Christoph Lameter3b0efdf2012-06-13 10:24:57 -05003654 size, cachep->size, flags);
Eduard - Gabriel Munteanu36555752008-08-10 20:14:05 +03003655
3656 return ret;
Pekka Enberg7fd6b142006-02-01 03:05:52 -08003657}
3658
Pekka Enberg7fd6b142006-02-01 03:05:52 -08003659
Li Zefan0bb38a52009-12-11 15:45:50 +08003660#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_TRACING)
Pekka Enberg7fd6b142006-02-01 03:05:52 -08003661void *__kmalloc(size_t size, gfp_t flags)
3662{
Ezequiel Garcia7c0cb9c2012-09-08 17:47:55 -03003663 return __do_kmalloc(size, flags, _RET_IP_);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003664}
3665EXPORT_SYMBOL(__kmalloc);
3666
Eduard - Gabriel Munteanuce71e272008-08-19 20:43:25 +03003667void *__kmalloc_track_caller(size_t size, gfp_t flags, unsigned long caller)
Pekka Enberg7fd6b142006-02-01 03:05:52 -08003668{
Ezequiel Garcia7c0cb9c2012-09-08 17:47:55 -03003669 return __do_kmalloc(size, flags, caller);
Pekka Enberg7fd6b142006-02-01 03:05:52 -08003670}
3671EXPORT_SYMBOL(__kmalloc_track_caller);
Christoph Hellwig1d2c8ee2006-10-04 02:15:25 -07003672
3673#else
3674void *__kmalloc(size_t size, gfp_t flags)
3675{
Ezequiel Garcia7c0cb9c2012-09-08 17:47:55 -03003676 return __do_kmalloc(size, flags, 0);
Christoph Hellwig1d2c8ee2006-10-04 02:15:25 -07003677}
3678EXPORT_SYMBOL(__kmalloc);
Pekka Enberg7fd6b142006-02-01 03:05:52 -08003679#endif
3680
Linus Torvalds1da177e2005-04-16 15:20:36 -07003681/**
3682 * kmem_cache_free - Deallocate an object
3683 * @cachep: The cache the allocation was from.
3684 * @objp: The previously allocated object.
3685 *
3686 * Free an object which was previously allocated from this
3687 * cache.
3688 */
Pekka Enberg343e0d72006-02-01 03:05:50 -08003689void kmem_cache_free(struct kmem_cache *cachep, void *objp)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003690{
3691 unsigned long flags;
Glauber Costab9ce5ef2012-12-18 14:22:46 -08003692 cachep = cache_from_obj(cachep, objp);
3693 if (!cachep)
3694 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003695
3696 local_irq_save(flags);
Feng Tangd97d4762012-07-02 14:29:10 +08003697 debug_check_no_locks_freed(objp, cachep->object_size);
Thomas Gleixner3ac7fe52008-04-30 00:55:01 -07003698 if (!(cachep->flags & SLAB_DEBUG_OBJECTS))
Christoph Lameter8c138bc2012-06-13 10:24:58 -05003699 debug_check_no_obj_freed(objp, cachep->object_size);
Ezequiel Garcia7c0cb9c2012-09-08 17:47:55 -03003700 __cache_free(cachep, objp, _RET_IP_);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003701 local_irq_restore(flags);
Eduard - Gabriel Munteanu36555752008-08-10 20:14:05 +03003702
Eduard - Gabriel Munteanuca2b84c2009-03-23 15:12:24 +02003703 trace_kmem_cache_free(_RET_IP_, objp);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003704}
3705EXPORT_SYMBOL(kmem_cache_free);
3706
3707/**
Linus Torvalds1da177e2005-04-16 15:20:36 -07003708 * kfree - free previously allocated memory
3709 * @objp: pointer returned by kmalloc.
3710 *
Pekka Enberg80e93ef2005-09-09 13:10:16 -07003711 * If @objp is NULL, no operation is performed.
3712 *
Linus Torvalds1da177e2005-04-16 15:20:36 -07003713 * Don't free memory not originally allocated by kmalloc()
3714 * or you will run into trouble.
3715 */
3716void kfree(const void *objp)
3717{
Pekka Enberg343e0d72006-02-01 03:05:50 -08003718 struct kmem_cache *c;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003719 unsigned long flags;
3720
Pekka Enberg2121db72009-03-25 11:05:57 +02003721 trace_kfree(_RET_IP_, objp);
3722
Christoph Lameter6cb8f912007-07-17 04:03:22 -07003723 if (unlikely(ZERO_OR_NULL_PTR(objp)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07003724 return;
3725 local_irq_save(flags);
3726 kfree_debugcheck(objp);
Pekka Enberg6ed5eb2212006-02-01 03:05:49 -08003727 c = virt_to_cache(objp);
Christoph Lameter8c138bc2012-06-13 10:24:58 -05003728 debug_check_no_locks_freed(objp, c->object_size);
3729
3730 debug_check_no_obj_freed(objp, c->object_size);
Ezequiel Garcia7c0cb9c2012-09-08 17:47:55 -03003731 __cache_free(c, (void *)objp, _RET_IP_);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003732 local_irq_restore(flags);
3733}
3734EXPORT_SYMBOL(kfree);
3735
Christoph Lametere498be72005-09-09 13:03:32 -07003736/*
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00003737 * This initializes kmem_cache_node or resizes various caches for all nodes.
Christoph Lametere498be72005-09-09 13:03:32 -07003738 */
Jianyu Zhan5f0985b2014-03-30 17:02:20 +08003739static int alloc_kmem_cache_node(struct kmem_cache *cachep, gfp_t gfp)
Christoph Lametere498be72005-09-09 13:03:32 -07003740{
3741 int node;
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00003742 struct kmem_cache_node *n;
Christoph Lametercafeb022006-03-25 03:06:46 -08003743 struct array_cache *new_shared;
Paul Menage3395ee02006-12-06 20:32:16 -08003744 struct array_cache **new_alien = NULL;
Christoph Lametere498be72005-09-09 13:03:32 -07003745
Mel Gorman9c09a952008-01-24 05:49:54 -08003746 for_each_online_node(node) {
Christoph Lametercafeb022006-03-25 03:06:46 -08003747
Paul Menage3395ee02006-12-06 20:32:16 -08003748 if (use_alien_caches) {
Pekka Enberg83b519e2009-06-10 19:40:04 +03003749 new_alien = alloc_alien_cache(node, cachep->limit, gfp);
Paul Menage3395ee02006-12-06 20:32:16 -08003750 if (!new_alien)
3751 goto fail;
3752 }
Christoph Lametercafeb022006-03-25 03:06:46 -08003753
Eric Dumazet63109842007-05-06 14:49:28 -07003754 new_shared = NULL;
3755 if (cachep->shared) {
3756 new_shared = alloc_arraycache(node,
Christoph Lameter0718dc22006-03-25 03:06:47 -08003757 cachep->shared*cachep->batchcount,
Pekka Enberg83b519e2009-06-10 19:40:04 +03003758 0xbaadf00d, gfp);
Eric Dumazet63109842007-05-06 14:49:28 -07003759 if (!new_shared) {
3760 free_alien_cache(new_alien);
3761 goto fail;
3762 }
Christoph Lameter0718dc22006-03-25 03:06:47 -08003763 }
Christoph Lametercafeb022006-03-25 03:06:46 -08003764
Christoph Lameter18bf8542014-08-06 16:04:11 -07003765 n = get_node(cachep, node);
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00003766 if (n) {
3767 struct array_cache *shared = n->shared;
Christoph Lametercafeb022006-03-25 03:06:46 -08003768
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00003769 spin_lock_irq(&n->list_lock);
Christoph Lametere498be72005-09-09 13:03:32 -07003770
Christoph Lametercafeb022006-03-25 03:06:46 -08003771 if (shared)
Christoph Lameter0718dc22006-03-25 03:06:47 -08003772 free_block(cachep, shared->entry,
3773 shared->avail, node);
Christoph Lametere498be72005-09-09 13:03:32 -07003774
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00003775 n->shared = new_shared;
3776 if (!n->alien) {
3777 n->alien = new_alien;
Christoph Lametere498be72005-09-09 13:03:32 -07003778 new_alien = NULL;
3779 }
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00003780 n->free_limit = (1 + nr_cpus_node(node)) *
Andrew Mortona737b3e2006-03-22 00:08:11 -08003781 cachep->batchcount + cachep->num;
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00003782 spin_unlock_irq(&n->list_lock);
Christoph Lametercafeb022006-03-25 03:06:46 -08003783 kfree(shared);
Christoph Lametere498be72005-09-09 13:03:32 -07003784 free_alien_cache(new_alien);
3785 continue;
3786 }
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00003787 n = kmalloc_node(sizeof(struct kmem_cache_node), gfp, node);
3788 if (!n) {
Christoph Lameter0718dc22006-03-25 03:06:47 -08003789 free_alien_cache(new_alien);
3790 kfree(new_shared);
Christoph Lametere498be72005-09-09 13:03:32 -07003791 goto fail;
Christoph Lameter0718dc22006-03-25 03:06:47 -08003792 }
Christoph Lametere498be72005-09-09 13:03:32 -07003793
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00003794 kmem_cache_node_init(n);
Jianyu Zhan5f0985b2014-03-30 17:02:20 +08003795 n->next_reap = jiffies + REAPTIMEOUT_NODE +
3796 ((unsigned long)cachep) % REAPTIMEOUT_NODE;
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00003797 n->shared = new_shared;
3798 n->alien = new_alien;
3799 n->free_limit = (1 + nr_cpus_node(node)) *
Andrew Mortona737b3e2006-03-22 00:08:11 -08003800 cachep->batchcount + cachep->num;
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00003801 cachep->node[node] = n;
Christoph Lametere498be72005-09-09 13:03:32 -07003802 }
Christoph Lametercafeb022006-03-25 03:06:46 -08003803 return 0;
Christoph Lameter0718dc22006-03-25 03:06:47 -08003804
Andrew Mortona737b3e2006-03-22 00:08:11 -08003805fail:
Christoph Lameter3b0efdf2012-06-13 10:24:57 -05003806 if (!cachep->list.next) {
Christoph Lameter0718dc22006-03-25 03:06:47 -08003807 /* Cache is not active yet. Roll back what we did */
3808 node--;
3809 while (node >= 0) {
Christoph Lameter18bf8542014-08-06 16:04:11 -07003810 n = get_node(cachep, node);
3811 if (n) {
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00003812 kfree(n->shared);
3813 free_alien_cache(n->alien);
3814 kfree(n);
Christoph Lameter6a673682013-01-10 19:14:19 +00003815 cachep->node[node] = NULL;
Christoph Lameter0718dc22006-03-25 03:06:47 -08003816 }
3817 node--;
3818 }
3819 }
Christoph Lametercafeb022006-03-25 03:06:46 -08003820 return -ENOMEM;
Christoph Lametere498be72005-09-09 13:03:32 -07003821}
3822
/*
 * Argument block handed to do_ccupdate_local() when the per-cpu array
 * caches of a cache are replaced.
 *
 * @cachep: the cache whose per-cpu arrays are being swapped.
 * @new:    one slot per cpu id; holds the replacement array_cache on the
 *          way in and the previous one after the swap. Declared as a C99
 *          flexible array member (rather than the GNU zero-length array
 *          extension); the allocation is sized by the user of the struct.
 */
struct ccupdate_struct {
	struct kmem_cache *cachep;
	struct array_cache *new[];
};
3827
3828static void do_ccupdate_local(void *info)
3829{
Andrew Mortona737b3e2006-03-22 00:08:11 -08003830 struct ccupdate_struct *new = info;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003831 struct array_cache *old;
3832
3833 check_irq_off();
Pekka Enberg9a2dba42006-02-01 03:05:49 -08003834 old = cpu_cache_get(new->cachep);
Christoph Lametere498be72005-09-09 13:03:32 -07003835
Linus Torvalds1da177e2005-04-16 15:20:36 -07003836 new->cachep->array[smp_processor_id()] = new->new[smp_processor_id()];
3837 new->new[smp_processor_id()] = old;
3838}
3839
Christoph Lameter18004c52012-07-06 15:25:12 -05003840/* Always called with the slab_mutex held */
Glauber Costa943a4512012-12-18 14:23:03 -08003841static int __do_tune_cpucache(struct kmem_cache *cachep, int limit,
Pekka Enberg83b519e2009-06-10 19:40:04 +03003842 int batchcount, int shared, gfp_t gfp)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003843{
Siddha, Suresh Bd2e7b7d2006-09-25 23:31:47 -07003844 struct ccupdate_struct *new;
Christoph Lameter2ed3a4e2006-09-25 23:31:38 -07003845 int i;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003846
Eric Dumazetacfe7d72011-07-25 08:55:42 +02003847 new = kzalloc(sizeof(*new) + nr_cpu_ids * sizeof(struct array_cache *),
3848 gfp);
Siddha, Suresh Bd2e7b7d2006-09-25 23:31:47 -07003849 if (!new)
3850 return -ENOMEM;
3851
Christoph Lametere498be72005-09-09 13:03:32 -07003852 for_each_online_cpu(i) {
Lee Schermerhorn7d6e6d02010-05-26 14:45:03 -07003853 new->new[i] = alloc_arraycache(cpu_to_mem(i), limit,
Pekka Enberg83b519e2009-06-10 19:40:04 +03003854 batchcount, gfp);
Siddha, Suresh Bd2e7b7d2006-09-25 23:31:47 -07003855 if (!new->new[i]) {
Pekka Enbergb28a02d2006-01-08 01:00:37 -08003856 for (i--; i >= 0; i--)
Siddha, Suresh Bd2e7b7d2006-09-25 23:31:47 -07003857 kfree(new->new[i]);
3858 kfree(new);
Christoph Lametere498be72005-09-09 13:03:32 -07003859 return -ENOMEM;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003860 }
3861 }
Siddha, Suresh Bd2e7b7d2006-09-25 23:31:47 -07003862 new->cachep = cachep;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003863
Jens Axboe15c8b6c2008-05-09 09:39:44 +02003864 on_each_cpu(do_ccupdate_local, (void *)new, 1);
Christoph Lametere498be72005-09-09 13:03:32 -07003865
Linus Torvalds1da177e2005-04-16 15:20:36 -07003866 check_irq_on();
Linus Torvalds1da177e2005-04-16 15:20:36 -07003867 cachep->batchcount = batchcount;
3868 cachep->limit = limit;
Christoph Lametere498be72005-09-09 13:03:32 -07003869 cachep->shared = shared;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003870
Christoph Lametere498be72005-09-09 13:03:32 -07003871 for_each_online_cpu(i) {
Siddha, Suresh Bd2e7b7d2006-09-25 23:31:47 -07003872 struct array_cache *ccold = new->new[i];
Christoph Lameter18bf8542014-08-06 16:04:11 -07003873 int node;
3874 struct kmem_cache_node *n;
3875
Linus Torvalds1da177e2005-04-16 15:20:36 -07003876 if (!ccold)
3877 continue;
Christoph Lameter18bf8542014-08-06 16:04:11 -07003878
3879 node = cpu_to_mem(i);
3880 n = get_node(cachep, node);
3881 spin_lock_irq(&n->list_lock);
3882 free_block(cachep, ccold->entry, ccold->avail, node);
3883 spin_unlock_irq(&n->list_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003884 kfree(ccold);
3885 }
Siddha, Suresh Bd2e7b7d2006-09-25 23:31:47 -07003886 kfree(new);
Jianyu Zhan5f0985b2014-03-30 17:02:20 +08003887 return alloc_kmem_cache_node(cachep, gfp);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003888}
3889
Glauber Costa943a4512012-12-18 14:23:03 -08003890static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
3891 int batchcount, int shared, gfp_t gfp)
3892{
3893 int ret;
3894 struct kmem_cache *c = NULL;
3895 int i = 0;
3896
3897 ret = __do_tune_cpucache(cachep, limit, batchcount, shared, gfp);
3898
3899 if (slab_state < FULL)
3900 return ret;
3901
3902 if ((ret < 0) || !is_root_cache(cachep))
3903 return ret;
3904
Glauber Costaebe945c2012-12-18 14:23:10 -08003905 VM_BUG_ON(!mutex_is_locked(&slab_mutex));
Glauber Costa943a4512012-12-18 14:23:03 -08003906 for_each_memcg_cache_index(i) {
Qiang Huang2ade4de2013-11-12 15:08:23 -08003907 c = cache_from_memcg_idx(cachep, i);
Glauber Costa943a4512012-12-18 14:23:03 -08003908 if (c)
3909 /* return value determined by the parent cache only */
3910 __do_tune_cpucache(c, limit, batchcount, shared, gfp);
3911 }
3912
3913 return ret;
3914}
3915
Christoph Lameter18004c52012-07-06 15:25:12 -05003916/* Called with slab_mutex held always */
Pekka Enberg83b519e2009-06-10 19:40:04 +03003917static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003918{
3919 int err;
Glauber Costa943a4512012-12-18 14:23:03 -08003920 int limit = 0;
3921 int shared = 0;
3922 int batchcount = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003923
Glauber Costa943a4512012-12-18 14:23:03 -08003924 if (!is_root_cache(cachep)) {
3925 struct kmem_cache *root = memcg_root_cache(cachep);
3926 limit = root->limit;
3927 shared = root->shared;
3928 batchcount = root->batchcount;
3929 }
3930
3931 if (limit && shared && batchcount)
3932 goto skip_setup;
Andrew Mortona737b3e2006-03-22 00:08:11 -08003933 /*
3934 * The head array serves three purposes:
Linus Torvalds1da177e2005-04-16 15:20:36 -07003935 * - create a LIFO ordering, i.e. return objects that are cache-warm
3936 * - reduce the number of spinlock operations.
Andrew Mortona737b3e2006-03-22 00:08:11 -08003937 * - reduce the number of linked list operations on the slab and
Linus Torvalds1da177e2005-04-16 15:20:36 -07003938 * bufctl chains: array operations are cheaper.
3939 * The numbers are guessed, we should auto-tune as described by
3940 * Bonwick.
3941 */
Christoph Lameter3b0efdf2012-06-13 10:24:57 -05003942 if (cachep->size > 131072)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003943 limit = 1;
Christoph Lameter3b0efdf2012-06-13 10:24:57 -05003944 else if (cachep->size > PAGE_SIZE)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003945 limit = 8;
Christoph Lameter3b0efdf2012-06-13 10:24:57 -05003946 else if (cachep->size > 1024)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003947 limit = 24;
Christoph Lameter3b0efdf2012-06-13 10:24:57 -05003948 else if (cachep->size > 256)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003949 limit = 54;
3950 else
3951 limit = 120;
3952
Andrew Mortona737b3e2006-03-22 00:08:11 -08003953 /*
3954 * CPU bound tasks (e.g. network routing) can exhibit cpu bound
Linus Torvalds1da177e2005-04-16 15:20:36 -07003955 * allocation behaviour: Most allocs on one cpu, most free operations
3956 * on another cpu. For these cases, an efficient object passing between
3957 * cpus is necessary. This is provided by a shared array. The array
3958 * replaces Bonwick's magazine layer.
3959 * On uniprocessor, it's functionally equivalent (but less efficient)
3960 * to a larger limit. Thus disabled by default.
3961 */
3962 shared = 0;
Christoph Lameter3b0efdf2012-06-13 10:24:57 -05003963 if (cachep->size <= PAGE_SIZE && num_possible_cpus() > 1)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003964 shared = 8;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003965
3966#if DEBUG
Andrew Mortona737b3e2006-03-22 00:08:11 -08003967 /*
3968 * With debugging enabled, large batchcount lead to excessively long
3969 * periods with disabled local interrupts. Limit the batchcount
Linus Torvalds1da177e2005-04-16 15:20:36 -07003970 */
3971 if (limit > 32)
3972 limit = 32;
3973#endif
Glauber Costa943a4512012-12-18 14:23:03 -08003974 batchcount = (limit + 1) / 2;
3975skip_setup:
3976 err = do_tune_cpucache(cachep, limit, batchcount, shared, gfp);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003977 if (err)
3978 printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n",
Pekka Enbergb28a02d2006-01-08 01:00:37 -08003979 cachep->name, -err);
Christoph Lameter2ed3a4e2006-09-25 23:31:38 -07003980 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003981}
3982
Christoph Lameter1b552532006-03-22 00:09:07 -08003983/*
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00003984 * Drain an array if it contains any elements taking the node lock only if
3985 * necessary. Note that the node listlock also protects the array_cache
Christoph Lameterb18e7e62006-03-22 00:09:07 -08003986 * if drain_array() is used on the shared array.
Christoph Lameter1b552532006-03-22 00:09:07 -08003987 */
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00003988static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *n,
Christoph Lameter1b552532006-03-22 00:09:07 -08003989 struct array_cache *ac, int force, int node)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003990{
3991 int tofree;
3992
Christoph Lameter1b552532006-03-22 00:09:07 -08003993 if (!ac || !ac->avail)
3994 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003995 if (ac->touched && !force) {
3996 ac->touched = 0;
Christoph Lameterb18e7e62006-03-22 00:09:07 -08003997 } else {
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00003998 spin_lock_irq(&n->list_lock);
Christoph Lameterb18e7e62006-03-22 00:09:07 -08003999 if (ac->avail) {
4000 tofree = force ? ac->avail : (ac->limit + 4) / 5;
4001 if (tofree > ac->avail)
4002 tofree = (ac->avail + 1) / 2;
4003 free_block(cachep, ac->entry, tofree, node);
4004 ac->avail -= tofree;
4005 memmove(ac->entry, &(ac->entry[tofree]),
4006 sizeof(void *) * ac->avail);
4007 }
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00004008 spin_unlock_irq(&n->list_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004009 }
4010}
4011
4012/**
4013 * cache_reap - Reclaim memory from caches.
Randy Dunlap05fb6bf2007-02-28 20:12:13 -08004014 * @w: work descriptor
Linus Torvalds1da177e2005-04-16 15:20:36 -07004015 *
4016 * Called from workqueue/eventd every few seconds.
4017 * Purpose:
4018 * - clear the per-cpu caches for this CPU.
4019 * - return freeable pages to the main free memory pool.
4020 *
Andrew Mortona737b3e2006-03-22 00:08:11 -08004021 * If we cannot acquire the cache chain mutex then just give up - we'll try
4022 * again on the next iteration.
Linus Torvalds1da177e2005-04-16 15:20:36 -07004023 */
Christoph Lameter7c5cae32007-02-10 01:42:55 -08004024static void cache_reap(struct work_struct *w)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004025{
Christoph Hellwig7a7c3812006-06-23 02:03:17 -07004026 struct kmem_cache *searchp;
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00004027 struct kmem_cache_node *n;
Lee Schermerhorn7d6e6d02010-05-26 14:45:03 -07004028 int node = numa_mem_id();
Jean Delvarebf6aede2009-04-02 16:56:54 -07004029 struct delayed_work *work = to_delayed_work(w);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004030
Christoph Lameter18004c52012-07-06 15:25:12 -05004031 if (!mutex_trylock(&slab_mutex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07004032 /* Give up. Setup the next iteration. */
Christoph Lameter7c5cae32007-02-10 01:42:55 -08004033 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004034
Christoph Lameter18004c52012-07-06 15:25:12 -05004035 list_for_each_entry(searchp, &slab_caches, list) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07004036 check_irq_on();
4037
Christoph Lameter35386e32006-03-22 00:09:05 -08004038 /*
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00004039 * We only take the node lock if absolutely necessary and we
Christoph Lameter35386e32006-03-22 00:09:05 -08004040 * have established with reasonable certainty that
4041 * we can do some work if the lock was obtained.
4042 */
Christoph Lameter18bf8542014-08-06 16:04:11 -07004043 n = get_node(searchp, node);
Christoph Lameter35386e32006-03-22 00:09:05 -08004044
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00004045 reap_alien(searchp, n);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004046
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00004047 drain_array(searchp, n, cpu_cache_get(searchp), 0, node);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004048
Christoph Lameter35386e32006-03-22 00:09:05 -08004049 /*
4050 * These are racy checks but it does not matter
4051 * if we skip one check or scan twice.
4052 */
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00004053 if (time_after(n->next_reap, jiffies))
Christoph Lameter35386e32006-03-22 00:09:05 -08004054 goto next;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004055
Jianyu Zhan5f0985b2014-03-30 17:02:20 +08004056 n->next_reap = jiffies + REAPTIMEOUT_NODE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004057
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00004058 drain_array(searchp, n, n->shared, 0, node);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004059
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00004060 if (n->free_touched)
4061 n->free_touched = 0;
Christoph Lametered11d9e2006-06-30 01:55:45 -07004062 else {
4063 int freed;
4064
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00004065 freed = drain_freelist(searchp, n, (n->free_limit +
Christoph Lametered11d9e2006-06-30 01:55:45 -07004066 5 * searchp->num - 1) / (5 * searchp->num));
4067 STATS_ADD_REAPED(searchp, freed);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004068 }
Christoph Lameter35386e32006-03-22 00:09:05 -08004069next:
Linus Torvalds1da177e2005-04-16 15:20:36 -07004070 cond_resched();
4071 }
4072 check_irq_on();
Christoph Lameter18004c52012-07-06 15:25:12 -05004073 mutex_unlock(&slab_mutex);
Christoph Lameter8fce4d82006-03-09 17:33:54 -08004074 next_reap_node();
Christoph Lameter7c5cae32007-02-10 01:42:55 -08004075out:
Andrew Mortona737b3e2006-03-22 00:08:11 -08004076 /* Set up the next iteration */
Jianyu Zhan5f0985b2014-03-30 17:02:20 +08004077 schedule_delayed_work(work, round_jiffies_relative(REAPTIMEOUT_AC));
Linus Torvalds1da177e2005-04-16 15:20:36 -07004078}
4079
Linus Torvalds158a9622008-01-02 13:04:48 -08004080#ifdef CONFIG_SLABINFO
Glauber Costa0d7561c2012-10-19 18:20:27 +04004081void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004082{
Joonsoo Kim8456a642013-10-24 10:07:49 +09004083 struct page *page;
Pekka Enbergb28a02d2006-01-08 01:00:37 -08004084 unsigned long active_objs;
4085 unsigned long num_objs;
4086 unsigned long active_slabs = 0;
4087 unsigned long num_slabs, free_objects = 0, shared_avail = 0;
Christoph Lametere498be72005-09-09 13:03:32 -07004088 const char *name;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004089 char *error = NULL;
Christoph Lametere498be72005-09-09 13:03:32 -07004090 int node;
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00004091 struct kmem_cache_node *n;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004092
Linus Torvalds1da177e2005-04-16 15:20:36 -07004093 active_objs = 0;
4094 num_slabs = 0;
Christoph Lameter18bf8542014-08-06 16:04:11 -07004095 for_each_kmem_cache_node(cachep, node, n) {
Christoph Lametere498be72005-09-09 13:03:32 -07004096
Ravikiran G Thirumalaica3b9b92006-02-04 23:27:58 -08004097 check_irq_on();
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00004098 spin_lock_irq(&n->list_lock);
Christoph Lametere498be72005-09-09 13:03:32 -07004099
Joonsoo Kim8456a642013-10-24 10:07:49 +09004100 list_for_each_entry(page, &n->slabs_full, lru) {
4101 if (page->active != cachep->num && !error)
Christoph Lametere498be72005-09-09 13:03:32 -07004102 error = "slabs_full accounting error";
4103 active_objs += cachep->num;
4104 active_slabs++;
4105 }
Joonsoo Kim8456a642013-10-24 10:07:49 +09004106 list_for_each_entry(page, &n->slabs_partial, lru) {
4107 if (page->active == cachep->num && !error)
Joonsoo Kim106a74e2013-10-24 10:07:48 +09004108 error = "slabs_partial accounting error";
Joonsoo Kim8456a642013-10-24 10:07:49 +09004109 if (!page->active && !error)
Joonsoo Kim106a74e2013-10-24 10:07:48 +09004110 error = "slabs_partial accounting error";
Joonsoo Kim8456a642013-10-24 10:07:49 +09004111 active_objs += page->active;
Christoph Lametere498be72005-09-09 13:03:32 -07004112 active_slabs++;
4113 }
Joonsoo Kim8456a642013-10-24 10:07:49 +09004114 list_for_each_entry(page, &n->slabs_free, lru) {
4115 if (page->active && !error)
Joonsoo Kim106a74e2013-10-24 10:07:48 +09004116 error = "slabs_free accounting error";
Christoph Lametere498be72005-09-09 13:03:32 -07004117 num_slabs++;
4118 }
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00004119 free_objects += n->free_objects;
4120 if (n->shared)
4121 shared_avail += n->shared->avail;
Christoph Lametere498be72005-09-09 13:03:32 -07004122
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00004123 spin_unlock_irq(&n->list_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004124 }
Pekka Enbergb28a02d2006-01-08 01:00:37 -08004125 num_slabs += active_slabs;
4126 num_objs = num_slabs * cachep->num;
Christoph Lametere498be72005-09-09 13:03:32 -07004127 if (num_objs - active_objs != free_objects && !error)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004128 error = "free_objects accounting error";
4129
Pekka Enbergb28a02d2006-01-08 01:00:37 -08004130 name = cachep->name;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004131 if (error)
4132 printk(KERN_ERR "slab: cache %s error: %s\n", name, error);
4133
Glauber Costa0d7561c2012-10-19 18:20:27 +04004134 sinfo->active_objs = active_objs;
4135 sinfo->num_objs = num_objs;
4136 sinfo->active_slabs = active_slabs;
4137 sinfo->num_slabs = num_slabs;
4138 sinfo->shared_avail = shared_avail;
4139 sinfo->limit = cachep->limit;
4140 sinfo->batchcount = cachep->batchcount;
4141 sinfo->shared = cachep->shared;
4142 sinfo->objects_per_slab = cachep->num;
4143 sinfo->cache_order = cachep->gfporder;
4144}
4145
/*
 * Append the optional statistics columns of @cachep to @m. Emits nothing
 * unless the file is built with STATS enabled.
 */
void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *cachep)
{
#if STATS
	/* node stats */
	seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu "
		      "%4lu %4lu %4lu %4lu %4lu",
		   (unsigned long)cachep->num_allocations,
		   (unsigned long)cachep->high_mark,
		   (unsigned long)cachep->grown,
		   (unsigned long)cachep->reaped,
		   (unsigned long)cachep->errors,
		   (unsigned long)cachep->max_freeable,
		   (unsigned long)cachep->node_allocs,
		   (unsigned long)cachep->node_frees,
		   (unsigned long)cachep->node_overflow);

	/* cpu stats */
	{
		unsigned long hits_alloc = atomic_read(&cachep->allochit);
		unsigned long miss_alloc = atomic_read(&cachep->allocmiss);
		unsigned long hits_free = atomic_read(&cachep->freehit);
		unsigned long miss_free = atomic_read(&cachep->freemiss);

		seq_printf(m, " : cpustat %6lu %6lu %6lu %6lu",
			   hits_alloc, miss_alloc, hits_free, miss_free);
	}
#endif
}
4178
Linus Torvalds1da177e2005-04-16 15:20:36 -07004179#define MAX_SLABINFO_WRITE 128
4180/**
4181 * slabinfo_write - Tuning for the slab allocator
4182 * @file: unused
4183 * @buffer: user buffer
4184 * @count: data length
4185 * @ppos: unused
4186 */
Glauber Costab7454ad2012-10-19 18:20:25 +04004187ssize_t slabinfo_write(struct file *file, const char __user *buffer,
Pekka Enbergb28a02d2006-01-08 01:00:37 -08004188 size_t count, loff_t *ppos)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004189{
Pekka Enbergb28a02d2006-01-08 01:00:37 -08004190 char kbuf[MAX_SLABINFO_WRITE + 1], *tmp;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004191 int limit, batchcount, shared, res;
Christoph Hellwig7a7c3812006-06-23 02:03:17 -07004192 struct kmem_cache *cachep;
Pekka Enbergb28a02d2006-01-08 01:00:37 -08004193
Linus Torvalds1da177e2005-04-16 15:20:36 -07004194 if (count > MAX_SLABINFO_WRITE)
4195 return -EINVAL;
4196 if (copy_from_user(&kbuf, buffer, count))
4197 return -EFAULT;
Pekka Enbergb28a02d2006-01-08 01:00:37 -08004198 kbuf[MAX_SLABINFO_WRITE] = '\0';
Linus Torvalds1da177e2005-04-16 15:20:36 -07004199
4200 tmp = strchr(kbuf, ' ');
4201 if (!tmp)
4202 return -EINVAL;
4203 *tmp = '\0';
4204 tmp++;
4205 if (sscanf(tmp, " %d %d %d", &limit, &batchcount, &shared) != 3)
4206 return -EINVAL;
4207
4208 /* Find the cache in the chain of caches. */
Christoph Lameter18004c52012-07-06 15:25:12 -05004209 mutex_lock(&slab_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004210 res = -EINVAL;
Christoph Lameter18004c52012-07-06 15:25:12 -05004211 list_for_each_entry(cachep, &slab_caches, list) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07004212 if (!strcmp(cachep->name, kbuf)) {
Andrew Mortona737b3e2006-03-22 00:08:11 -08004213 if (limit < 1 || batchcount < 1 ||
4214 batchcount > limit || shared < 0) {
Christoph Lametere498be72005-09-09 13:03:32 -07004215 res = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004216 } else {
Christoph Lametere498be72005-09-09 13:03:32 -07004217 res = do_tune_cpucache(cachep, limit,
Pekka Enberg83b519e2009-06-10 19:40:04 +03004218 batchcount, shared,
4219 GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004220 }
4221 break;
4222 }
4223 }
Christoph Lameter18004c52012-07-06 15:25:12 -05004224 mutex_unlock(&slab_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004225 if (res >= 0)
4226 res = count;
4227 return res;
4228}
Al Viro871751e2006-03-25 03:06:39 -08004229
4230#ifdef CONFIG_DEBUG_SLAB_LEAK
4231
4232static void *leaks_start(struct seq_file *m, loff_t *pos)
4233{
Christoph Lameter18004c52012-07-06 15:25:12 -05004234 mutex_lock(&slab_mutex);
4235 return seq_list_start(&slab_caches, *pos);
Al Viro871751e2006-03-25 03:06:39 -08004236}
4237
/*
 * Account one more occurrence of caller address @v in table @n.
 *
 * Table layout: n[0] is the entry limit, n[1] the number of entries in
 * use, followed from n[2] on by (address, count) pairs kept sorted by
 * ascending address. An existing address is found by binary search and
 * has its count bumped; a new one is inserted at its sorted position.
 *
 * Returns 1 if @v was accounted (or is 0 and therefore ignored), 0 if
 * bumping the entry count made it reach the limit - in which case n[1]
 * has been incremented but nothing was inserted.
 */
static inline int add_caller(unsigned long *n, unsigned long v)
{
	unsigned long *base, *mid;
	int remaining, half;

	if (v == 0)
		return 1;

	base = n + 2;
	for (remaining = n[1]; remaining != 0; ) {
		half = remaining / 2;
		mid = base + 2 * half;
		if (*mid == v) {
			mid[1]++;
			return 1;
		}
		if (*mid > v) {
			/* Continue in the lower half. */
			remaining = half;
		} else {
			/* Continue behind the probed pair. */
			base = mid + 2;
			remaining -= half + 1;
		}
	}

	n[1]++;
	if (n[1] == n[0])
		return 0;

	/* Open a gap of one pair at the insertion point. */
	memmove(base + 2, base,
		n[1] * 2 * sizeof(unsigned long) -
		((char *)base - (char *)n));
	base[0] = v;
	base[1] = 1;
	return 1;
}
4267
Joonsoo Kim8456a642013-10-24 10:07:49 +09004268static void handle_slab(unsigned long *n, struct kmem_cache *c,
4269 struct page *page)
Al Viro871751e2006-03-25 03:06:39 -08004270{
4271 void *p;
Joonsoo Kim03787302014-06-23 13:22:06 -07004272 int i;
Joonsoo Kimb1cb0982013-10-24 10:07:45 +09004273
Al Viro871751e2006-03-25 03:06:39 -08004274 if (n[0] == n[1])
4275 return;
Joonsoo Kim8456a642013-10-24 10:07:49 +09004276 for (i = 0, p = page->s_mem; i < c->num; i++, p += c->size) {
Joonsoo Kim03787302014-06-23 13:22:06 -07004277 if (get_obj_status(page, i) != OBJECT_ACTIVE)
Al Viro871751e2006-03-25 03:06:39 -08004278 continue;
Joonsoo Kimb1cb0982013-10-24 10:07:45 +09004279
Al Viro871751e2006-03-25 03:06:39 -08004280 if (!add_caller(n, (unsigned long)*dbg_userword(c, p)))
4281 return;
4282 }
4283}
4284
/*
 * Print @address to @m. With CONFIG_KALLSYMS and a successful symbol
 * lookup the output is "name+offset/size", followed by " [module]" when a
 * module name was returned; otherwise the raw pointer value is printed.
 */
static void show_symbol(struct seq_file *m, unsigned long address)
{
#ifdef CONFIG_KALLSYMS
	unsigned long sym_offset, sym_size;
	char modname[MODULE_NAME_LEN], symname[KSYM_NAME_LEN];

	if (!lookup_symbol_attrs(address, &sym_size, &sym_offset,
				 modname, symname)) {
		seq_printf(m, "%s+%#lx/%#lx", symname, sym_offset, sym_size);
		if (modname[0] != '\0')
			seq_printf(m, " [%s]", modname);
		return;
	}
#endif
	seq_printf(m, "%p", (void *)address);
}
4300
4301static int leaks_show(struct seq_file *m, void *p)
4302{
Thierry Reding0672aa72012-06-22 19:42:49 +02004303 struct kmem_cache *cachep = list_entry(p, struct kmem_cache, list);
Joonsoo Kim8456a642013-10-24 10:07:49 +09004304 struct page *page;
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00004305 struct kmem_cache_node *n;
Al Viro871751e2006-03-25 03:06:39 -08004306 const char *name;
Christoph Lameterdb845062013-02-05 18:45:23 +00004307 unsigned long *x = m->private;
Al Viro871751e2006-03-25 03:06:39 -08004308 int node;
4309 int i;
4310
4311 if (!(cachep->flags & SLAB_STORE_USER))
4312 return 0;
4313 if (!(cachep->flags & SLAB_RED_ZONE))
4314 return 0;
4315
4316 /* OK, we can do it */
4317
Christoph Lameterdb845062013-02-05 18:45:23 +00004318 x[1] = 0;
Al Viro871751e2006-03-25 03:06:39 -08004319
Christoph Lameter18bf8542014-08-06 16:04:11 -07004320 for_each_kmem_cache_node(cachep, node, n) {
Al Viro871751e2006-03-25 03:06:39 -08004321
4322 check_irq_on();
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00004323 spin_lock_irq(&n->list_lock);
Al Viro871751e2006-03-25 03:06:39 -08004324
Joonsoo Kim8456a642013-10-24 10:07:49 +09004325 list_for_each_entry(page, &n->slabs_full, lru)
4326 handle_slab(x, cachep, page);
4327 list_for_each_entry(page, &n->slabs_partial, lru)
4328 handle_slab(x, cachep, page);
Christoph Lameterce8eb6c2013-01-10 19:14:19 +00004329 spin_unlock_irq(&n->list_lock);
Al Viro871751e2006-03-25 03:06:39 -08004330 }
4331 name = cachep->name;
Christoph Lameterdb845062013-02-05 18:45:23 +00004332 if (x[0] == x[1]) {
Al Viro871751e2006-03-25 03:06:39 -08004333 /* Increase the buffer size */
Christoph Lameter18004c52012-07-06 15:25:12 -05004334 mutex_unlock(&slab_mutex);
Christoph Lameterdb845062013-02-05 18:45:23 +00004335 m->private = kzalloc(x[0] * 4 * sizeof(unsigned long), GFP_KERNEL);
Al Viro871751e2006-03-25 03:06:39 -08004336 if (!m->private) {
4337 /* Too bad, we are really out */
Christoph Lameterdb845062013-02-05 18:45:23 +00004338 m->private = x;
Christoph Lameter18004c52012-07-06 15:25:12 -05004339 mutex_lock(&slab_mutex);
Al Viro871751e2006-03-25 03:06:39 -08004340 return -ENOMEM;
4341 }
Christoph Lameterdb845062013-02-05 18:45:23 +00004342 *(unsigned long *)m->private = x[0] * 2;
4343 kfree(x);
Christoph Lameter18004c52012-07-06 15:25:12 -05004344 mutex_lock(&slab_mutex);
Al Viro871751e2006-03-25 03:06:39 -08004345 /* Now make sure this entry will be retried */
4346 m->count = m->size;
4347 return 0;
4348 }
Christoph Lameterdb845062013-02-05 18:45:23 +00004349 for (i = 0; i < x[1]; i++) {
4350 seq_printf(m, "%s: %lu ", name, x[2*i+3]);
4351 show_symbol(m, x[2*i+2]);
Al Viro871751e2006-03-25 03:06:39 -08004352 seq_putc(m, '\n');
4353 }
Siddha, Suresh Bd2e7b7d2006-09-25 23:31:47 -07004354
Al Viro871751e2006-03-25 03:06:39 -08004355 return 0;
4356}
4357
Alexey Dobriyana0ec95a2008-10-06 00:59:10 +04004358static const struct seq_operations slabstats_op = {
Al Viro871751e2006-03-25 03:06:39 -08004359 .start = leaks_start,
Wanpeng Li276a2432013-07-08 08:08:28 +08004360 .next = slab_next,
4361 .stop = slab_stop,
Al Viro871751e2006-03-25 03:06:39 -08004362 .show = leaks_show,
4363};
Alexey Dobriyana0ec95a2008-10-06 00:59:10 +04004364
4365static int slabstats_open(struct inode *inode, struct file *file)
4366{
4367 unsigned long *n = kzalloc(PAGE_SIZE, GFP_KERNEL);
4368 int ret = -ENOMEM;
4369 if (n) {
4370 ret = seq_open(file, &slabstats_op);
4371 if (!ret) {
4372 struct seq_file *m = file->private_data;
4373 *n = PAGE_SIZE / (2 * sizeof(unsigned long));
4374 m->private = n;
4375 n = NULL;
4376 }
4377 kfree(n);
4378 }
4379 return ret;
4380}
4381
4382static const struct file_operations proc_slabstats_operations = {
4383 .open = slabstats_open,
4384 .read = seq_read,
4385 .llseek = seq_lseek,
4386 .release = seq_release_private,
4387};
Al Viro871751e2006-03-25 03:06:39 -08004388#endif
Alexey Dobriyana0ec95a2008-10-06 00:59:10 +04004389
4390static int __init slab_proc_init(void)
4391{
4392#ifdef CONFIG_DEBUG_SLAB_LEAK
4393 proc_create("slab_allocators", 0, NULL, &proc_slabstats_operations);
4394#endif
4395 return 0;
4396}
4397module_init(slab_proc_init);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004398#endif
4399
Manfred Spraul00e145b2005-09-03 15:55:07 -07004400/**
4401 * ksize - get the actual amount of memory allocated for a given object
4402 * @objp: Pointer to the object
4403 *
4404 * kmalloc may internally round up allocations and return more memory
4405 * than requested. ksize() can be used to determine the actual amount of
4406 * memory allocated. The caller may use this additional memory, even though
4407 * a smaller amount of memory was initially specified with the kmalloc call.
4408 * The caller must guarantee that objp points to a valid object previously
4409 * allocated with either kmalloc() or kmem_cache_alloc(). The object
4410 * must not be freed during the duration of the call.
4411 */
Pekka Enbergfd76bab2007-05-06 14:48:40 -07004412size_t ksize(const void *objp)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004413{
Christoph Lameteref8b4522007-10-16 01:24:46 -07004414 BUG_ON(!objp);
4415 if (unlikely(objp == ZERO_SIZE_PTR))
Manfred Spraul00e145b2005-09-03 15:55:07 -07004416 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004417
Christoph Lameter8c138bc2012-06-13 10:24:58 -05004418 return virt_to_cache(objp)->object_size;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004419}
Kirill A. Shutemovb1aabec2009-02-10 15:21:44 +02004420EXPORT_SYMBOL(ksize);