percpu: add first_bit to keep track of the first free in the bitmap

This patch adds first_bit to keep track of the first free bit in the
bitmap. This hint helps prevent scanning of fully allocated blocks.

Signed-off-by: Dennis Zhou <dennisszhou@gmail.com>
Reviewed-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
diff --git a/mm/percpu-internal.h b/mm/percpu-internal.h
index 252ae9e..e60e049 100644
--- a/mm/percpu-internal.h
+++ b/mm/percpu-internal.h
@@ -36,7 +36,7 @@ struct pcpu_chunk {
 	struct pcpu_block_md	*md_blocks;	/* metadata blocks */
 
 	void			*data;		/* chunk data */
-	int			first_free;	/* no free below this */
+	int			first_bit;	/* no free below this */
 	bool			immutable;	/* no [de]population allowed */
 	int			start_offset;	/* the overlap with the previous
 						   region to have a page aligned
diff --git a/mm/percpu-stats.c b/mm/percpu-stats.c
index ad03d73..6142484e 100644
--- a/mm/percpu-stats.c
+++ b/mm/percpu-stats.c
@@ -121,6 +121,7 @@ static void chunk_map_stats(struct seq_file *m, struct pcpu_chunk *chunk,
 	P("nr_alloc", chunk->nr_alloc);
 	P("max_alloc_size", chunk->max_alloc_size);
 	P("empty_pop_pages", chunk->nr_empty_pop_pages);
+	P("first_bit", chunk->first_bit);
 	P("free_bytes", chunk->free_bytes);
 	P("contig_bytes", chunk->contig_bits * PCPU_MIN_ALLOC_SIZE);
 	P("sum_frag", sum_frag);
diff --git a/mm/percpu.c b/mm/percpu.c
index 708c6de..83abb190 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -427,7 +427,7 @@ static void pcpu_chunk_refresh_hint(struct pcpu_chunk *chunk)
 	chunk->contig_bits = 0;
 
 	bits = nr_empty_pop_pages = 0;
-	pcpu_for_each_unpop_region(chunk->alloc_map, rs, re, 0,
+	pcpu_for_each_unpop_region(chunk->alloc_map, rs, re, chunk->first_bit,
 				   pcpu_chunk_map_bits(chunk)) {
 		bits = re - rs;
 
@@ -646,7 +646,8 @@ static int pcpu_find_block_fit(struct pcpu_chunk *chunk, int alloc_bits,
 	int bit_off, bits;
 	int re; /* region end */
 
-	pcpu_for_each_unpop_region(chunk->alloc_map, bit_off, re, 0,
+	pcpu_for_each_unpop_region(chunk->alloc_map, bit_off, re,
+				   chunk->first_bit,
 				   pcpu_chunk_map_bits(chunk)) {
 		bits = re - bit_off;
 
@@ -715,6 +716,13 @@ static int pcpu_alloc_area(struct pcpu_chunk *chunk, int alloc_bits,
 
 	chunk->free_bytes -= alloc_bits * PCPU_MIN_ALLOC_SIZE;
 
+	/* update first free bit */
+	if (bit_off == chunk->first_bit)
+		chunk->first_bit = find_next_zero_bit(
+					chunk->alloc_map,
+					pcpu_chunk_map_bits(chunk),
+					bit_off + alloc_bits);
+
 	pcpu_block_update_hint_alloc(chunk, bit_off, alloc_bits);
 
 	pcpu_chunk_relocate(chunk, oslot);
@@ -750,6 +758,9 @@ static void pcpu_free_area(struct pcpu_chunk *chunk, int off)
 	/* update metadata */
 	chunk->free_bytes += bits * PCPU_MIN_ALLOC_SIZE;
 
+	/* update first free bit */
+	chunk->first_bit = min(chunk->first_bit, bit_off);
+
 	pcpu_block_update_hint_free(chunk, bit_off, bits);
 
 	pcpu_chunk_relocate(chunk, oslot);
@@ -841,6 +852,8 @@ static struct pcpu_chunk * __init pcpu_alloc_first_chunk(unsigned long tmp_addr,
 		set_bit(0, chunk->bound_map);
 		set_bit(offset_bits, chunk->bound_map);
 
+		chunk->first_bit = offset_bits;
+
 		pcpu_block_update_hint_alloc(chunk, 0, offset_bits);
 	}