percpu: use metadata blocks to update the chunk contig hint

The largest free region will either be a block level contig hint or an
aggregate over the left_free and right_free areas of blocks. This is a
much smaller set of free areas that need to be checked than a full
traverse.

Signed-off-by: Dennis Zhou <dennisszhou@gmail.com>
Reviewed-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
diff --git a/mm/percpu.c b/mm/percpu.c
index 57b3168..0f05647 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -306,6 +306,67 @@ static unsigned long pcpu_block_off_to_off(int index, int off)
 }
 
 /**
+ * pcpu_next_md_free_region - finds the next hint free area
+ * @chunk: chunk of interest
+ * @bit_off: chunk offset
+ * @bits: size of free area
+ *
+ * Helper function for pcpu_for_each_md_free_region.  It checks
+ * block->contig_hint and performs aggregation across blocks to find the
+ * next hint.  It modifies bit_off and bits in-place to be consumed in the
+ * loop.
+ */
+static void pcpu_next_md_free_region(struct pcpu_chunk *chunk, int *bit_off,
+				     int *bits)
+{
+	int i = pcpu_off_to_block_index(*bit_off);
+	int block_off = pcpu_off_to_block_off(*bit_off);
+	struct pcpu_block_md *block;
+
+	*bits = 0;
+	for (block = chunk->md_blocks + i; i < pcpu_chunk_nr_blocks(chunk);
+	     block++, i++) {
+		/* handles contig area across blocks */
+		if (*bits) {
+			*bits += block->left_free;
+			if (block->left_free == PCPU_BITMAP_BLOCK_BITS)
+				continue;
+			return;
+		}
+
+		/*
+		 * This checks three things.  First is there a contig_hint to
+		 * check.  Second, have we checked this hint before by
+		 * comparing the block_off.  Third, is this the same as the
+		 * right contig hint.  In the last case, it spills over into
+		 * the next block and should be handled by the contig area
+		 * across blocks code.
+		 */
+		*bits = block->contig_hint;
+		if (*bits && block->contig_hint_start >= block_off &&
+		    *bits + block->contig_hint_start < PCPU_BITMAP_BLOCK_BITS) {
+			*bit_off = pcpu_block_off_to_off(i,
+					block->contig_hint_start);
+			return;
+		}
+
+		*bits = block->right_free;
+		*bit_off = (i + 1) * PCPU_BITMAP_BLOCK_BITS - block->right_free;
+	}
+}
+
+/*
+ * Metadata free area iterators.  These perform aggregation of free areas
+ * based on the metadata blocks and return the offset @bit_off and size in
+ * bits of the free area @bits.
+ */
+#define pcpu_for_each_md_free_region(chunk, bit_off, bits)		\
+	for (pcpu_next_md_free_region((chunk), &(bit_off), &(bits));	\
+	     (bit_off) < pcpu_chunk_map_bits((chunk));			\
+	     (bit_off) += (bits) + 1,					\
+	     pcpu_next_md_free_region((chunk), &(bit_off), &(bits)))
+
+/**
  * pcpu_mem_zalloc - allocate memory
  * @size: bytes to allocate
  *
@@ -425,29 +486,28 @@ static void pcpu_chunk_update(struct pcpu_chunk *chunk, int bit_off, int bits)
  * pcpu_chunk_refresh_hint - updates metadata about a chunk
  * @chunk: chunk of interest
  *
- * Iterates over the chunk to find the largest free area.
+ * Iterates over the metadata blocks to find the largest contig area.
+ * It also counts the populated pages and uses the delta to update the
+ * global count.
  *
  * Updates:
  *      chunk->contig_bits
  *      chunk->contig_bits_start
- *      nr_empty_pop_pages
+ *      nr_empty_pop_pages (chunk and global)
  */
 static void pcpu_chunk_refresh_hint(struct pcpu_chunk *chunk)
 {
-	int bits, nr_empty_pop_pages;
-	int rs, re; /* region start, region end */
+	int bit_off, bits, nr_empty_pop_pages;
 
 	/* clear metadata */
 	chunk->contig_bits = 0;
 
+	bit_off = chunk->first_bit;
 	bits = nr_empty_pop_pages = 0;
-	pcpu_for_each_unpop_region(chunk->alloc_map, rs, re, chunk->first_bit,
-				   pcpu_chunk_map_bits(chunk)) {
-		bits = re - rs;
+	pcpu_for_each_md_free_region(chunk, bit_off, bits) {
+		pcpu_chunk_update(chunk, bit_off, bits);
 
-		pcpu_chunk_update(chunk, rs, bits);
-
-		nr_empty_pop_pages += pcpu_cnt_pop_pages(chunk, rs, bits);
+		nr_empty_pop_pages += pcpu_cnt_pop_pages(chunk, bit_off, bits);
 	}
 
 	/*