blk-mq: use static mapping
The blk-mq layer remaps s/w contexts to h/w contexts, and h/w contexts to
CPUs, whenever a CPU hotplug event happens.
This remapping has to wait for queue freezing, which may take tens of
milliseconds, resulting in high latency in the CPU hotplug path.
This patch makes the above-mentioned mappings static so that remapping
is avoided when a CPU hotplug event happens. This improves CPU hotplug
latency by up to 90 percent for the CPU up path and by up to 50 percent
for the CPU down path.
Change-Id: Idf38cb6c4e78c91fda3c86608c6d0441f01ab435
Signed-off-by: Imran Khan <kimran@codeaurora.org>
Signed-off-by: Kyle Yan <kyan@codeaurora.org>
diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c
index 19b1d9c..df02faf 100644
--- a/block/blk-mq-cpumap.c
+++ b/block/blk-mq-cpumap.c
@@ -35,37 +35,19 @@ int blk_mq_map_queues(struct blk_mq_tag_set *set)
{
unsigned int *map = set->mq_map;
unsigned int nr_queues = set->nr_hw_queues;
- const struct cpumask *online_mask = cpu_online_mask;
- unsigned int i, nr_cpus, nr_uniq_cpus, queue, first_sibling;
+ unsigned int i, queue, first_sibling;
cpumask_var_t cpus;
- if (!alloc_cpumask_var(&cpus, GFP_ATOMIC))
- return -ENOMEM;
-
- cpumask_clear(cpus);
- nr_cpus = nr_uniq_cpus = 0;
- for_each_cpu(i, online_mask) {
- nr_cpus++;
- first_sibling = get_first_sibling(i);
- if (!cpumask_test_cpu(first_sibling, cpus))
- nr_uniq_cpus++;
- cpumask_set_cpu(i, cpus);
- }
-
queue = 0;
for_each_possible_cpu(i) {
- if (!cpumask_test_cpu(i, online_mask)) {
- map[i] = 0;
- continue;
- }
-
/*
* Easy case - we have equal or more hardware queues. Or
* there are no thread siblings to take into account. Do
* 1:1 if enough, or sequential mapping if less.
*/
- if (nr_queues >= nr_cpus || nr_cpus == nr_uniq_cpus) {
- map[i] = cpu_to_queue_index(nr_cpus, nr_queues, queue);
+ if (nr_queues >= nr_cpu_ids) {
+ map[i] = cpu_to_queue_index(nr_cpu_ids, nr_queues,
+ queue);
queue++;
continue;
}
@@ -77,7 +59,7 @@ int blk_mq_map_queues(struct blk_mq_tag_set *set)
*/
first_sibling = get_first_sibling(i);
if (first_sibling == i) {
- map[i] = cpu_to_queue_index(nr_uniq_cpus, nr_queues,
+ map[i] = cpu_to_queue_index(nr_cpu_ids, nr_queues,
queue);
queue++;
} else