// SPDX-License-Identifier: GPL-2.0
/*
 * Benchmark find_next_bit and related bit operations.
 *
 * Copyright 2020 Google LLC.
 */
| 7 | #include <stdlib.h> |
| 8 | #include "bench.h" |
| 9 | #include "../util/stat.h" |
| 10 | #include <linux/bitmap.h> |
| 11 | #include <linux/bitops.h> |
| 12 | #include <linux/time64.h> |
| 13 | #include <subcmd/parse-options.h> |
| 14 | |
| 15 | static unsigned int outer_iterations = 5; |
| 16 | static unsigned int inner_iterations = 100000; |
| 17 | |
| 18 | static const struct option options[] = { |
| 19 | OPT_UINTEGER('i', "outer-iterations", &outer_iterations, |
Colin Ian King | f9f9506 | 2020-08-12 07:46:47 +0100 | [diff] [blame] | 20 | "Number of outer iterations used"), |
Ian Rogers | 7c43b0c | 2020-07-29 15:00:34 -0700 | [diff] [blame] | 21 | OPT_UINTEGER('j', "inner-iterations", &inner_iterations, |
Colin Ian King | f9f9506 | 2020-08-12 07:46:47 +0100 | [diff] [blame] | 22 | "Number of inner iterations used"), |
Ian Rogers | 7c43b0c | 2020-07-29 15:00:34 -0700 | [diff] [blame] | 23 | OPT_END() |
| 24 | }; |
| 25 | |
/* NULL-terminated usage text handed to the option parser. */
static const char *const bench_usage[] = {
	"perf bench mem find_bit <options>",
	NULL
};
| 30 | |
| 31 | static unsigned int accumulator; |
| 32 | static unsigned int use_of_val; |
| 33 | |
| 34 | static noinline void workload(int val) |
| 35 | { |
| 36 | use_of_val += val; |
| 37 | accumulator++; |
| 38 | } |
| 39 | |
/*
 * asm_test_bit - report whether bit @nr is set in the bitmap at @addr.
 *
 * On x86 builds whose compiler advertises asm flag outputs, a single BT
 * instruction is issued and the carry flag it produces is returned.
 * Every other configuration simply aliases asm_test_bit to test_bit.
 */
#if (defined(__i386__) || defined(__x86_64__)) && defined(__GCC_ASM_FLAG_OUTPUTS__)
static bool asm_test_bit(long nr, const unsigned long *addr)
{
	bool carry;

	/* "=@ccc" binds the output operand to the carry flag after BT. */
	__asm__ __volatile__("bt %2,%1"
			     : "=@ccc" (carry)
			     : "m" (*(unsigned long *)addr), "Ir" (nr)
			     : "memory");

	return carry;
}
#else
#define asm_test_bit test_bit
#endif
| 54 | |
| 55 | static int do_for_each_set_bit(unsigned int num_bits) |
| 56 | { |
| 57 | unsigned long *to_test = bitmap_alloc(num_bits); |
| 58 | struct timeval start, end, diff; |
| 59 | u64 runtime_us; |
| 60 | struct stats fb_time_stats, tb_time_stats; |
| 61 | double time_average, time_stddev; |
| 62 | unsigned int bit, i, j; |
| 63 | unsigned int set_bits, skip; |
| 64 | unsigned int old; |
| 65 | |
| 66 | init_stats(&fb_time_stats); |
| 67 | init_stats(&tb_time_stats); |
| 68 | |
| 69 | for (set_bits = 1; set_bits <= num_bits; set_bits <<= 1) { |
| 70 | bitmap_zero(to_test, num_bits); |
| 71 | skip = num_bits / set_bits; |
| 72 | for (i = 0; i < num_bits; i += skip) |
| 73 | set_bit(i, to_test); |
| 74 | |
| 75 | for (i = 0; i < outer_iterations; i++) { |
| 76 | old = accumulator; |
| 77 | gettimeofday(&start, NULL); |
| 78 | for (j = 0; j < inner_iterations; j++) { |
| 79 | for_each_set_bit(bit, to_test, num_bits) |
| 80 | workload(bit); |
| 81 | } |
| 82 | gettimeofday(&end, NULL); |
| 83 | assert(old + (inner_iterations * set_bits) == accumulator); |
| 84 | timersub(&end, &start, &diff); |
| 85 | runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec; |
| 86 | update_stats(&fb_time_stats, runtime_us); |
| 87 | |
| 88 | old = accumulator; |
| 89 | gettimeofday(&start, NULL); |
| 90 | for (j = 0; j < inner_iterations; j++) { |
| 91 | for (bit = 0; bit < num_bits; bit++) { |
| 92 | if (asm_test_bit(bit, to_test)) |
| 93 | workload(bit); |
| 94 | } |
| 95 | } |
| 96 | gettimeofday(&end, NULL); |
| 97 | assert(old + (inner_iterations * set_bits) == accumulator); |
| 98 | timersub(&end, &start, &diff); |
| 99 | runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec; |
| 100 | update_stats(&tb_time_stats, runtime_us); |
| 101 | } |
| 102 | |
| 103 | printf("%d operations %d bits set of %d bits\n", |
| 104 | inner_iterations, set_bits, num_bits); |
| 105 | time_average = avg_stats(&fb_time_stats); |
| 106 | time_stddev = stddev_stats(&fb_time_stats); |
| 107 | printf(" Average for_each_set_bit took: %.3f usec (+- %.3f usec)\n", |
| 108 | time_average, time_stddev); |
| 109 | time_average = avg_stats(&tb_time_stats); |
| 110 | time_stddev = stddev_stats(&tb_time_stats); |
| 111 | printf(" Average test_bit loop took: %.3f usec (+- %.3f usec)\n", |
| 112 | time_average, time_stddev); |
| 113 | |
| 114 | if (use_of_val == accumulator) /* Try to avoid compiler tricks. */ |
| 115 | printf("\n"); |
| 116 | } |
| 117 | bitmap_free(to_test); |
| 118 | return 0; |
| 119 | } |
| 120 | |
| 121 | int bench_mem_find_bit(int argc, const char **argv) |
| 122 | { |
| 123 | int err = 0, i; |
| 124 | |
| 125 | argc = parse_options(argc, argv, options, bench_usage, 0); |
| 126 | if (argc) { |
| 127 | usage_with_options(bench_usage, options); |
| 128 | exit(EXIT_FAILURE); |
| 129 | } |
| 130 | |
| 131 | for (i = 1; i <= 2048; i <<= 1) |
| 132 | do_for_each_set_bit(i); |
| 133 | |
| 134 | return err; |
| 135 | } |