// SPDX-License-Identifier: GPL-2.0+
//
// Performance test comparing RCU vs other mechanisms
// for acquiring references on objects.
//
// Copyright (C) Google, 2020.
//
// Author: Joel Fernandes <joel@joelfernandes.org>

#define pr_fmt(fmt) fmt

#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/completion.h>
#include <linux/cpu.h>
#include <linux/delay.h>
#include <linux/err.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/kthread.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/notifier.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/reboot.h>
#include <linux/rwsem.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/smp.h>
#include <linux/stat.h>
#include <linux/srcu.h>
#include <linux/slab.h>
#include <linux/timekeeping.h>
#include <linux/torture.h>
#include <linux/types.h>

#include "rcu.h"

#define PERF_FLAG "-ref-perf: "

#define PERFOUT(s, x...) \
	pr_alert("%s" PERF_FLAG s, perf_type, ## x)

#define VERBOSE_PERFOUT(s, x...) \
	do { if (verbose) pr_alert("%s" PERF_FLAG s, perf_type, ## x); } while (0)

#define VERBOSE_PERFOUT_ERRSTRING(s, x...) \
	do { if (verbose) pr_alert("%s" PERF_FLAG "!!! " s, perf_type, ## x); } while (0)
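
// With the default perf_type of "rcu", PERFOUT("done\n") thus prints
// "rcu-ref-perf: done" via pr_alert().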

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Joel Fernandes (Google) <joel@joelfernandes.org>");

static char *perf_type = "rcu";
module_param(perf_type, charp, 0444);
MODULE_PARM_DESC(perf_type, "Type of test (rcu, srcu, refcnt, rwsem, rwlock).");

torture_param(int, verbose, 0, "Enable verbose debugging printk()s");

// Wait until there are multiple CPUs before starting test.
torture_param(int, holdoff, IS_BUILTIN(CONFIG_RCU_REF_PERF_TEST) ? 10 : 0,
	      "Holdoff time before test start (s)");
// Number of loops per experiment, all readers execute operations concurrently.
torture_param(long, loops, 10000000, "Number of loops per experiment.");
// Number of readers, with -1 defaulting to about 75% of the CPUs.
torture_param(int, nreaders, -1, "Number of readers, -1 for 75% of CPUs.");
// Number of runs.
torture_param(int, nruns, 30, "Number of experiments to run.");
// Reader delay in nanoseconds, 0 for no delay.
torture_param(int, readdelay, 0, "Read-side delay in nanoseconds.");
Joel Fernandes (Google)653ed642020-05-25 00:36:48 -040071
72#ifdef MODULE
73# define REFPERF_SHUTDOWN 0
74#else
75# define REFPERF_SHUTDOWN 1
76#endif
77
78torture_param(bool, shutdown, REFPERF_SHUTDOWN,
79 "Shutdown at end of performance tests.");
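
// Per-reader state: the reader's task_struct, a start flag that
// main_func() sets to kick off each experiment, the wait queue that the
// reader sleeps on between experiments, and the duration of the
// reader's most recent measurement interval.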
struct reader_task {
	struct task_struct *task;
	atomic_t start;
	wait_queue_head_t wq;
	u64 last_duration_ns;
};

static struct task_struct *shutdown_task;
static wait_queue_head_t shutdown_wq;

static struct task_struct *main_task;
static wait_queue_head_t main_wq;
static int shutdown_start;

static struct reader_task *reader_tasks;

// Number of readers that are part of the current experiment.
static atomic_t nreaders_exp;

// Used to wait for all threads to start.
static atomic_t n_init;

// Track which experiment is currently running.
static int exp_idx;

// Operations vector for selecting different types of tests.
struct ref_perf_ops {
	void (*init)(void);
	void (*cleanup)(void);
	void (*readsection)(const int nloops);
	const char *name;
};
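
// Each ->readsection() implementation performs nloops + 1 acquire/release
// pairs of the mechanism under test (the loops count down to and including
// zero). The reader timestamps the entire call, so the reported
// per-operation cost is the measured duration divided by "loops".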

static struct ref_perf_ops *cur_ops;

static void ref_rcu_read_section(const int nloops)
{
	int i;

	for (i = nloops; i >= 0; i--) {
		rcu_read_lock();
		rcu_read_unlock();
	}
}

static void rcu_sync_perf_init(void)
{
}

static struct ref_perf_ops rcu_ops = {
	.init = rcu_sync_perf_init,
	.readsection = ref_rcu_read_section,
	.name = "rcu"
};

// Definitions for SRCU ref perf testing.
DEFINE_STATIC_SRCU(srcu_refctl_perf);
static struct srcu_struct *srcu_ctlp = &srcu_refctl_perf;

static void srcu_ref_perf_read_section(const int nloops)
{
	int i;
	int idx;

	for (i = nloops; i >= 0; i--) {
		idx = srcu_read_lock(srcu_ctlp);
		srcu_read_unlock(srcu_ctlp, idx);
	}
}

static struct ref_perf_ops srcu_ops = {
	.init = rcu_sync_perf_init,
	.readsection = srcu_ref_perf_read_section,
	.name = "srcu"
};

// Definitions for reference count
static atomic_t refcnt;

static void ref_perf_refcnt_section(const int nloops)
{
	int i;

	for (i = nloops; i >= 0; i--) {
		atomic_inc(&refcnt);
		atomic_dec(&refcnt);
	}
}

static struct ref_perf_ops refcnt_ops = {
	.init = rcu_sync_perf_init,
	.readsection = ref_perf_refcnt_section,
	.name = "refcnt"
};

// Definitions for rwlock
static rwlock_t test_rwlock;

static void ref_perf_rwlock_init(void)
{
	rwlock_init(&test_rwlock);
}

static void ref_perf_rwlock_section(const int nloops)
{
	int i;

	for (i = nloops; i >= 0; i--) {
		read_lock(&test_rwlock);
		read_unlock(&test_rwlock);
	}
}

static struct ref_perf_ops rwlock_ops = {
	.init = ref_perf_rwlock_init,
	.readsection = ref_perf_rwlock_section,
	.name = "rwlock"
};

// Definitions for rwsem
static struct rw_semaphore test_rwsem;

static void ref_perf_rwsem_init(void)
{
	init_rwsem(&test_rwsem);
}

static void ref_perf_rwsem_section(const int nloops)
{
	int i;

	for (i = nloops; i >= 0; i--) {
		down_read(&test_rwsem);
		up_read(&test_rwsem);
	}
}

static struct ref_perf_ops rwsem_ops = {
	.init = ref_perf_rwsem_init,
	.readsection = ref_perf_rwsem_section,
	.name = "rwsem"
};
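
// Adding another mechanism only requires one more read-side loop and ops
// structure. A minimal sketch for a plain spinlock (hypothetical; not
// wired into perf_ops[] below) might look like this:
//
//	static DEFINE_SPINLOCK(test_lock);
//
//	static void ref_perf_lock_section(const int nloops)
//	{
//		int i;
//
//		for (i = nloops; i >= 0; i--) {
//			spin_lock(&test_lock);
//			spin_unlock(&test_lock);
//		}
//	}
//
//	static struct ref_perf_ops lock_ops = {
//		.init = rcu_sync_perf_init,
//		.readsection = ref_perf_lock_section,
//		.name = "lock"
//	};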

// Reader kthread. Repeatedly does empty read-side critical sections of
// the type selected by perf_type, minimizing update-side interference.
static int
ref_perf_reader(void *arg)
{
	unsigned long flags;
	long me = (long)arg;
	struct reader_task *rt = &(reader_tasks[me]);
	u64 start;
	s64 duration;

	VERBOSE_PERFOUT("ref_perf_reader %ld: task started", me);
	set_cpus_allowed_ptr(current, cpumask_of(me % nr_cpu_ids));
	set_user_nice(current, MAX_NICE);
	atomic_inc(&n_init);
	if (holdoff)
		schedule_timeout_interruptible(holdoff * HZ);
repeat:
	VERBOSE_PERFOUT("ref_perf_reader %ld: waiting to start next experiment on cpu %d", me, smp_processor_id());

	// Wait for signal that this reader can start.
	wait_event(rt->wq, (atomic_read(&nreaders_exp) && atomic_read(&rt->start)) ||
		   torture_must_stop());

	if (torture_must_stop())
		goto end;

	// Make sure that the CPU is affinitized appropriately during testing.
	WARN_ON_ONCE(smp_processor_id() != me % nr_cpu_ids);

	atomic_dec(&rt->start);

	// To prevent noise, keep interrupts disabled. This also has the
	// effect of preventing entries into slow path for rcu_read_unlock().
	local_irq_save(flags);
	start = ktime_get_mono_fast_ns();

	VERBOSE_PERFOUT("ref_perf_reader %ld: experiment %d started", me, exp_idx);

	cur_ops->readsection(loops);

	duration = ktime_get_mono_fast_ns() - start;
	local_irq_restore(flags);

	rt->last_duration_ns = WARN_ON_ONCE(duration < 0) ? 0 : duration;

	atomic_dec(&nreaders_exp);

	VERBOSE_PERFOUT("ref_perf_reader %ld: experiment %d ended, (readers remaining=%d)",
			me, exp_idx, atomic_read(&nreaders_exp));

	if (!atomic_read(&nreaders_exp))
		wake_up(&main_wq);

	if (!torture_must_stop())
		goto repeat;
end:
	torture_kthread_stopping("ref_perf_reader");
	return 0;
}
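
// Handshake with main_func(): main_func() sets each reader's ->start flag
// and wakes the reader; the reader clears ->start, runs the timed section,
// then decrements nreaders_exp, and the last reader to finish wakes
// main_func() so that it can collect the results.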

static void reset_readers(void)
Joel Fernandes (Google)653ed642020-05-25 00:36:48 -0400287{
288 int i;
289 struct reader_task *rt;
290
Paul E. McKenneydbf28ef2020-05-25 17:22:24 -0700291 for (i = 0; i < nreaders; i++) {
Joel Fernandes (Google)653ed642020-05-25 00:36:48 -0400292 rt = &(reader_tasks[i]);
293
294 rt->last_duration_ns = 0;
295 }
296}
297
298// Print the results of each reader and return the sum of all their durations.
static u64 process_durations(int n)
{
	int i;
	struct reader_task *rt;
	char buf1[64];
	char buf[512];
	u64 sum = 0;

	buf[0] = 0;
	sprintf(buf, "Experiment #%d (Format: <THREAD-NUM>:<Total loop time in ns>)",
		exp_idx);

	for (i = 0; i < n && !torture_must_stop(); i++) {
		rt = &(reader_tasks[i]);
		sprintf(buf1, "%d: %llu\t", i, rt->last_duration_ns);

		if (i % 5 == 0)
			strcat(buf, "\n");
		strcat(buf, buf1);

		sum += rt->last_duration_ns;
	}
	strcat(buf, "\n");

	PERFOUT("%s\n", buf);

	return sum;
}

// main_func is the main orchestrator: for each of the nruns experiments
// it wakes all the readers, waits for them to finish, collects their
// per-thread durations, and computes the average cost per loop. Once all
// experiments have run, it prints a summary table of the averages.
static int main_func(void *arg)
{
	int exp, r;
	char buf1[64];
	char buf[512];
	u64 *result_avg;

	set_cpus_allowed_ptr(current, cpumask_of(nreaders % nr_cpu_ids));
	set_user_nice(current, MAX_NICE);

	VERBOSE_PERFOUT("main_func task started");
	result_avg = kcalloc(nruns, sizeof(*result_avg), GFP_KERNEL);
	if (!result_avg)
		VERBOSE_PERFOUT_ERRSTRING("out of memory");
	atomic_inc(&n_init);

	// Wait for all threads to start.
	wait_event(main_wq, atomic_read(&n_init) == (nreaders + 1));
	if (holdoff)
		schedule_timeout_interruptible(holdoff * HZ);

	// Run each experiment with all readers participating.
	for (exp = 0; exp < nruns && !torture_must_stop(); exp++) {
		if (!result_avg)
			break;
		if (torture_must_stop())
			goto end;

		reset_readers();
		atomic_set(&nreaders_exp, nreaders);

		exp_idx = exp;

		for (r = 0; r < nreaders; r++) {
			atomic_set(&reader_tasks[r].start, 1);
			wake_up(&reader_tasks[r].wq);
		}

		VERBOSE_PERFOUT("main_func: experiment started, waiting for %d readers",
				nreaders);

		wait_event(main_wq,
			   !atomic_read(&nreaders_exp) || torture_must_stop());

		VERBOSE_PERFOUT("main_func: experiment ended");

		if (torture_must_stop())
			goto end;

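		// Store each experiment's average in thousandths of a
		// nanosecond per loop per reader: the summed duration is
		// scaled by 1000 and divided by (nreaders * loops), and
		// the summary below recovers three decimal places.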
		result_avg[exp] = 1000 * process_durations(nreaders) / (nreaders * loops);
	}

	// Print the average of all experiments
	PERFOUT("END OF TEST. Calculating average duration per loop (nanoseconds)...\n");

	buf[0] = 0;
	strcat(buf, "\n");
	strcat(buf, "Runs\tTime(ns)\n");
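	// One row per experiment: the run number, then the average time per
	// loop per reader in nanoseconds, with three decimal places.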

	for (exp = 0; exp < nruns; exp++) {
		if (!result_avg)
			break;
		sprintf(buf1, "%d\t%llu.%03d\n", exp + 1, result_avg[exp] / 1000, (int)(result_avg[exp] % 1000));
		strcat(buf, buf1);
	}

	if (result_avg)
		PERFOUT("%s", buf);

	// This will shut down everything including us.
	if (shutdown) {
		shutdown_start = 1;
		wake_up(&shutdown_wq);
	}

	// Wait for torture to stop us.
	while (!torture_must_stop())
		schedule_timeout_uninterruptible(1);

end:
	torture_kthread_stopping("main_func");
	return 0;
}

static void
ref_perf_print_module_parms(struct ref_perf_ops *cur_ops, const char *tag)
{
	pr_alert("%s" PERF_FLAG
		 "--- %s: verbose=%d shutdown=%d holdoff=%d loops=%ld nreaders=%d nruns=%d\n", perf_type, tag,
		 verbose, shutdown, holdoff, loops, nreaders, nruns);
}

static void
ref_perf_cleanup(void)
{
	int i;

	if (torture_cleanup_begin())
		return;

	if (!cur_ops) {
		torture_cleanup_end();
		return;
	}

	if (reader_tasks) {
		for (i = 0; i < nreaders; i++)
			torture_stop_kthread("ref_perf_reader",
					     reader_tasks[i].task);
	}
	kfree(reader_tasks);

	torture_stop_kthread("main_task", main_task);
	kfree(main_task);

	// Do perf-type-specific cleanup operations.
	if (cur_ops->cleanup != NULL)
		cur_ops->cleanup();

	torture_cleanup_end();
}

// Shutdown kthread. Just waits to be awakened, then shuts down system.
static int
ref_perf_shutdown(void *arg)
{
	wait_event(shutdown_wq, shutdown_start);

	smp_mb(); // Wake before output.
	ref_perf_cleanup();
	kernel_power_off();

	return -EINVAL;
}

static int __init
ref_perf_init(void)
{
	long i;
	int firsterr = 0;
	static struct ref_perf_ops *perf_ops[] = {
		&rcu_ops, &srcu_ops, &refcnt_ops, &rwlock_ops, &rwsem_ops,
	};

	if (!torture_init_begin(perf_type, verbose))
		return -EBUSY;

	for (i = 0; i < ARRAY_SIZE(perf_ops); i++) {
		cur_ops = perf_ops[i];
		if (strcmp(perf_type, cur_ops->name) == 0)
			break;
	}
	if (i == ARRAY_SIZE(perf_ops)) {
		pr_alert("ref-perf: invalid perf type: \"%s\"\n", perf_type);
		pr_alert("ref-perf types:");
		for (i = 0; i < ARRAY_SIZE(perf_ops); i++)
			pr_cont(" %s", perf_ops[i]->name);
		pr_cont("\n");
		WARN_ON(!IS_MODULE(CONFIG_RCU_REF_PERF_TEST));
		firsterr = -EINVAL;
		cur_ops = NULL;
		goto unwind;
	}
	if (cur_ops->init)
		cur_ops->init();

	ref_perf_print_module_parms(cur_ops, "Start of test");

	// Shutdown task
	if (shutdown) {
		init_waitqueue_head(&shutdown_wq);
		firsterr = torture_create_kthread(ref_perf_shutdown, NULL,
						  shutdown_task);
		if (firsterr)
			goto unwind;
		schedule_timeout_uninterruptible(1);
	}

	// Reader tasks (default to ~75% of online CPUs).
	if (nreaders < 0)
		nreaders = (num_online_cpus() >> 1) + (num_online_cpus() >> 2);
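	// (For example, 16 online CPUs yields 8 + 4 = 12 readers.)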
	reader_tasks = kcalloc(nreaders, sizeof(reader_tasks[0]),
			       GFP_KERNEL);
	if (!reader_tasks) {
		VERBOSE_PERFOUT_ERRSTRING("out of memory");
		firsterr = -ENOMEM;
		goto unwind;
	}

	VERBOSE_PERFOUT("Starting %d reader threads\n", nreaders);

	for (i = 0; i < nreaders; i++) {
		// Initialize each wait queue before its reader can wait on it.
		init_waitqueue_head(&(reader_tasks[i].wq));
		firsterr = torture_create_kthread(ref_perf_reader, (void *)i,
						  reader_tasks[i].task);
		if (firsterr)
			goto unwind;
	}

	// Main Task
	init_waitqueue_head(&main_wq);
	firsterr = torture_create_kthread(main_func, NULL, main_task);
	if (firsterr)
		goto unwind;
	schedule_timeout_uninterruptible(1);

	// Wait until all threads start.
	while (atomic_read(&n_init) < nreaders + 1)
		schedule_timeout_uninterruptible(1);

	wake_up(&main_wq);

	torture_init_end();
	return 0;

unwind:
	torture_init_end();
	ref_perf_cleanup();
	return firsterr;
}

module_init(ref_perf_init);
module_exit(ref_perf_cleanup);