Marco Elver | dfd402a | 2019-11-14 19:02:54 +0100 | [diff] [blame] | 1 | // SPDX-License-Identifier: GPL-2.0 |
| 2 | |
Marco Elver | 2402d0e | 2020-02-22 00:10:27 +0100 | [diff] [blame^] | 3 | #include <linux/debug_locks.h> |
| 4 | #include <linux/delay.h> |
Marco Elver | 05f9a40 | 2020-01-10 19:48:34 +0100 | [diff] [blame] | 5 | #include <linux/jiffies.h> |
Marco Elver | dfd402a | 2019-11-14 19:02:54 +0100 | [diff] [blame] | 6 | #include <linux/kernel.h> |
Marco Elver | f1bc962 | 2020-01-15 17:25:12 +0100 | [diff] [blame] | 7 | #include <linux/lockdep.h> |
Marco Elver | dfd402a | 2019-11-14 19:02:54 +0100 | [diff] [blame] | 8 | #include <linux/preempt.h> |
| 9 | #include <linux/printk.h> |
| 10 | #include <linux/sched.h> |
| 11 | #include <linux/spinlock.h> |
| 12 | #include <linux/stacktrace.h> |
| 13 | |
| 14 | #include "kcsan.h" |
| 15 | #include "encoding.h" |
| 16 | |
| 17 | /* |
| 18 | * Max. number of stack entries to show in the report. |
| 19 | */ |
| 20 | #define NUM_STACK_ENTRIES 64 |
| 21 | |
| 22 | /* |
| 23 | * Other thread info: communicated from other racing thread to thread that set |
| 24 | * up the watchpoint, which then prints the complete report atomically. Only |
| 25 | * need one struct, as all threads should to be serialized regardless to print |
| 26 | * the reports, with reporting being in the slow-path. |
| 27 | */ |
| 28 | static struct { |
Ingo Molnar | 5cbaefe | 2019-11-20 10:41:43 +0100 | [diff] [blame] | 29 | const volatile void *ptr; |
| 30 | size_t size; |
Marco Elver | 47144ec | 2020-01-10 19:48:33 +0100 | [diff] [blame] | 31 | int access_type; |
Ingo Molnar | 5cbaefe | 2019-11-20 10:41:43 +0100 | [diff] [blame] | 32 | int task_pid; |
| 33 | int cpu_id; |
| 34 | unsigned long stack_entries[NUM_STACK_ENTRIES]; |
| 35 | int num_stack_entries; |
Marco Elver | 2402d0e | 2020-02-22 00:10:27 +0100 | [diff] [blame^] | 36 | |
| 37 | /* |
| 38 | * Optionally pass @current. Typically we do not need to pass @current |
| 39 | * via @other_info since just @task_pid is sufficient. Passing @current |
| 40 | * has additional overhead. |
| 41 | * |
| 42 | * To safely pass @current, we must either use get_task_struct/ |
| 43 | * put_task_struct, or stall the thread that populated @other_info. |
| 44 | * |
| 45 | * We cannot rely on get_task_struct/put_task_struct in case |
| 46 | * release_report() races with a task being released, and would have to |
| 47 | * free it in release_report(). This may result in deadlock if we want |
| 48 | * to use KCSAN on the allocators. |
| 49 | * |
| 50 | * Since we also want to reliably print held locks for |
| 51 | * CONFIG_KCSAN_VERBOSE, the current implementation stalls the thread |
| 52 | * that populated @other_info until it has been consumed. |
| 53 | */ |
| 54 | struct task_struct *task; |
| 55 | } other_info; |
Marco Elver | dfd402a | 2019-11-14 19:02:54 +0100 | [diff] [blame] | 56 | |
| 57 | /* |
Marco Elver | d591ec3 | 2020-02-06 16:46:24 +0100 | [diff] [blame] | 58 | * Information about reported races; used to rate limit reporting. |
Marco Elver | 05f9a40 | 2020-01-10 19:48:34 +0100 | [diff] [blame] | 59 | */ |
| 60 | struct report_time { |
| 61 | /* |
Marco Elver | d591ec3 | 2020-02-06 16:46:24 +0100 | [diff] [blame] | 62 | * The last time the race was reported. |
Marco Elver | 05f9a40 | 2020-01-10 19:48:34 +0100 | [diff] [blame] | 63 | */ |
| 64 | unsigned long time; |
| 65 | |
| 66 | /* |
| 67 | * The frames of the 2 threads; if only 1 thread is known, one frame |
| 68 | * will be 0. |
| 69 | */ |
| 70 | unsigned long frame1; |
| 71 | unsigned long frame2; |
| 72 | }; |
| 73 | |
| 74 | /* |
| 75 | * Since we also want to be able to debug allocators with KCSAN, to avoid |
| 76 | * deadlock, report_times cannot be dynamically resized with krealloc in |
| 77 | * rate_limit_report. |
| 78 | * |
| 79 | * Therefore, we use a fixed-size array, which at most will occupy a page. This |
| 80 | * still adequately rate limits reports, assuming that a) number of unique data |
Marco Elver | d591ec3 | 2020-02-06 16:46:24 +0100 | [diff] [blame] | 81 | * races is not excessive, and b) occurrence of unique races within the |
Marco Elver | 05f9a40 | 2020-01-10 19:48:34 +0100 | [diff] [blame] | 82 | * same time window is limited. |
| 83 | */ |
| 84 | #define REPORT_TIMES_MAX (PAGE_SIZE / sizeof(struct report_time)) |
| 85 | #define REPORT_TIMES_SIZE \ |
| 86 | (CONFIG_KCSAN_REPORT_ONCE_IN_MS > REPORT_TIMES_MAX ? \ |
| 87 | REPORT_TIMES_MAX : \ |
| 88 | CONFIG_KCSAN_REPORT_ONCE_IN_MS) |
| 89 | static struct report_time report_times[REPORT_TIMES_SIZE]; |
| 90 | |
| 91 | /* |
Marco Elver | dfd402a | 2019-11-14 19:02:54 +0100 | [diff] [blame] | 92 | * This spinlock protects reporting and other_info, since other_info is usually |
| 93 | * required when reporting. |
| 94 | */ |
| 95 | static DEFINE_SPINLOCK(report_lock); |
| 96 | |
| 97 | /* |
Marco Elver | d591ec3 | 2020-02-06 16:46:24 +0100 | [diff] [blame] | 98 | * Checks if the race identified by thread frames frame1 and frame2 has |
Marco Elver | 05f9a40 | 2020-01-10 19:48:34 +0100 | [diff] [blame] | 99 | * been reported since (now - KCSAN_REPORT_ONCE_IN_MS). |
| 100 | */ |
| 101 | static bool rate_limit_report(unsigned long frame1, unsigned long frame2) |
| 102 | { |
| 103 | struct report_time *use_entry = &report_times[0]; |
| 104 | unsigned long invalid_before; |
| 105 | int i; |
| 106 | |
| 107 | BUILD_BUG_ON(CONFIG_KCSAN_REPORT_ONCE_IN_MS != 0 && REPORT_TIMES_SIZE == 0); |
| 108 | |
| 109 | if (CONFIG_KCSAN_REPORT_ONCE_IN_MS == 0) |
| 110 | return false; |
| 111 | |
| 112 | invalid_before = jiffies - msecs_to_jiffies(CONFIG_KCSAN_REPORT_ONCE_IN_MS); |
| 113 | |
Marco Elver | d591ec3 | 2020-02-06 16:46:24 +0100 | [diff] [blame] | 114 | /* Check if a matching race report exists. */ |
Marco Elver | 05f9a40 | 2020-01-10 19:48:34 +0100 | [diff] [blame] | 115 | for (i = 0; i < REPORT_TIMES_SIZE; ++i) { |
| 116 | struct report_time *rt = &report_times[i]; |
| 117 | |
| 118 | /* |
| 119 | * Must always select an entry for use to store info as we |
| 120 | * cannot resize report_times; at the end of the scan, use_entry |
| 121 | * will be the oldest entry, which ideally also happened before |
| 122 | * KCSAN_REPORT_ONCE_IN_MS ago. |
| 123 | */ |
| 124 | if (time_before(rt->time, use_entry->time)) |
| 125 | use_entry = rt; |
| 126 | |
| 127 | /* |
| 128 | * Initially, no need to check any further as this entry as well |
| 129 | * as following entries have never been used. |
| 130 | */ |
| 131 | if (rt->time == 0) |
| 132 | break; |
| 133 | |
| 134 | /* Check if entry expired. */ |
| 135 | if (time_before(rt->time, invalid_before)) |
| 136 | continue; /* before KCSAN_REPORT_ONCE_IN_MS ago */ |
| 137 | |
Marco Elver | d591ec3 | 2020-02-06 16:46:24 +0100 | [diff] [blame] | 138 | /* Reported recently, check if race matches. */ |
Marco Elver | 05f9a40 | 2020-01-10 19:48:34 +0100 | [diff] [blame] | 139 | if ((rt->frame1 == frame1 && rt->frame2 == frame2) || |
| 140 | (rt->frame1 == frame2 && rt->frame2 == frame1)) |
| 141 | return true; |
| 142 | } |
| 143 | |
| 144 | use_entry->time = jiffies; |
| 145 | use_entry->frame1 = frame1; |
| 146 | use_entry->frame2 = frame2; |
| 147 | return false; |
| 148 | } |
| 149 | |
| 150 | /* |
Marco Elver | dfd402a | 2019-11-14 19:02:54 +0100 | [diff] [blame] | 151 | * Special rules to skip reporting. |
| 152 | */ |
Ingo Molnar | 5cbaefe | 2019-11-20 10:41:43 +0100 | [diff] [blame] | 153 | static bool |
Marco Elver | b738f61 | 2020-02-11 17:04:21 +0100 | [diff] [blame] | 154 | skip_report(enum kcsan_value_change value_change, unsigned long top_frame) |
Marco Elver | dfd402a | 2019-11-14 19:02:54 +0100 | [diff] [blame] | 155 | { |
Marco Elver | 81af89e | 2020-02-11 17:04:22 +0100 | [diff] [blame] | 156 | /* Should never get here if value_change==FALSE. */ |
| 157 | WARN_ON_ONCE(value_change == KCSAN_VALUE_CHANGE_FALSE); |
| 158 | |
Marco Elver | ad4f8ee | 2020-01-29 16:01:02 +0100 | [diff] [blame] | 159 | /* |
Marco Elver | b738f61 | 2020-02-11 17:04:21 +0100 | [diff] [blame] | 160 | * The first call to skip_report always has value_change==TRUE, since we |
Marco Elver | ad4f8ee | 2020-01-29 16:01:02 +0100 | [diff] [blame] | 161 | * cannot know the value written of an instrumented access. For the 2nd |
| 162 | * call there are 6 cases with CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY: |
| 163 | * |
Marco Elver | b738f61 | 2020-02-11 17:04:21 +0100 | [diff] [blame] | 164 | * 1. read watchpoint, conflicting write (value_change==TRUE): report; |
| 165 | * 2. read watchpoint, conflicting write (value_change==MAYBE): skip; |
| 166 | * 3. write watchpoint, conflicting write (value_change==TRUE): report; |
| 167 | * 4. write watchpoint, conflicting write (value_change==MAYBE): skip; |
| 168 | * 5. write watchpoint, conflicting read (value_change==MAYBE): skip; |
| 169 | * 6. write watchpoint, conflicting read (value_change==TRUE): report; |
Marco Elver | ad4f8ee | 2020-01-29 16:01:02 +0100 | [diff] [blame] | 170 | * |
| 171 | * Cases 1-4 are intuitive and expected; case 5 ensures we do not report |
Marco Elver | d591ec3 | 2020-02-06 16:46:24 +0100 | [diff] [blame] | 172 | * data races where the write may have rewritten the same value; case 6 |
| 173 | * is possible either if the size is larger than what we check value |
| 174 | * changes for or the access type is KCSAN_ACCESS_ASSERT. |
Marco Elver | ad4f8ee | 2020-01-29 16:01:02 +0100 | [diff] [blame] | 175 | */ |
Marco Elver | b738f61 | 2020-02-11 17:04:21 +0100 | [diff] [blame] | 176 | if (IS_ENABLED(CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY) && |
| 177 | value_change == KCSAN_VALUE_CHANGE_MAYBE) { |
Marco Elver | dfd402a | 2019-11-14 19:02:54 +0100 | [diff] [blame] | 178 | /* |
| 179 | * The access is a write, but the data value did not change. |
| 180 | * |
| 181 | * We opt-out of this filter for certain functions at request of |
| 182 | * maintainers. |
| 183 | */ |
| 184 | char buf[64]; |
| 185 | |
| 186 | snprintf(buf, sizeof(buf), "%ps", (void *)top_frame); |
| 187 | if (!strnstr(buf, "rcu_", sizeof(buf)) && |
| 188 | !strnstr(buf, "_rcu", sizeof(buf)) && |
| 189 | !strnstr(buf, "_srcu", sizeof(buf))) |
| 190 | return true; |
| 191 | } |
| 192 | |
| 193 | return kcsan_skip_report_debugfs(top_frame); |
| 194 | } |
| 195 | |
Marco Elver | 47144ec | 2020-01-10 19:48:33 +0100 | [diff] [blame] | 196 | static const char *get_access_type(int type) |
Marco Elver | dfd402a | 2019-11-14 19:02:54 +0100 | [diff] [blame] | 197 | { |
Marco Elver | 47144ec | 2020-01-10 19:48:33 +0100 | [diff] [blame] | 198 | switch (type) { |
| 199 | case 0: |
| 200 | return "read"; |
| 201 | case KCSAN_ACCESS_ATOMIC: |
| 202 | return "read (marked)"; |
| 203 | case KCSAN_ACCESS_WRITE: |
| 204 | return "write"; |
| 205 | case KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC: |
| 206 | return "write (marked)"; |
Marco Elver | d591ec3 | 2020-02-06 16:46:24 +0100 | [diff] [blame] | 207 | |
| 208 | /* |
| 209 | * ASSERT variants: |
| 210 | */ |
| 211 | case KCSAN_ACCESS_ASSERT: |
| 212 | case KCSAN_ACCESS_ASSERT | KCSAN_ACCESS_ATOMIC: |
| 213 | return "assert no writes"; |
| 214 | case KCSAN_ACCESS_ASSERT | KCSAN_ACCESS_WRITE: |
| 215 | case KCSAN_ACCESS_ASSERT | KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC: |
| 216 | return "assert no accesses"; |
| 217 | |
Marco Elver | 47144ec | 2020-01-10 19:48:33 +0100 | [diff] [blame] | 218 | default: |
| 219 | BUG(); |
| 220 | } |
Marco Elver | dfd402a | 2019-11-14 19:02:54 +0100 | [diff] [blame] | 221 | } |
| 222 | |
Marco Elver | d591ec3 | 2020-02-06 16:46:24 +0100 | [diff] [blame] | 223 | static const char *get_bug_type(int type) |
| 224 | { |
| 225 | return (type & KCSAN_ACCESS_ASSERT) != 0 ? "assert: race" : "data-race"; |
| 226 | } |
| 227 | |
Marco Elver | dfd402a | 2019-11-14 19:02:54 +0100 | [diff] [blame] | 228 | /* Return thread description: in task or interrupt. */ |
| 229 | static const char *get_thread_desc(int task_id) |
| 230 | { |
| 231 | if (task_id != -1) { |
| 232 | static char buf[32]; /* safe: protected by report_lock */ |
| 233 | |
| 234 | snprintf(buf, sizeof(buf), "task %i", task_id); |
| 235 | return buf; |
| 236 | } |
| 237 | return "interrupt"; |
| 238 | } |
| 239 | |
| 240 | /* Helper to skip KCSAN-related functions in stack-trace. */ |
| 241 | static int get_stack_skipnr(unsigned long stack_entries[], int num_entries) |
| 242 | { |
| 243 | char buf[64]; |
| 244 | int skip = 0; |
| 245 | |
| 246 | for (; skip < num_entries; ++skip) { |
| 247 | snprintf(buf, sizeof(buf), "%ps", (void *)stack_entries[skip]); |
| 248 | if (!strnstr(buf, "csan_", sizeof(buf)) && |
| 249 | !strnstr(buf, "tsan_", sizeof(buf)) && |
| 250 | !strnstr(buf, "_once_size", sizeof(buf))) { |
| 251 | break; |
| 252 | } |
| 253 | } |
| 254 | return skip; |
| 255 | } |
| 256 | |
| 257 | /* Compares symbolized strings of addr1 and addr2. */ |
| 258 | static int sym_strcmp(void *addr1, void *addr2) |
| 259 | { |
| 260 | char buf1[64]; |
| 261 | char buf2[64]; |
| 262 | |
| 263 | snprintf(buf1, sizeof(buf1), "%pS", addr1); |
| 264 | snprintf(buf2, sizeof(buf2), "%pS", addr2); |
Ingo Molnar | 5cbaefe | 2019-11-20 10:41:43 +0100 | [diff] [blame] | 265 | |
Marco Elver | dfd402a | 2019-11-14 19:02:54 +0100 | [diff] [blame] | 266 | return strncmp(buf1, buf2, sizeof(buf1)); |
| 267 | } |
| 268 | |
Marco Elver | 2402d0e | 2020-02-22 00:10:27 +0100 | [diff] [blame^] | 269 | static void print_verbose_info(struct task_struct *task) |
| 270 | { |
| 271 | if (!task) |
| 272 | return; |
| 273 | |
| 274 | pr_err("\n"); |
| 275 | debug_show_held_locks(task); |
| 276 | print_irqtrace_events(task); |
| 277 | } |
| 278 | |
Marco Elver | dfd402a | 2019-11-14 19:02:54 +0100 | [diff] [blame] | 279 | /* |
| 280 | * Returns true if a report was generated, false otherwise. |
| 281 | */ |
Marco Elver | 47144ec | 2020-01-10 19:48:33 +0100 | [diff] [blame] | 282 | static bool print_report(const volatile void *ptr, size_t size, int access_type, |
Marco Elver | b738f61 | 2020-02-11 17:04:21 +0100 | [diff] [blame] | 283 | enum kcsan_value_change value_change, int cpu_id, |
Marco Elver | dfd402a | 2019-11-14 19:02:54 +0100 | [diff] [blame] | 284 | enum kcsan_report_type type) |
| 285 | { |
| 286 | unsigned long stack_entries[NUM_STACK_ENTRIES] = { 0 }; |
Ingo Molnar | 5cbaefe | 2019-11-20 10:41:43 +0100 | [diff] [blame] | 287 | int num_stack_entries = stack_trace_save(stack_entries, NUM_STACK_ENTRIES, 1); |
Marco Elver | dfd402a | 2019-11-14 19:02:54 +0100 | [diff] [blame] | 288 | int skipnr = get_stack_skipnr(stack_entries, num_stack_entries); |
Marco Elver | 05f9a40 | 2020-01-10 19:48:34 +0100 | [diff] [blame] | 289 | unsigned long this_frame = stack_entries[skipnr]; |
| 290 | unsigned long other_frame = 0; |
| 291 | int other_skipnr = 0; /* silence uninit warnings */ |
Marco Elver | dfd402a | 2019-11-14 19:02:54 +0100 | [diff] [blame] | 292 | |
| 293 | /* |
| 294 | * Must check report filter rules before starting to print. |
| 295 | */ |
Marco Elver | b738f61 | 2020-02-11 17:04:21 +0100 | [diff] [blame] | 296 | if (skip_report(KCSAN_VALUE_CHANGE_TRUE, stack_entries[skipnr])) |
Marco Elver | dfd402a | 2019-11-14 19:02:54 +0100 | [diff] [blame] | 297 | return false; |
| 298 | |
| 299 | if (type == KCSAN_REPORT_RACE_SIGNAL) { |
| 300 | other_skipnr = get_stack_skipnr(other_info.stack_entries, |
| 301 | other_info.num_stack_entries); |
Marco Elver | 05f9a40 | 2020-01-10 19:48:34 +0100 | [diff] [blame] | 302 | other_frame = other_info.stack_entries[other_skipnr]; |
Marco Elver | dfd402a | 2019-11-14 19:02:54 +0100 | [diff] [blame] | 303 | |
Ingo Molnar | 5cbaefe | 2019-11-20 10:41:43 +0100 | [diff] [blame] | 304 | /* @value_change is only known for the other thread */ |
Marco Elver | ad4f8ee | 2020-01-29 16:01:02 +0100 | [diff] [blame] | 305 | if (skip_report(value_change, other_frame)) |
Marco Elver | dfd402a | 2019-11-14 19:02:54 +0100 | [diff] [blame] | 306 | return false; |
| 307 | } |
| 308 | |
Marco Elver | 05f9a40 | 2020-01-10 19:48:34 +0100 | [diff] [blame] | 309 | if (rate_limit_report(this_frame, other_frame)) |
| 310 | return false; |
| 311 | |
Marco Elver | dfd402a | 2019-11-14 19:02:54 +0100 | [diff] [blame] | 312 | /* Print report header. */ |
| 313 | pr_err("==================================================================\n"); |
| 314 | switch (type) { |
| 315 | case KCSAN_REPORT_RACE_SIGNAL: { |
Marco Elver | dfd402a | 2019-11-14 19:02:54 +0100 | [diff] [blame] | 316 | int cmp; |
| 317 | |
| 318 | /* |
| 319 | * Order functions lexographically for consistent bug titles. |
| 320 | * Do not print offset of functions to keep title short. |
| 321 | */ |
Marco Elver | 05f9a40 | 2020-01-10 19:48:34 +0100 | [diff] [blame] | 322 | cmp = sym_strcmp((void *)other_frame, (void *)this_frame); |
Marco Elver | d591ec3 | 2020-02-06 16:46:24 +0100 | [diff] [blame] | 323 | pr_err("BUG: KCSAN: %s in %ps / %ps\n", |
| 324 | get_bug_type(access_type | other_info.access_type), |
Marco Elver | 05f9a40 | 2020-01-10 19:48:34 +0100 | [diff] [blame] | 325 | (void *)(cmp < 0 ? other_frame : this_frame), |
| 326 | (void *)(cmp < 0 ? this_frame : other_frame)); |
Marco Elver | dfd402a | 2019-11-14 19:02:54 +0100 | [diff] [blame] | 327 | } break; |
| 328 | |
| 329 | case KCSAN_REPORT_RACE_UNKNOWN_ORIGIN: |
Marco Elver | d591ec3 | 2020-02-06 16:46:24 +0100 | [diff] [blame] | 330 | pr_err("BUG: KCSAN: %s in %pS\n", get_bug_type(access_type), |
| 331 | (void *)this_frame); |
Marco Elver | dfd402a | 2019-11-14 19:02:54 +0100 | [diff] [blame] | 332 | break; |
| 333 | |
| 334 | default: |
| 335 | BUG(); |
| 336 | } |
| 337 | |
| 338 | pr_err("\n"); |
| 339 | |
| 340 | /* Print information about the racing accesses. */ |
| 341 | switch (type) { |
| 342 | case KCSAN_REPORT_RACE_SIGNAL: |
| 343 | pr_err("%s to 0x%px of %zu bytes by %s on cpu %i:\n", |
Marco Elver | 47144ec | 2020-01-10 19:48:33 +0100 | [diff] [blame] | 344 | get_access_type(other_info.access_type), other_info.ptr, |
Marco Elver | dfd402a | 2019-11-14 19:02:54 +0100 | [diff] [blame] | 345 | other_info.size, get_thread_desc(other_info.task_pid), |
| 346 | other_info.cpu_id); |
| 347 | |
| 348 | /* Print the other thread's stack trace. */ |
| 349 | stack_trace_print(other_info.stack_entries + other_skipnr, |
| 350 | other_info.num_stack_entries - other_skipnr, |
| 351 | 0); |
| 352 | |
Marco Elver | 2402d0e | 2020-02-22 00:10:27 +0100 | [diff] [blame^] | 353 | if (IS_ENABLED(CONFIG_KCSAN_VERBOSE)) |
| 354 | print_verbose_info(other_info.task); |
| 355 | |
Marco Elver | dfd402a | 2019-11-14 19:02:54 +0100 | [diff] [blame] | 356 | pr_err("\n"); |
| 357 | pr_err("%s to 0x%px of %zu bytes by %s on cpu %i:\n", |
Marco Elver | 47144ec | 2020-01-10 19:48:33 +0100 | [diff] [blame] | 358 | get_access_type(access_type), ptr, size, |
Marco Elver | dfd402a | 2019-11-14 19:02:54 +0100 | [diff] [blame] | 359 | get_thread_desc(in_task() ? task_pid_nr(current) : -1), |
| 360 | cpu_id); |
| 361 | break; |
| 362 | |
| 363 | case KCSAN_REPORT_RACE_UNKNOWN_ORIGIN: |
| 364 | pr_err("race at unknown origin, with %s to 0x%px of %zu bytes by %s on cpu %i:\n", |
Marco Elver | 47144ec | 2020-01-10 19:48:33 +0100 | [diff] [blame] | 365 | get_access_type(access_type), ptr, size, |
Marco Elver | dfd402a | 2019-11-14 19:02:54 +0100 | [diff] [blame] | 366 | get_thread_desc(in_task() ? task_pid_nr(current) : -1), |
| 367 | cpu_id); |
| 368 | break; |
| 369 | |
| 370 | default: |
| 371 | BUG(); |
| 372 | } |
| 373 | /* Print stack trace of this thread. */ |
| 374 | stack_trace_print(stack_entries + skipnr, num_stack_entries - skipnr, |
| 375 | 0); |
| 376 | |
Marco Elver | 2402d0e | 2020-02-22 00:10:27 +0100 | [diff] [blame^] | 377 | if (IS_ENABLED(CONFIG_KCSAN_VERBOSE)) |
| 378 | print_verbose_info(current); |
| 379 | |
Marco Elver | dfd402a | 2019-11-14 19:02:54 +0100 | [diff] [blame] | 380 | /* Print report footer. */ |
| 381 | pr_err("\n"); |
| 382 | pr_err("Reported by Kernel Concurrency Sanitizer on:\n"); |
| 383 | dump_stack_print_info(KERN_DEFAULT); |
| 384 | pr_err("==================================================================\n"); |
| 385 | |
| 386 | return true; |
| 387 | } |
| 388 | |
| 389 | static void release_report(unsigned long *flags, enum kcsan_report_type type) |
| 390 | { |
| 391 | if (type == KCSAN_REPORT_RACE_SIGNAL) |
| 392 | other_info.ptr = NULL; /* mark for reuse */ |
| 393 | |
| 394 | spin_unlock_irqrestore(&report_lock, *flags); |
| 395 | } |
| 396 | |
| 397 | /* |
Marco Elver | 2402d0e | 2020-02-22 00:10:27 +0100 | [diff] [blame^] | 398 | * Sets @other_info.task and awaits consumption of @other_info. |
| 399 | * |
| 400 | * Precondition: report_lock is held. |
| 401 | * Postcondition: report_lock is held. |
| 402 | */ |
| 403 | static void |
| 404 | set_other_info_task_blocking(unsigned long *flags, const volatile void *ptr) |
| 405 | { |
| 406 | /* |
| 407 | * We may be instrumenting a code-path where current->state is already |
| 408 | * something other than TASK_RUNNING. |
| 409 | */ |
| 410 | const bool is_running = current->state == TASK_RUNNING; |
| 411 | /* |
| 412 | * To avoid deadlock in case we are in an interrupt here and this is a |
| 413 | * race with a task on the same CPU (KCSAN_INTERRUPT_WATCHER), provide a |
| 414 | * timeout to ensure this works in all contexts. |
| 415 | * |
| 416 | * Await approximately the worst case delay of the reporting thread (if |
| 417 | * we are not interrupted). |
| 418 | */ |
| 419 | int timeout = max(kcsan_udelay_task, kcsan_udelay_interrupt); |
| 420 | |
| 421 | other_info.task = current; |
| 422 | do { |
| 423 | if (is_running) { |
| 424 | /* |
| 425 | * Let lockdep know the real task is sleeping, to print |
| 426 | * the held locks (recall we turned lockdep off, so |
| 427 | * locking/unlocking @report_lock won't be recorded). |
| 428 | */ |
| 429 | set_current_state(TASK_UNINTERRUPTIBLE); |
| 430 | } |
| 431 | spin_unlock_irqrestore(&report_lock, *flags); |
| 432 | /* |
| 433 | * We cannot call schedule() since we also cannot reliably |
| 434 | * determine if sleeping here is permitted -- see in_atomic(). |
| 435 | */ |
| 436 | |
| 437 | udelay(1); |
| 438 | spin_lock_irqsave(&report_lock, *flags); |
| 439 | if (timeout-- < 0) { |
| 440 | /* |
| 441 | * Abort. Reset other_info.task to NULL, since it |
| 442 | * appears the other thread is still going to consume |
| 443 | * it. It will result in no verbose info printed for |
| 444 | * this task. |
| 445 | */ |
| 446 | other_info.task = NULL; |
| 447 | break; |
| 448 | } |
| 449 | /* |
| 450 | * If @ptr nor @current matches, then our information has been |
| 451 | * consumed and we may continue. If not, retry. |
| 452 | */ |
| 453 | } while (other_info.ptr == ptr && other_info.task == current); |
| 454 | if (is_running) |
| 455 | set_current_state(TASK_RUNNING); |
| 456 | } |
| 457 | |
| 458 | /* |
Marco Elver | dfd402a | 2019-11-14 19:02:54 +0100 | [diff] [blame] | 459 | * Depending on the report type either sets other_info and returns false, or |
| 460 | * acquires the matching other_info and returns true. If other_info is not |
| 461 | * required for the report type, simply acquires report_lock and returns true. |
| 462 | */ |
| 463 | static bool prepare_report(unsigned long *flags, const volatile void *ptr, |
Marco Elver | 47144ec | 2020-01-10 19:48:33 +0100 | [diff] [blame] | 464 | size_t size, int access_type, int cpu_id, |
Marco Elver | dfd402a | 2019-11-14 19:02:54 +0100 | [diff] [blame] | 465 | enum kcsan_report_type type) |
| 466 | { |
| 467 | if (type != KCSAN_REPORT_CONSUMED_WATCHPOINT && |
| 468 | type != KCSAN_REPORT_RACE_SIGNAL) { |
| 469 | /* other_info not required; just acquire report_lock */ |
| 470 | spin_lock_irqsave(&report_lock, *flags); |
| 471 | return true; |
| 472 | } |
| 473 | |
| 474 | retry: |
| 475 | spin_lock_irqsave(&report_lock, *flags); |
| 476 | |
| 477 | switch (type) { |
| 478 | case KCSAN_REPORT_CONSUMED_WATCHPOINT: |
| 479 | if (other_info.ptr != NULL) |
| 480 | break; /* still in use, retry */ |
| 481 | |
Ingo Molnar | 5cbaefe | 2019-11-20 10:41:43 +0100 | [diff] [blame] | 482 | other_info.ptr = ptr; |
| 483 | other_info.size = size; |
Marco Elver | 47144ec | 2020-01-10 19:48:33 +0100 | [diff] [blame] | 484 | other_info.access_type = access_type; |
Ingo Molnar | 5cbaefe | 2019-11-20 10:41:43 +0100 | [diff] [blame] | 485 | other_info.task_pid = in_task() ? task_pid_nr(current) : -1; |
| 486 | other_info.cpu_id = cpu_id; |
| 487 | other_info.num_stack_entries = stack_trace_save(other_info.stack_entries, NUM_STACK_ENTRIES, 1); |
Marco Elver | dfd402a | 2019-11-14 19:02:54 +0100 | [diff] [blame] | 488 | |
Marco Elver | 2402d0e | 2020-02-22 00:10:27 +0100 | [diff] [blame^] | 489 | if (IS_ENABLED(CONFIG_KCSAN_VERBOSE)) |
| 490 | set_other_info_task_blocking(flags, ptr); |
| 491 | |
Marco Elver | dfd402a | 2019-11-14 19:02:54 +0100 | [diff] [blame] | 492 | spin_unlock_irqrestore(&report_lock, *flags); |
| 493 | |
| 494 | /* |
| 495 | * The other thread will print the summary; other_info may now |
| 496 | * be consumed. |
| 497 | */ |
| 498 | return false; |
| 499 | |
| 500 | case KCSAN_REPORT_RACE_SIGNAL: |
| 501 | if (other_info.ptr == NULL) |
| 502 | break; /* no data available yet, retry */ |
| 503 | |
| 504 | /* |
| 505 | * First check if this is the other_info we are expecting, i.e. |
| 506 | * matches based on how watchpoint was encoded. |
| 507 | */ |
| 508 | if (!matching_access((unsigned long)other_info.ptr & |
| 509 | WATCHPOINT_ADDR_MASK, |
| 510 | other_info.size, |
| 511 | (unsigned long)ptr & WATCHPOINT_ADDR_MASK, |
| 512 | size)) |
| 513 | break; /* mismatching watchpoint, retry */ |
| 514 | |
| 515 | if (!matching_access((unsigned long)other_info.ptr, |
| 516 | other_info.size, (unsigned long)ptr, |
| 517 | size)) { |
| 518 | /* |
| 519 | * If the actual accesses to not match, this was a false |
| 520 | * positive due to watchpoint encoding. |
| 521 | */ |
| 522 | kcsan_counter_inc( |
| 523 | KCSAN_COUNTER_ENCODING_FALSE_POSITIVES); |
| 524 | |
| 525 | /* discard this other_info */ |
| 526 | release_report(flags, KCSAN_REPORT_RACE_SIGNAL); |
| 527 | return false; |
| 528 | } |
| 529 | |
Marco Elver | 3a5b45e | 2020-02-10 15:56:39 +0100 | [diff] [blame] | 530 | access_type |= other_info.access_type; |
| 531 | if ((access_type & KCSAN_ACCESS_WRITE) == 0) { |
| 532 | /* |
| 533 | * While the address matches, this is not the other_info |
| 534 | * from the thread that consumed our watchpoint, since |
| 535 | * neither this nor the access in other_info is a write. |
| 536 | * It is invalid to continue with the report, since we |
| 537 | * only have information about reads. |
| 538 | * |
| 539 | * This can happen due to concurrent races on the same |
| 540 | * address, with at least 4 threads. To avoid locking up |
| 541 | * other_info and all other threads, we have to consume |
| 542 | * it regardless. |
| 543 | * |
| 544 | * A concrete case to illustrate why we might lock up if |
| 545 | * we do not consume other_info: |
| 546 | * |
| 547 | * We have 4 threads, all accessing the same address |
| 548 | * (or matching address ranges). Assume the following |
| 549 | * watcher and watchpoint consumer pairs: |
| 550 | * write1-read1, read2-write2. The first to populate |
| 551 | * other_info is write2, however, write1 consumes it, |
| 552 | * resulting in a report of write1-write2. This report |
| 553 | * is valid, however, now read1 populates other_info; |
| 554 | * read2-read1 is an invalid conflict, yet, no other |
| 555 | * conflicting access is left. Therefore, we must |
| 556 | * consume read1's other_info. |
| 557 | * |
| 558 | * Since this case is assumed to be rare, it is |
| 559 | * reasonable to omit this report: one of the other |
| 560 | * reports includes information about the same shared |
| 561 | * data, and at this point the likelihood that we |
| 562 | * re-report the same race again is high. |
| 563 | */ |
| 564 | release_report(flags, KCSAN_REPORT_RACE_SIGNAL); |
| 565 | return false; |
| 566 | } |
| 567 | |
Marco Elver | dfd402a | 2019-11-14 19:02:54 +0100 | [diff] [blame] | 568 | /* |
| 569 | * Matching & usable access in other_info: keep other_info_lock |
| 570 | * locked, as this thread consumes it to print the full report; |
| 571 | * unlocked in release_report. |
| 572 | */ |
| 573 | return true; |
| 574 | |
| 575 | default: |
| 576 | BUG(); |
| 577 | } |
| 578 | |
| 579 | spin_unlock_irqrestore(&report_lock, *flags); |
Ingo Molnar | 5cbaefe | 2019-11-20 10:41:43 +0100 | [diff] [blame] | 580 | |
Marco Elver | dfd402a | 2019-11-14 19:02:54 +0100 | [diff] [blame] | 581 | goto retry; |
| 582 | } |
| 583 | |
Marco Elver | 47144ec | 2020-01-10 19:48:33 +0100 | [diff] [blame] | 584 | void kcsan_report(const volatile void *ptr, size_t size, int access_type, |
Marco Elver | b738f61 | 2020-02-11 17:04:21 +0100 | [diff] [blame] | 585 | enum kcsan_value_change value_change, int cpu_id, |
| 586 | enum kcsan_report_type type) |
Marco Elver | dfd402a | 2019-11-14 19:02:54 +0100 | [diff] [blame] | 587 | { |
| 588 | unsigned long flags = 0; |
| 589 | |
Marco Elver | f1bc962 | 2020-01-15 17:25:12 +0100 | [diff] [blame] | 590 | /* |
| 591 | * With TRACE_IRQFLAGS, lockdep's IRQ trace state becomes corrupted if |
| 592 | * we do not turn off lockdep here; this could happen due to recursion |
Marco Elver | d591ec3 | 2020-02-06 16:46:24 +0100 | [diff] [blame] | 593 | * into lockdep via KCSAN if we detect a race in utilities used by |
Marco Elver | f1bc962 | 2020-01-15 17:25:12 +0100 | [diff] [blame] | 594 | * lockdep. |
| 595 | */ |
| 596 | lockdep_off(); |
| 597 | |
Marco Elver | dfd402a | 2019-11-14 19:02:54 +0100 | [diff] [blame] | 598 | kcsan_disable_current(); |
Marco Elver | 47144ec | 2020-01-10 19:48:33 +0100 | [diff] [blame] | 599 | if (prepare_report(&flags, ptr, size, access_type, cpu_id, type)) { |
Marco Elver | 81af89e | 2020-02-11 17:04:22 +0100 | [diff] [blame] | 600 | /* |
| 601 | * Never report if value_change is FALSE, only if we it is |
| 602 | * either TRUE or MAYBE. In case of MAYBE, further filtering may |
| 603 | * be done once we know the full stack trace in print_report(). |
| 604 | */ |
| 605 | bool reported = value_change != KCSAN_VALUE_CHANGE_FALSE && |
| 606 | print_report(ptr, size, access_type, value_change, cpu_id, type); |
| 607 | |
| 608 | if (reported && panic_on_warn) |
Marco Elver | dfd402a | 2019-11-14 19:02:54 +0100 | [diff] [blame] | 609 | panic("panic_on_warn set ...\n"); |
| 610 | |
| 611 | release_report(&flags, type); |
| 612 | } |
| 613 | kcsan_enable_current(); |
Marco Elver | f1bc962 | 2020-01-15 17:25:12 +0100 | [diff] [blame] | 614 | |
| 615 | lockdep_on(); |
Marco Elver | dfd402a | 2019-11-14 19:02:54 +0100 | [diff] [blame] | 616 | } |