Pekka Paalanen | 8b7d89d | 2008-05-12 21:20:56 +0200 | [diff] [blame^] | 1 | /* |
| 2 | * This program is free software; you can redistribute it and/or modify |
| 3 | * it under the terms of the GNU General Public License as published by |
| 4 | * the Free Software Foundation; either version 2 of the License, or |
| 5 | * (at your option) any later version. |
| 6 | * |
| 7 | * This program is distributed in the hope that it will be useful, |
| 8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 10 | * GNU General Public License for more details. |
| 11 | * |
| 12 | * You should have received a copy of the GNU General Public License |
| 13 | * along with this program; if not, write to the Free Software |
| 14 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
| 15 | * |
| 16 | * Copyright (C) IBM Corporation, 2005 |
| 17 | * Jeff Muizelaar, 2006, 2007 |
| 18 | * Pekka Paalanen, 2008 <pq@iki.fi> |
| 19 | * |
| 20 | * Derived from the read-mod example from relay-examples by Tom Zanussi. |
| 21 | */ |
| 22 | #include <linux/module.h> |
| 23 | #include <linux/relay.h> |
| 24 | #include <linux/debugfs.h> |
| 25 | #include <linux/proc_fs.h> |
| 26 | #include <asm/io.h> |
| 27 | #include <linux/version.h> |
| 28 | #include <linux/kallsyms.h> |
| 29 | #include <asm/pgtable.h> |
| 30 | #include <linux/mmiotrace.h> |
| 31 | #include <asm/e820.h> /* for ISA_START_ADDRESS */ |
| 32 | |
| 33 | #include "kmmio.h" |
| 34 | #include "pf_in.h" |
| 35 | |
/* Name of the debugfs directory (/debug/mmio-trace) holding the relay files. */
#define APP_DIR "mmio-trace"
/* Name of the /proc entry through which markers are written into the log. */
#define MARKER_FILE "mmio-marker"

/* Prefix put in front of every message this module prints. */
#define MODULE_NAME "mmiotrace"
| 42 | |
| 43 | struct trap_reason { |
| 44 | unsigned long addr; |
| 45 | unsigned long ip; |
| 46 | enum reason_type type; |
| 47 | int active_traces; |
| 48 | }; |
| 49 | |
| 50 | static struct trap_reason pf_reason[NR_CPUS]; |
| 51 | static struct mm_io_header_rw cpu_trace[NR_CPUS]; |
| 52 | |
| 53 | static struct file_operations mmio_fops = { |
| 54 | .owner = THIS_MODULE, |
| 55 | }; |
| 56 | |
| 57 | static const size_t subbuf_size = 256*1024; |
| 58 | static struct rchan *chan; |
| 59 | static struct dentry *dir; |
| 60 | static int suspended; /* XXX should this be per cpu? */ |
| 61 | static struct proc_dir_entry *proc_marker_file; |
| 62 | |
| 63 | /* module parameters */ |
| 64 | static unsigned int n_subbufs = 32*4; |
| 65 | static unsigned long filter_offset; |
| 66 | static int nommiotrace; |
| 67 | static int ISA_trace; |
| 68 | static int trace_pc; |
| 69 | |
| 70 | module_param(n_subbufs, uint, 0); |
| 71 | module_param(filter_offset, ulong, 0); |
| 72 | module_param(nommiotrace, bool, 0); |
| 73 | module_param(ISA_trace, bool, 0); |
| 74 | module_param(trace_pc, bool, 0); |
| 75 | |
| 76 | MODULE_PARM_DESC(n_subbufs, "Number of 256kB buffers, default 128."); |
| 77 | MODULE_PARM_DESC(filter_offset, "Start address of traced mappings."); |
| 78 | MODULE_PARM_DESC(nommiotrace, "Disable actual MMIO tracing."); |
| 79 | MODULE_PARM_DESC(ISA_trace, "Do not exclude the low ISA range."); |
| 80 | MODULE_PARM_DESC(trace_pc, "Record address of faulting instructions."); |
| 81 | |
| 82 | static void record_timestamp(struct mm_io_header *header) |
| 83 | { |
| 84 | struct timespec now; |
| 85 | |
| 86 | getnstimeofday(&now); |
| 87 | header->sec = now.tv_sec; |
| 88 | header->nsec = now.tv_nsec; |
| 89 | } |
| 90 | |
| 91 | /* |
| 92 | * Write callback for the /proc entry: |
| 93 | * Read a marker and write it to the mmio trace log |
| 94 | */ |
| 95 | static int write_marker(struct file *file, const char __user *buffer, |
| 96 | unsigned long count, void *data) |
| 97 | { |
| 98 | char *event = NULL; |
| 99 | struct mm_io_header *headp; |
| 100 | int len = (count > 65535) ? 65535 : count; |
| 101 | |
| 102 | event = kzalloc(sizeof(*headp) + len, GFP_KERNEL); |
| 103 | if (!event) |
| 104 | return -ENOMEM; |
| 105 | |
| 106 | headp = (struct mm_io_header *)event; |
| 107 | headp->type = MMIO_MAGIC | (MMIO_MARKER << MMIO_OPCODE_SHIFT); |
| 108 | headp->data_len = len; |
| 109 | record_timestamp(headp); |
| 110 | |
| 111 | if (copy_from_user(event + sizeof(*headp), buffer, len)) { |
| 112 | kfree(event); |
| 113 | return -EFAULT; |
| 114 | } |
| 115 | |
| 116 | relay_write(chan, event, sizeof(*headp) + len); |
| 117 | kfree(event); |
| 118 | return len; |
| 119 | } |
| 120 | |
| 121 | static void print_pte(unsigned long address) |
| 122 | { |
| 123 | pgd_t *pgd = pgd_offset_k(address); |
| 124 | pud_t *pud = pud_offset(pgd, address); |
| 125 | pmd_t *pmd = pmd_offset(pud, address); |
| 126 | if (pmd_large(*pmd)) { |
| 127 | printk(KERN_EMERG MODULE_NAME ": 4MB pages are not " |
| 128 | "currently supported: %lx\n", |
| 129 | address); |
| 130 | BUG(); |
| 131 | } |
| 132 | printk(KERN_DEBUG MODULE_NAME ": pte for 0x%lx: 0x%lx 0x%lx\n", |
| 133 | address, |
| 134 | pte_val(*pte_offset_kernel(pmd, address)), |
| 135 | pte_val(*pte_offset_kernel(pmd, address)) & _PAGE_PRESENT); |
| 136 | } |
| 137 | |
| 138 | /* |
| 139 | * For some reason the pre/post pairs have been called in an |
| 140 | * unmatched order. Report and die. |
| 141 | */ |
| 142 | static void die_kmmio_nesting_error(struct pt_regs *regs, unsigned long addr) |
| 143 | { |
| 144 | const unsigned long cpu = smp_processor_id(); |
| 145 | printk(KERN_EMERG MODULE_NAME ": unexpected fault for address: %lx, " |
| 146 | "last fault for address: %lx\n", |
| 147 | addr, pf_reason[cpu].addr); |
| 148 | print_pte(addr); |
| 149 | #ifdef __i386__ |
| 150 | print_symbol(KERN_EMERG "faulting EIP is at %s\n", regs->ip); |
| 151 | print_symbol(KERN_EMERG "last faulting EIP was at %s\n", |
| 152 | pf_reason[cpu].ip); |
| 153 | printk(KERN_EMERG |
| 154 | "eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", |
| 155 | regs->ax, regs->bx, regs->cx, regs->dx); |
| 156 | printk(KERN_EMERG |
| 157 | "esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", |
| 158 | regs->si, regs->di, regs->bp, regs->sp); |
| 159 | #else |
| 160 | print_symbol(KERN_EMERG "faulting RIP is at %s\n", regs->ip); |
| 161 | print_symbol(KERN_EMERG "last faulting RIP was at %s\n", |
| 162 | pf_reason[cpu].ip); |
| 163 | printk(KERN_EMERG "rax: %016lx rcx: %016lx rdx: %016lx\n", |
| 164 | regs->ax, regs->cx, regs->dx); |
| 165 | printk(KERN_EMERG "rsi: %016lx rdi: %016lx " |
| 166 | "rbp: %016lx rsp: %016lx\n", |
| 167 | regs->si, regs->di, regs->bp, regs->sp); |
| 168 | #endif |
| 169 | BUG(); |
| 170 | } |
| 171 | |
| 172 | static void pre(struct kmmio_probe *p, struct pt_regs *regs, |
| 173 | unsigned long addr) |
| 174 | { |
| 175 | const unsigned long cpu = smp_processor_id(); |
| 176 | const unsigned long instptr = instruction_pointer(regs); |
| 177 | const enum reason_type type = get_ins_type(instptr); |
| 178 | |
| 179 | /* it doesn't make sense to have more than one active trace per cpu */ |
| 180 | if (pf_reason[cpu].active_traces) |
| 181 | die_kmmio_nesting_error(regs, addr); |
| 182 | else |
| 183 | pf_reason[cpu].active_traces++; |
| 184 | |
| 185 | pf_reason[cpu].type = type; |
| 186 | pf_reason[cpu].addr = addr; |
| 187 | pf_reason[cpu].ip = instptr; |
| 188 | |
| 189 | cpu_trace[cpu].header.type = MMIO_MAGIC; |
| 190 | cpu_trace[cpu].header.pid = 0; |
| 191 | cpu_trace[cpu].header.data_len = sizeof(struct mm_io_rw); |
| 192 | cpu_trace[cpu].rw.address = addr; |
| 193 | |
| 194 | /* |
| 195 | * Only record the program counter when requested. |
| 196 | * It may taint clean-room reverse engineering. |
| 197 | */ |
| 198 | if (trace_pc) |
| 199 | cpu_trace[cpu].rw.pc = instptr; |
| 200 | else |
| 201 | cpu_trace[cpu].rw.pc = 0; |
| 202 | |
| 203 | record_timestamp(&cpu_trace[cpu].header); |
| 204 | |
| 205 | switch (type) { |
| 206 | case REG_READ: |
| 207 | cpu_trace[cpu].header.type |= |
| 208 | (MMIO_READ << MMIO_OPCODE_SHIFT) | |
| 209 | (get_ins_mem_width(instptr) << MMIO_WIDTH_SHIFT); |
| 210 | break; |
| 211 | case REG_WRITE: |
| 212 | cpu_trace[cpu].header.type |= |
| 213 | (MMIO_WRITE << MMIO_OPCODE_SHIFT) | |
| 214 | (get_ins_mem_width(instptr) << MMIO_WIDTH_SHIFT); |
| 215 | cpu_trace[cpu].rw.value = get_ins_reg_val(instptr, regs); |
| 216 | break; |
| 217 | case IMM_WRITE: |
| 218 | cpu_trace[cpu].header.type |= |
| 219 | (MMIO_WRITE << MMIO_OPCODE_SHIFT) | |
| 220 | (get_ins_mem_width(instptr) << MMIO_WIDTH_SHIFT); |
| 221 | cpu_trace[cpu].rw.value = get_ins_imm_val(instptr); |
| 222 | break; |
| 223 | default: |
| 224 | { |
| 225 | unsigned char *ip = (unsigned char *)instptr; |
| 226 | cpu_trace[cpu].header.type |= |
| 227 | (MMIO_UNKNOWN_OP << MMIO_OPCODE_SHIFT); |
| 228 | cpu_trace[cpu].rw.value = (*ip) << 16 | |
| 229 | *(ip + 1) << 8 | |
| 230 | *(ip + 2); |
| 231 | } |
| 232 | } |
| 233 | } |
| 234 | |
| 235 | static void post(struct kmmio_probe *p, unsigned long condition, |
| 236 | struct pt_regs *regs) |
| 237 | { |
| 238 | const unsigned long cpu = smp_processor_id(); |
| 239 | |
| 240 | /* this should always return the active_trace count to 0 */ |
| 241 | pf_reason[cpu].active_traces--; |
| 242 | if (pf_reason[cpu].active_traces) { |
| 243 | printk(KERN_EMERG MODULE_NAME ": unexpected post handler"); |
| 244 | BUG(); |
| 245 | } |
| 246 | |
| 247 | switch (pf_reason[cpu].type) { |
| 248 | case REG_READ: |
| 249 | cpu_trace[cpu].rw.value = get_ins_reg_val(pf_reason[cpu].ip, |
| 250 | regs); |
| 251 | break; |
| 252 | default: |
| 253 | break; |
| 254 | } |
| 255 | relay_write(chan, &cpu_trace[cpu], sizeof(struct mm_io_header_rw)); |
| 256 | } |
| 257 | |
| 258 | /* |
| 259 | * subbuf_start() relay callback. |
| 260 | * |
| 261 | * Defined so that we know when events are dropped due to the buffer-full |
| 262 | * condition. |
| 263 | */ |
| 264 | static int subbuf_start_handler(struct rchan_buf *buf, void *subbuf, |
| 265 | void *prev_subbuf, size_t prev_padding) |
| 266 | { |
| 267 | if (relay_buf_full(buf)) { |
| 268 | if (!suspended) { |
| 269 | suspended = 1; |
| 270 | printk(KERN_ERR MODULE_NAME |
| 271 | ": cpu %d buffer full!!!\n", |
| 272 | smp_processor_id()); |
| 273 | } |
| 274 | return 0; |
| 275 | } else if (suspended) { |
| 276 | suspended = 0; |
| 277 | printk(KERN_ERR MODULE_NAME |
| 278 | ": cpu %d buffer no longer full.\n", |
| 279 | smp_processor_id()); |
| 280 | } |
| 281 | |
| 282 | return 1; |
| 283 | } |
| 284 | |
| 285 | /* file_create() callback. Creates relay file in debugfs. */ |
| 286 | static struct dentry *create_buf_file_handler(const char *filename, |
| 287 | struct dentry *parent, |
| 288 | int mode, |
| 289 | struct rchan_buf *buf, |
| 290 | int *is_global) |
| 291 | { |
| 292 | struct dentry *buf_file; |
| 293 | |
| 294 | mmio_fops.read = relay_file_operations.read; |
| 295 | mmio_fops.open = relay_file_operations.open; |
| 296 | mmio_fops.poll = relay_file_operations.poll; |
| 297 | mmio_fops.mmap = relay_file_operations.mmap; |
| 298 | mmio_fops.release = relay_file_operations.release; |
| 299 | mmio_fops.splice_read = relay_file_operations.splice_read; |
| 300 | |
| 301 | buf_file = debugfs_create_file(filename, mode, parent, buf, |
| 302 | &mmio_fops); |
| 303 | |
| 304 | return buf_file; |
| 305 | } |
| 306 | |
/*
 * remove_buf_file() relay callback: removes the relay file from debugfs.
 * Always returns 0.
 */
static int remove_buf_file_handler(struct dentry *dentry)
{
	debugfs_remove(dentry);

	return 0;
}
| 313 | |
| 314 | static struct rchan_callbacks relay_callbacks = { |
| 315 | .subbuf_start = subbuf_start_handler, |
| 316 | .create_buf_file = create_buf_file_handler, |
| 317 | .remove_buf_file = remove_buf_file_handler, |
| 318 | }; |
| 319 | |
| 320 | /* |
| 321 | * create_channel - creates channel /debug/APP_DIR/cpuXXX |
| 322 | * Returns channel on success, NULL otherwise |
| 323 | */ |
| 324 | static struct rchan *create_channel(unsigned size, unsigned n) |
| 325 | { |
| 326 | return relay_open("cpu", dir, size, n, &relay_callbacks, NULL); |
| 327 | } |
| 328 | |
| 329 | /* destroy_channel - destroys channel /debug/APP_DIR/cpuXXX */ |
| 330 | static void destroy_channel(void) |
| 331 | { |
| 332 | if (chan) { |
| 333 | relay_close(chan); |
| 334 | chan = NULL; |
| 335 | } |
| 336 | } |
| 337 | |
| 338 | struct remap_trace { |
| 339 | struct list_head list; |
| 340 | struct kmmio_probe probe; |
| 341 | }; |
| 342 | static LIST_HEAD(trace_list); |
| 343 | static DEFINE_SPINLOCK(trace_list_lock); |
| 344 | |
| 345 | static void do_ioremap_trace_core(unsigned long offset, unsigned long size, |
| 346 | void __iomem *addr) |
| 347 | { |
| 348 | struct remap_trace *trace = kmalloc(sizeof(*trace), GFP_KERNEL); |
| 349 | struct mm_io_header_map event = { |
| 350 | .header = { |
| 351 | .type = MMIO_MAGIC | |
| 352 | (MMIO_PROBE << MMIO_OPCODE_SHIFT), |
| 353 | .sec = 0, |
| 354 | .nsec = 0, |
| 355 | .pid = 0, |
| 356 | .data_len = sizeof(struct mm_io_map) |
| 357 | }, |
| 358 | .map = { |
| 359 | .phys = offset, |
| 360 | .addr = (unsigned long)addr, |
| 361 | .len = size, |
| 362 | .pc = 0 |
| 363 | } |
| 364 | }; |
| 365 | record_timestamp(&event.header); |
| 366 | |
| 367 | *trace = (struct remap_trace) { |
| 368 | .probe = { |
| 369 | .addr = (unsigned long)addr, |
| 370 | .len = size, |
| 371 | .pre_handler = pre, |
| 372 | .post_handler = post, |
| 373 | } |
| 374 | }; |
| 375 | |
| 376 | relay_write(chan, &event, sizeof(event)); |
| 377 | spin_lock(&trace_list_lock); |
| 378 | list_add_tail(&trace->list, &trace_list); |
| 379 | spin_unlock(&trace_list_lock); |
| 380 | if (!nommiotrace) |
| 381 | register_kmmio_probe(&trace->probe); |
| 382 | } |
| 383 | |
| 384 | static void ioremap_trace_core(unsigned long offset, unsigned long size, |
| 385 | void __iomem *addr) |
| 386 | { |
| 387 | if ((filter_offset) && (offset != filter_offset)) |
| 388 | return; |
| 389 | |
| 390 | /* Don't trace the low PCI/ISA area, it's always mapped.. */ |
| 391 | if (!ISA_trace && (offset < ISA_END_ADDRESS) && |
| 392 | (offset + size > ISA_START_ADDRESS)) { |
| 393 | printk(KERN_NOTICE MODULE_NAME ": Ignoring map of low " |
| 394 | "PCI/ISA area (0x%lx-0x%lx)\n", |
| 395 | offset, offset + size); |
| 396 | return; |
| 397 | } |
| 398 | do_ioremap_trace_core(offset, size, addr); |
| 399 | } |
| 400 | |
| 401 | void __iomem *ioremap_cache_trace(unsigned long offset, unsigned long size) |
| 402 | { |
| 403 | void __iomem *p = ioremap_cache(offset, size); |
| 404 | printk(KERN_DEBUG MODULE_NAME ": ioremap_cache(0x%lx, 0x%lx) = %p\n", |
| 405 | offset, size, p); |
| 406 | ioremap_trace_core(offset, size, p); |
| 407 | return p; |
| 408 | } |
| 409 | EXPORT_SYMBOL(ioremap_cache_trace); |
| 410 | |
| 411 | void __iomem *ioremap_nocache_trace(unsigned long offset, unsigned long size) |
| 412 | { |
| 413 | void __iomem *p = ioremap_nocache(offset, size); |
| 414 | printk(KERN_DEBUG MODULE_NAME ": ioremap_nocache(0x%lx, 0x%lx) = %p\n", |
| 415 | offset, size, p); |
| 416 | ioremap_trace_core(offset, size, p); |
| 417 | return p; |
| 418 | } |
| 419 | EXPORT_SYMBOL(ioremap_nocache_trace); |
| 420 | |
| 421 | void iounmap_trace(volatile void __iomem *addr) |
| 422 | { |
| 423 | struct mm_io_header_map event = { |
| 424 | .header = { |
| 425 | .type = MMIO_MAGIC | |
| 426 | (MMIO_UNPROBE << MMIO_OPCODE_SHIFT), |
| 427 | .sec = 0, |
| 428 | .nsec = 0, |
| 429 | .pid = 0, |
| 430 | .data_len = sizeof(struct mm_io_map) |
| 431 | }, |
| 432 | .map = { |
| 433 | .phys = 0, |
| 434 | .addr = (unsigned long)addr, |
| 435 | .len = 0, |
| 436 | .pc = 0 |
| 437 | } |
| 438 | }; |
| 439 | struct remap_trace *trace; |
| 440 | struct remap_trace *tmp; |
| 441 | printk(KERN_DEBUG MODULE_NAME ": Unmapping %p.\n", addr); |
| 442 | record_timestamp(&event.header); |
| 443 | |
| 444 | spin_lock(&trace_list_lock); |
| 445 | list_for_each_entry_safe(trace, tmp, &trace_list, list) { |
| 446 | if ((unsigned long)addr == trace->probe.addr) { |
| 447 | if (!nommiotrace) |
| 448 | unregister_kmmio_probe(&trace->probe); |
| 449 | list_del(&trace->list); |
| 450 | kfree(trace); |
| 451 | break; |
| 452 | } |
| 453 | } |
| 454 | spin_unlock(&trace_list_lock); |
| 455 | relay_write(chan, &event, sizeof(event)); |
| 456 | iounmap(addr); |
| 457 | } |
| 458 | EXPORT_SYMBOL(iounmap_trace); |
| 459 | |
| 460 | static void clear_trace_list(void) |
| 461 | { |
| 462 | struct remap_trace *trace; |
| 463 | struct remap_trace *tmp; |
| 464 | |
| 465 | spin_lock(&trace_list_lock); |
| 466 | list_for_each_entry_safe(trace, tmp, &trace_list, list) { |
| 467 | printk(KERN_WARNING MODULE_NAME ": purging non-iounmapped " |
| 468 | "trace @0x%08lx, size 0x%lx.\n", |
| 469 | trace->probe.addr, trace->probe.len); |
| 470 | if (!nommiotrace) |
| 471 | unregister_kmmio_probe(&trace->probe); |
| 472 | list_del(&trace->list); |
| 473 | kfree(trace); |
| 474 | break; |
| 475 | } |
| 476 | spin_unlock(&trace_list_lock); |
| 477 | } |
| 478 | |
| 479 | static int __init init(void) |
| 480 | { |
| 481 | if (n_subbufs < 2) |
| 482 | return -EINVAL; |
| 483 | |
| 484 | dir = debugfs_create_dir(APP_DIR, NULL); |
| 485 | if (!dir) { |
| 486 | printk(KERN_ERR MODULE_NAME |
| 487 | ": Couldn't create relay app directory.\n"); |
| 488 | return -ENOMEM; |
| 489 | } |
| 490 | |
| 491 | chan = create_channel(subbuf_size, n_subbufs); |
| 492 | if (!chan) { |
| 493 | debugfs_remove(dir); |
| 494 | printk(KERN_ERR MODULE_NAME |
| 495 | ": relay app channel creation failed\n"); |
| 496 | return -ENOMEM; |
| 497 | } |
| 498 | |
| 499 | init_kmmio(); |
| 500 | |
| 501 | proc_marker_file = create_proc_entry(MARKER_FILE, 0, NULL); |
| 502 | if (proc_marker_file) |
| 503 | proc_marker_file->write_proc = write_marker; |
| 504 | |
| 505 | printk(KERN_DEBUG MODULE_NAME ": loaded.\n"); |
| 506 | if (nommiotrace) |
| 507 | printk(KERN_DEBUG MODULE_NAME ": MMIO tracing disabled.\n"); |
| 508 | if (ISA_trace) |
| 509 | printk(KERN_WARNING MODULE_NAME |
| 510 | ": Warning! low ISA range will be traced.\n"); |
| 511 | return 0; |
| 512 | } |
| 513 | |
| 514 | static void __exit cleanup(void) |
| 515 | { |
| 516 | printk(KERN_DEBUG MODULE_NAME ": unload...\n"); |
| 517 | clear_trace_list(); |
| 518 | cleanup_kmmio(); |
| 519 | remove_proc_entry(MARKER_FILE, NULL); |
| 520 | destroy_channel(); |
| 521 | if (dir) |
| 522 | debugfs_remove(dir); |
| 523 | } |
| 524 | |
| 525 | module_init(init); |
| 526 | module_exit(cleanup); |
| 527 | MODULE_LICENSE("GPL"); |