Feng(Eric) Liu | d4c9ff2 | 2008-04-10 08:47:53 -0400 | [diff] [blame] | 1 | /* |
| 2 | * kvm trace |
| 3 | * |
| 4 | * It is designed to allow debugging traces of kvm to be generated |
| 5 | * on UP / SMP machines. Each trace entry can be timestamped so that |
| 6 | * it's possible to reconstruct a chronological record of trace events. |
| 7 | * The implementation refers to blktrace kernel support. |
| 8 | * |
| 9 | * Copyright (c) 2008 Intel Corporation |
| 10 | * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk> |
| 11 | * |
| 12 | * Authors: Feng(Eric) Liu, eric.e.liu@intel.com |
| 13 | * |
| 14 | * Date: Feb 2008 |
| 15 | */ |
| 16 | |
| 17 | #include <linux/module.h> |
| 18 | #include <linux/relay.h> |
| 19 | #include <linux/debugfs.h> |
| 20 | |
| 21 | #include <linux/kvm_host.h> |
| 22 | |
/* Values held in kvm_trace.trace_state. */
#define KVM_TRACE_STATE_RUNNING (1 << 0)	/* kvm_add_trace() only records in this state */
#define KVM_TRACE_STATE_PAUSE (1 << 1)		/* set by kvm_trace_pause() */
#define KVM_TRACE_STATE_CLEARUP (1 << 2)	/* set by kvm_trace_cleanup() while tearing down */
| 26 | |
/* State of one tracing session; allocated by do_kvm_trace_enable(). */
struct kvm_trace {
	int trace_state;		/* one of KVM_TRACE_STATE_* */
	struct rchan *rchan;		/* relay channel that trace records are written to */
	struct dentry *lost_file;	/* debugfs "lost_records" file */
	atomic_t lost_records;		/* bumped when a sub-buffer switch finds the buffer full */
};
/* Session published by do_kvm_trace_enable(); NULL before the first enable. */
static struct kvm_trace *kvm_trace;
| 34 | |
/* Describes one marker that tracing hooks; see kvm_trace_probes[]. */
struct kvm_trace_probe {
	const char *name;		/* marker name given to marker_probe_register() */
	const char *format;		/* marker format string given to marker_probe_register() */
	u32 cycle_in;			/* non-zero: kvm_add_trace() stores get_cycles() in the record */
	marker_probe_func *probe_func;	/* handler registered for the marker */
};
| 41 | |
| 42 | static inline int calc_rec_size(int cycle, int extra) |
| 43 | { |
| 44 | int rec_size = KVM_TRC_HEAD_SIZE; |
| 45 | |
| 46 | rec_size += extra; |
| 47 | return cycle ? rec_size += KVM_TRC_CYCLE_SIZE : rec_size; |
| 48 | } |
| 49 | |
| 50 | static void kvm_add_trace(void *probe_private, void *call_data, |
| 51 | const char *format, va_list *args) |
| 52 | { |
| 53 | struct kvm_trace_probe *p = probe_private; |
| 54 | struct kvm_trace *kt = kvm_trace; |
| 55 | struct kvm_trace_rec rec; |
| 56 | struct kvm_vcpu *vcpu; |
| 57 | int i, extra, size; |
| 58 | |
| 59 | if (unlikely(kt->trace_state != KVM_TRACE_STATE_RUNNING)) |
| 60 | return; |
| 61 | |
| 62 | rec.event = va_arg(*args, u32); |
| 63 | vcpu = va_arg(*args, struct kvm_vcpu *); |
| 64 | rec.pid = current->tgid; |
| 65 | rec.vcpu_id = vcpu->vcpu_id; |
| 66 | |
| 67 | extra = va_arg(*args, u32); |
| 68 | WARN_ON(!(extra <= KVM_TRC_EXTRA_MAX)); |
| 69 | extra = min_t(u32, extra, KVM_TRC_EXTRA_MAX); |
| 70 | rec.extra_u32 = extra; |
| 71 | |
| 72 | rec.cycle_in = p->cycle_in; |
| 73 | |
| 74 | if (rec.cycle_in) { |
| 75 | u64 cycle = 0; |
| 76 | |
| 77 | cycle = get_cycles(); |
| 78 | rec.u.cycle.cycle_lo = (u32)cycle; |
| 79 | rec.u.cycle.cycle_hi = (u32)(cycle >> 32); |
| 80 | |
| 81 | for (i = 0; i < rec.extra_u32; i++) |
| 82 | rec.u.cycle.extra_u32[i] = va_arg(*args, u32); |
| 83 | } else { |
| 84 | for (i = 0; i < rec.extra_u32; i++) |
| 85 | rec.u.nocycle.extra_u32[i] = va_arg(*args, u32); |
| 86 | } |
| 87 | |
| 88 | size = calc_rec_size(rec.cycle_in, rec.extra_u32 * sizeof(u32)); |
| 89 | relay_write(kt->rchan, &rec, size); |
| 90 | } |
| 91 | |
| 92 | static struct kvm_trace_probe kvm_trace_probes[] = { |
| 93 | { "kvm_trace_entryexit", "%u %p %u %u %u %u %u %u", 1, kvm_add_trace }, |
| 94 | { "kvm_trace_handler", "%u %p %u %u %u %u %u %u", 0, kvm_add_trace }, |
| 95 | }; |
| 96 | |
| 97 | static int lost_records_get(void *data, u64 *val) |
| 98 | { |
| 99 | struct kvm_trace *kt = data; |
| 100 | |
| 101 | *val = atomic_read(&kt->lost_records); |
| 102 | return 0; |
| 103 | } |
| 104 | |
| 105 | DEFINE_SIMPLE_ATTRIBUTE(kvm_trace_lost_ops, lost_records_get, NULL, "%llu\n"); |
| 106 | |
| 107 | /* |
| 108 | * The relay channel is used in "no-overwrite" mode, it keeps trace of how |
| 109 | * many times we encountered a full subbuffer, to tell user space app the |
| 110 | * lost records there were. |
| 111 | */ |
| 112 | static int kvm_subbuf_start_callback(struct rchan_buf *buf, void *subbuf, |
| 113 | void *prev_subbuf, size_t prev_padding) |
| 114 | { |
| 115 | struct kvm_trace *kt; |
| 116 | |
| 117 | if (!relay_buf_full(buf)) |
| 118 | return 1; |
| 119 | |
| 120 | kt = buf->chan->private_data; |
| 121 | atomic_inc(&kt->lost_records); |
| 122 | |
| 123 | return 0; |
| 124 | } |
| 125 | |
| 126 | static struct dentry *kvm_create_buf_file_callack(const char *filename, |
| 127 | struct dentry *parent, |
| 128 | int mode, |
| 129 | struct rchan_buf *buf, |
| 130 | int *is_global) |
| 131 | { |
| 132 | return debugfs_create_file(filename, mode, parent, buf, |
| 133 | &relay_file_operations); |
| 134 | } |
| 135 | |
/*
 * relay remove_buf_file hook: drop the debugfs entry created for a relay
 * buffer. Always reports success (0).
 */
static int kvm_remove_buf_file_callback(struct dentry *dentry)
{
	debugfs_remove(dentry);

	return 0;
}
| 141 | |
| 142 | static struct rchan_callbacks kvm_relay_callbacks = { |
| 143 | .subbuf_start = kvm_subbuf_start_callback, |
| 144 | .create_buf_file = kvm_create_buf_file_callack, |
| 145 | .remove_buf_file = kvm_remove_buf_file_callback, |
| 146 | }; |
| 147 | |
| 148 | static int do_kvm_trace_enable(struct kvm_user_trace_setup *kuts) |
| 149 | { |
| 150 | struct kvm_trace *kt; |
| 151 | int i, r = -ENOMEM; |
| 152 | |
| 153 | if (!kuts->buf_size || !kuts->buf_nr) |
| 154 | return -EINVAL; |
| 155 | |
| 156 | kt = kzalloc(sizeof(*kt), GFP_KERNEL); |
| 157 | if (!kt) |
| 158 | goto err; |
| 159 | |
| 160 | r = -EIO; |
| 161 | atomic_set(&kt->lost_records, 0); |
Hollis Blanchard | 76f7c87 | 2008-04-15 16:05:42 -0500 | [diff] [blame^] | 162 | kt->lost_file = debugfs_create_file("lost_records", 0444, kvm_debugfs_dir, |
Feng(Eric) Liu | d4c9ff2 | 2008-04-10 08:47:53 -0400 | [diff] [blame] | 163 | kt, &kvm_trace_lost_ops); |
| 164 | if (!kt->lost_file) |
| 165 | goto err; |
| 166 | |
Hollis Blanchard | 76f7c87 | 2008-04-15 16:05:42 -0500 | [diff] [blame^] | 167 | kt->rchan = relay_open("trace", kvm_debugfs_dir, kuts->buf_size, |
Feng(Eric) Liu | d4c9ff2 | 2008-04-10 08:47:53 -0400 | [diff] [blame] | 168 | kuts->buf_nr, &kvm_relay_callbacks, kt); |
| 169 | if (!kt->rchan) |
| 170 | goto err; |
| 171 | |
| 172 | kvm_trace = kt; |
| 173 | |
| 174 | for (i = 0; i < ARRAY_SIZE(kvm_trace_probes); i++) { |
| 175 | struct kvm_trace_probe *p = &kvm_trace_probes[i]; |
| 176 | |
| 177 | r = marker_probe_register(p->name, p->format, p->probe_func, p); |
| 178 | if (r) |
| 179 | printk(KERN_INFO "Unable to register probe %s\n", |
| 180 | p->name); |
| 181 | } |
| 182 | |
| 183 | kvm_trace->trace_state = KVM_TRACE_STATE_RUNNING; |
| 184 | |
| 185 | return 0; |
| 186 | err: |
| 187 | if (kt) { |
| 188 | if (kt->lost_file) |
| 189 | debugfs_remove(kt->lost_file); |
| 190 | if (kt->rchan) |
| 191 | relay_close(kt->rchan); |
| 192 | kfree(kt); |
| 193 | } |
| 194 | return r; |
| 195 | } |
| 196 | |
| 197 | static int kvm_trace_enable(char __user *arg) |
| 198 | { |
| 199 | struct kvm_user_trace_setup kuts; |
| 200 | int ret; |
| 201 | |
| 202 | ret = copy_from_user(&kuts, arg, sizeof(kuts)); |
| 203 | if (ret) |
| 204 | return -EFAULT; |
| 205 | |
| 206 | ret = do_kvm_trace_enable(&kuts); |
| 207 | if (ret) |
| 208 | return ret; |
| 209 | |
| 210 | return 0; |
| 211 | } |
| 212 | |
| 213 | static int kvm_trace_pause(void) |
| 214 | { |
| 215 | struct kvm_trace *kt = kvm_trace; |
| 216 | int r = -EINVAL; |
| 217 | |
| 218 | if (kt == NULL) |
| 219 | return r; |
| 220 | |
| 221 | if (kt->trace_state == KVM_TRACE_STATE_RUNNING) { |
| 222 | kt->trace_state = KVM_TRACE_STATE_PAUSE; |
| 223 | relay_flush(kt->rchan); |
| 224 | r = 0; |
| 225 | } |
| 226 | |
| 227 | return r; |
| 228 | } |
| 229 | |
| 230 | void kvm_trace_cleanup(void) |
| 231 | { |
| 232 | struct kvm_trace *kt = kvm_trace; |
| 233 | int i; |
| 234 | |
| 235 | if (kt == NULL) |
| 236 | return; |
| 237 | |
| 238 | if (kt->trace_state == KVM_TRACE_STATE_RUNNING || |
| 239 | kt->trace_state == KVM_TRACE_STATE_PAUSE) { |
| 240 | |
| 241 | kt->trace_state = KVM_TRACE_STATE_CLEARUP; |
| 242 | |
| 243 | for (i = 0; i < ARRAY_SIZE(kvm_trace_probes); i++) { |
| 244 | struct kvm_trace_probe *p = &kvm_trace_probes[i]; |
| 245 | marker_probe_unregister(p->name, p->probe_func, p); |
| 246 | } |
| 247 | |
| 248 | relay_close(kt->rchan); |
| 249 | debugfs_remove(kt->lost_file); |
| 250 | kfree(kt); |
| 251 | } |
| 252 | } |
| 253 | |
| 254 | int kvm_trace_ioctl(unsigned int ioctl, unsigned long arg) |
| 255 | { |
| 256 | void __user *argp = (void __user *)arg; |
| 257 | long r = -EINVAL; |
| 258 | |
| 259 | if (!capable(CAP_SYS_ADMIN)) |
| 260 | return -EPERM; |
| 261 | |
| 262 | switch (ioctl) { |
| 263 | case KVM_TRACE_ENABLE: |
| 264 | r = kvm_trace_enable(argp); |
| 265 | break; |
| 266 | case KVM_TRACE_PAUSE: |
| 267 | r = kvm_trace_pause(); |
| 268 | break; |
| 269 | case KVM_TRACE_DISABLE: |
| 270 | r = 0; |
| 271 | kvm_trace_cleanup(); |
| 272 | break; |
| 273 | } |
| 274 | |
| 275 | return r; |
| 276 | } |