Michael Kelley | fd1fea6 | 2019-07-01 04:25:56 +0000 | [diff] [blame] | 1 | // SPDX-License-Identifier: GPL-2.0 |
| 2 | |
| 3 | /* |
| 4 | * Clocksource driver for the synthetic counter and timers |
| 5 | * provided by the Hyper-V hypervisor to guest VMs, as described |
| 6 | * in the Hyper-V Top Level Functional Spec (TLFS). This driver |
| 7 | * is instruction set architecture independent. |
| 8 | * |
| 9 | * Copyright (C) 2019, Microsoft, Inc. |
| 10 | * |
| 11 | * Author: Michael Kelley <mikelley@microsoft.com> |
| 12 | */ |
| 13 | |
| 14 | #include <linux/percpu.h> |
| 15 | #include <linux/cpumask.h> |
| 16 | #include <linux/clockchips.h> |
Michael Kelley | dd2cb34 | 2019-07-01 04:26:06 +0000 | [diff] [blame] | 17 | #include <linux/clocksource.h> |
| 18 | #include <linux/sched_clock.h> |
Michael Kelley | fd1fea6 | 2019-07-01 04:25:56 +0000 | [diff] [blame] | 19 | #include <linux/mm.h> |
Michael Kelley | 4df4cb9e9 | 2019-11-13 01:11:49 +0000 | [diff] [blame] | 20 | #include <linux/cpuhotplug.h> |
Michael Kelley | ec866be6 | 2021-03-02 13:38:22 -0800 | [diff] [blame] | 21 | #include <linux/interrupt.h> |
| 22 | #include <linux/irq.h> |
| 23 | #include <linux/acpi.h> |
Michael Kelley | fd1fea6 | 2019-07-01 04:25:56 +0000 | [diff] [blame] | 24 | #include <clocksource/hyperv_timer.h> |
| 25 | #include <asm/hyperv-tlfs.h> |
| 26 | #include <asm/mshyperv.h> |
| 27 | |
| 28 | static struct clock_event_device __percpu *hv_clock_event; |
Tianyu Lan | bd00cd52 | 2019-08-14 20:32:16 +0800 | [diff] [blame] | 29 | static u64 hv_sched_clock_offset __ro_after_init; |
Michael Kelley | fd1fea6 | 2019-07-01 04:25:56 +0000 | [diff] [blame] | 30 | |
| 31 | /* |
| 32 | * If false, we're using the old mechanism for stimer0 interrupts |
| 33 | * where it sends a VMbus message when it expires. The old |
| 34 | * mechanism is used when running on older versions of Hyper-V |
| 35 | * that don't support Direct Mode. While Hyper-V provides |
| 36 | * four stimer's per CPU, Linux uses only stimer0. |
Michael Kelley | 4df4cb9e9 | 2019-11-13 01:11:49 +0000 | [diff] [blame] | 37 | * |
| 38 | * Because Direct Mode does not require processing a VMbus |
| 39 | * message, stimer interrupts can be enabled earlier in the |
| 40 | * process of booting a CPU, and consistent with when timer |
| 41 | * interrupts are enabled for other clocksource drivers. |
| 42 | * However, for legacy versions of Hyper-V when Direct Mode |
| 43 | * is not enabled, setting up stimer interrupts must be |
| 44 | * delayed until VMbus is initialized and can process the |
| 45 | * interrupt message. |
Michael Kelley | fd1fea6 | 2019-07-01 04:25:56 +0000 | [diff] [blame] | 46 | */ |
| 47 | static bool direct_mode_enabled; |
| 48 | |
Michael Kelley | ec866be6 | 2021-03-02 13:38:22 -0800 | [diff] [blame] | 49 | static int stimer0_irq = -1; |
Michael Kelley | fd1fea6 | 2019-07-01 04:25:56 +0000 | [diff] [blame] | 50 | static int stimer0_message_sint; |
Michael Kelley | ec866be6 | 2021-03-02 13:38:22 -0800 | [diff] [blame] | 51 | static DEFINE_PER_CPU(long, stimer0_evt); |
Michael Kelley | fd1fea6 | 2019-07-01 04:25:56 +0000 | [diff] [blame] | 52 | |
| 53 | /* |
Michael Kelley | ec866be6 | 2021-03-02 13:38:22 -0800 | [diff] [blame] | 54 | * Common code for stimer0 interrupts coming via Direct Mode or |
| 55 | * as a VMbus message. |
Michael Kelley | fd1fea6 | 2019-07-01 04:25:56 +0000 | [diff] [blame] | 56 | */ |
| 57 | void hv_stimer0_isr(void) |
| 58 | { |
| 59 | struct clock_event_device *ce; |
| 60 | |
| 61 | ce = this_cpu_ptr(hv_clock_event); |
| 62 | ce->event_handler(ce); |
| 63 | } |
| 64 | EXPORT_SYMBOL_GPL(hv_stimer0_isr); |
| 65 | |
Michael Kelley | ec866be6 | 2021-03-02 13:38:22 -0800 | [diff] [blame] | 66 | /* |
| 67 | * stimer0 interrupt handler for architectures that support |
| 68 | * per-cpu interrupts, which also implies Direct Mode. |
| 69 | */ |
| 70 | static irqreturn_t hv_stimer0_percpu_isr(int irq, void *dev_id) |
| 71 | { |
| 72 | hv_stimer0_isr(); |
| 73 | return IRQ_HANDLED; |
| 74 | } |
| 75 | |
Michael Kelley | fd1fea6 | 2019-07-01 04:25:56 +0000 | [diff] [blame] | 76 | static int hv_ce_set_next_event(unsigned long delta, |
| 77 | struct clock_event_device *evt) |
| 78 | { |
| 79 | u64 current_tick; |
| 80 | |
Andrea Parri | 0af3e13 | 2020-01-09 17:06:49 +0100 | [diff] [blame] | 81 | current_tick = hv_read_reference_counter(); |
Michael Kelley | fd1fea6 | 2019-07-01 04:25:56 +0000 | [diff] [blame] | 82 | current_tick += delta; |
Michael Kelley | f3c5e63 | 2021-03-02 13:38:15 -0800 | [diff] [blame] | 83 | hv_set_register(HV_REGISTER_STIMER0_COUNT, current_tick); |
Michael Kelley | fd1fea6 | 2019-07-01 04:25:56 +0000 | [diff] [blame] | 84 | return 0; |
| 85 | } |
| 86 | |
| 87 | static int hv_ce_shutdown(struct clock_event_device *evt) |
| 88 | { |
Michael Kelley | f3c5e63 | 2021-03-02 13:38:15 -0800 | [diff] [blame] | 89 | hv_set_register(HV_REGISTER_STIMER0_COUNT, 0); |
| 90 | hv_set_register(HV_REGISTER_STIMER0_CONFIG, 0); |
Michael Kelley | ec866be6 | 2021-03-02 13:38:22 -0800 | [diff] [blame] | 91 | if (direct_mode_enabled && stimer0_irq >= 0) |
| 92 | disable_percpu_irq(stimer0_irq); |
Michael Kelley | fd1fea6 | 2019-07-01 04:25:56 +0000 | [diff] [blame] | 93 | |
| 94 | return 0; |
| 95 | } |
| 96 | |
| 97 | static int hv_ce_set_oneshot(struct clock_event_device *evt) |
| 98 | { |
| 99 | union hv_stimer_config timer_cfg; |
| 100 | |
| 101 | timer_cfg.as_uint64 = 0; |
| 102 | timer_cfg.enable = 1; |
| 103 | timer_cfg.auto_enable = 1; |
| 104 | if (direct_mode_enabled) { |
| 105 | /* |
| 106 | * When it expires, the timer will directly interrupt |
| 107 | * on the specified hardware vector/IRQ. |
| 108 | */ |
| 109 | timer_cfg.direct_mode = 1; |
Michael Kelley | ec866be6 | 2021-03-02 13:38:22 -0800 | [diff] [blame] | 110 | timer_cfg.apic_vector = HYPERV_STIMER0_VECTOR; |
| 111 | if (stimer0_irq >= 0) |
| 112 | enable_percpu_irq(stimer0_irq, IRQ_TYPE_NONE); |
Michael Kelley | fd1fea6 | 2019-07-01 04:25:56 +0000 | [diff] [blame] | 113 | } else { |
| 114 | /* |
| 115 | * When it expires, the timer will generate a VMbus message, |
| 116 | * to be handled by the normal VMbus interrupt handler. |
| 117 | */ |
| 118 | timer_cfg.direct_mode = 0; |
| 119 | timer_cfg.sintx = stimer0_message_sint; |
| 120 | } |
Michael Kelley | f3c5e63 | 2021-03-02 13:38:15 -0800 | [diff] [blame] | 121 | hv_set_register(HV_REGISTER_STIMER0_CONFIG, timer_cfg.as_uint64); |
Michael Kelley | fd1fea6 | 2019-07-01 04:25:56 +0000 | [diff] [blame] | 122 | return 0; |
| 123 | } |
| 124 | |
| 125 | /* |
| 126 | * hv_stimer_init - Per-cpu initialization of the clockevent |
| 127 | */ |
Michael Kelley | 4df4cb9e9 | 2019-11-13 01:11:49 +0000 | [diff] [blame] | 128 | static int hv_stimer_init(unsigned int cpu) |
Michael Kelley | fd1fea6 | 2019-07-01 04:25:56 +0000 | [diff] [blame] | 129 | { |
| 130 | struct clock_event_device *ce; |
| 131 | |
Michael Kelley | 4df4cb9e9 | 2019-11-13 01:11:49 +0000 | [diff] [blame] | 132 | if (!hv_clock_event) |
| 133 | return 0; |
Michael Kelley | fd1fea6 | 2019-07-01 04:25:56 +0000 | [diff] [blame] | 134 | |
| 135 | ce = per_cpu_ptr(hv_clock_event, cpu); |
| 136 | ce->name = "Hyper-V clockevent"; |
| 137 | ce->features = CLOCK_EVT_FEAT_ONESHOT; |
| 138 | ce->cpumask = cpumask_of(cpu); |
| 139 | ce->rating = 1000; |
| 140 | ce->set_state_shutdown = hv_ce_shutdown; |
| 141 | ce->set_state_oneshot = hv_ce_set_oneshot; |
| 142 | ce->set_next_event = hv_ce_set_next_event; |
| 143 | |
| 144 | clockevents_config_and_register(ce, |
| 145 | HV_CLOCK_HZ, |
| 146 | HV_MIN_DELTA_TICKS, |
| 147 | HV_MAX_MAX_DELTA_TICKS); |
Michael Kelley | 4df4cb9e9 | 2019-11-13 01:11:49 +0000 | [diff] [blame] | 148 | return 0; |
Michael Kelley | fd1fea6 | 2019-07-01 04:25:56 +0000 | [diff] [blame] | 149 | } |
Michael Kelley | fd1fea6 | 2019-07-01 04:25:56 +0000 | [diff] [blame] | 150 | |
| 151 | /* |
| 152 | * hv_stimer_cleanup - Per-cpu cleanup of the clockevent |
| 153 | */ |
Michael Kelley | 4df4cb9e9 | 2019-11-13 01:11:49 +0000 | [diff] [blame] | 154 | int hv_stimer_cleanup(unsigned int cpu) |
Michael Kelley | fd1fea6 | 2019-07-01 04:25:56 +0000 | [diff] [blame] | 155 | { |
| 156 | struct clock_event_device *ce; |
| 157 | |
Michael Kelley | 4df4cb9e9 | 2019-11-13 01:11:49 +0000 | [diff] [blame] | 158 | if (!hv_clock_event) |
| 159 | return 0; |
| 160 | |
| 161 | /* |
| 162 | * In the legacy case where Direct Mode is not enabled |
| 163 | * (which can only be on x86/64), stimer cleanup happens |
| 164 | * relatively early in the CPU offlining process. We |
| 165 | * must unbind the stimer-based clockevent device so |
| 166 | * that the LAPIC timer can take over until clockevents |
| 167 | * are no longer needed in the offlining process. Note |
| 168 | * that clockevents_unbind_device() eventually calls |
| 169 | * hv_ce_shutdown(). |
| 170 | * |
| 171 | * The unbind should not be done when Direct Mode is |
| 172 | * enabled because we may be on an architecture where |
| 173 | * there are no other clockevent devices to fallback to. |
| 174 | */ |
| 175 | ce = per_cpu_ptr(hv_clock_event, cpu); |
| 176 | if (direct_mode_enabled) |
Michael Kelley | fd1fea6 | 2019-07-01 04:25:56 +0000 | [diff] [blame] | 177 | hv_ce_shutdown(ce); |
Michael Kelley | 4df4cb9e9 | 2019-11-13 01:11:49 +0000 | [diff] [blame] | 178 | else |
| 179 | clockevents_unbind_device(ce, cpu); |
| 180 | |
| 181 | return 0; |
Michael Kelley | fd1fea6 | 2019-07-01 04:25:56 +0000 | [diff] [blame] | 182 | } |
| 183 | EXPORT_SYMBOL_GPL(hv_stimer_cleanup); |
| 184 | |
Michael Kelley | ec866be6 | 2021-03-02 13:38:22 -0800 | [diff] [blame] | 185 | /* |
| 186 | * These placeholders are overridden by arch specific code on |
| 187 | * architectures that need special setup of the stimer0 IRQ because |
| 188 | * they don't support per-cpu IRQs (such as x86/x64). |
| 189 | */ |
| 190 | void __weak hv_setup_stimer0_handler(void (*handler)(void)) |
Michael Kelley | fd1fea6 | 2019-07-01 04:25:56 +0000 | [diff] [blame] | 191 | { |
Michael Kelley | ec866be6 | 2021-03-02 13:38:22 -0800 | [diff] [blame] | 192 | }; |
| 193 | |
| 194 | void __weak hv_remove_stimer0_handler(void) |
| 195 | { |
| 196 | }; |
| 197 | |
| 198 | /* Called only on architectures with per-cpu IRQs (i.e., not x86/x64) */ |
| 199 | static int hv_setup_stimer0_irq(void) |
| 200 | { |
| 201 | int ret; |
| 202 | |
| 203 | ret = acpi_register_gsi(NULL, HYPERV_STIMER0_VECTOR, |
| 204 | ACPI_EDGE_SENSITIVE, ACPI_ACTIVE_HIGH); |
| 205 | if (ret < 0) { |
| 206 | pr_err("Can't register Hyper-V stimer0 GSI. Error %d", ret); |
| 207 | return ret; |
| 208 | } |
| 209 | stimer0_irq = ret; |
| 210 | |
| 211 | ret = request_percpu_irq(stimer0_irq, hv_stimer0_percpu_isr, |
| 212 | "Hyper-V stimer0", &stimer0_evt); |
| 213 | if (ret) { |
| 214 | pr_err("Can't request Hyper-V stimer0 IRQ %d. Error %d", |
| 215 | stimer0_irq, ret); |
| 216 | acpi_unregister_gsi(stimer0_irq); |
| 217 | stimer0_irq = -1; |
| 218 | } |
| 219 | return ret; |
| 220 | } |
| 221 | |
| 222 | static void hv_remove_stimer0_irq(void) |
| 223 | { |
| 224 | if (stimer0_irq == -1) { |
| 225 | hv_remove_stimer0_handler(); |
| 226 | } else { |
| 227 | free_percpu_irq(stimer0_irq, &stimer0_evt); |
| 228 | acpi_unregister_gsi(stimer0_irq); |
| 229 | stimer0_irq = -1; |
| 230 | } |
| 231 | } |
| 232 | |
| 233 | /* hv_stimer_alloc - Global initialization of the clockevent and stimer0 */ |
| 234 | int hv_stimer_alloc(bool have_percpu_irqs) |
| 235 | { |
| 236 | int ret; |
Michael Kelley | 4df4cb9e9 | 2019-11-13 01:11:49 +0000 | [diff] [blame] | 237 | |
| 238 | /* |
| 239 | * Synthetic timers are always available except on old versions of |
| 240 | * Hyper-V on x86. In that case, return as error as Linux will use a |
| 241 | * clockevent based on emulated LAPIC timer hardware. |
| 242 | */ |
| 243 | if (!(ms_hyperv.features & HV_MSR_SYNTIMER_AVAILABLE)) |
| 244 | return -EINVAL; |
Michael Kelley | fd1fea6 | 2019-07-01 04:25:56 +0000 | [diff] [blame] | 245 | |
| 246 | hv_clock_event = alloc_percpu(struct clock_event_device); |
| 247 | if (!hv_clock_event) |
| 248 | return -ENOMEM; |
| 249 | |
| 250 | direct_mode_enabled = ms_hyperv.misc_features & |
| 251 | HV_STIMER_DIRECT_MODE_AVAILABLE; |
Michael Kelley | fd1fea6 | 2019-07-01 04:25:56 +0000 | [diff] [blame] | 252 | |
Michael Kelley | ec866be6 | 2021-03-02 13:38:22 -0800 | [diff] [blame] | 253 | /* |
| 254 | * If Direct Mode isn't enabled, the remainder of the initialization |
| 255 | * is done later by hv_stimer_legacy_init() |
| 256 | */ |
| 257 | if (!direct_mode_enabled) |
| 258 | return 0; |
| 259 | |
| 260 | if (have_percpu_irqs) { |
| 261 | ret = hv_setup_stimer0_irq(); |
| 262 | if (ret) |
| 263 | goto free_clock_event; |
| 264 | } else { |
| 265 | hv_setup_stimer0_handler(hv_stimer0_isr); |
| 266 | } |
| 267 | |
| 268 | /* |
| 269 | * Since we are in Direct Mode, stimer initialization |
| 270 | * can be done now with a CPUHP value in the same range |
| 271 | * as other clockevent devices. |
| 272 | */ |
| 273 | ret = cpuhp_setup_state(CPUHP_AP_HYPERV_TIMER_STARTING, |
| 274 | "clockevents/hyperv/stimer:starting", |
| 275 | hv_stimer_init, hv_stimer_cleanup); |
| 276 | if (ret < 0) { |
| 277 | hv_remove_stimer0_irq(); |
| 278 | goto free_clock_event; |
Michael Kelley | 4df4cb9e9 | 2019-11-13 01:11:49 +0000 | [diff] [blame] | 279 | } |
| 280 | return ret; |
| 281 | |
Michael Kelley | ec866be6 | 2021-03-02 13:38:22 -0800 | [diff] [blame] | 282 | free_clock_event: |
Michael Kelley | 4df4cb9e9 | 2019-11-13 01:11:49 +0000 | [diff] [blame] | 283 | free_percpu(hv_clock_event); |
| 284 | hv_clock_event = NULL; |
| 285 | return ret; |
Michael Kelley | fd1fea6 | 2019-07-01 04:25:56 +0000 | [diff] [blame] | 286 | } |
| 287 | EXPORT_SYMBOL_GPL(hv_stimer_alloc); |
| 288 | |
Michael Kelley | 4df4cb9e9 | 2019-11-13 01:11:49 +0000 | [diff] [blame] | 289 | /* |
| 290 | * hv_stimer_legacy_init -- Called from the VMbus driver to handle |
| 291 | * the case when Direct Mode is not enabled, and the stimer |
| 292 | * must be initialized late in the CPU onlining process. |
| 293 | * |
| 294 | */ |
| 295 | void hv_stimer_legacy_init(unsigned int cpu, int sint) |
| 296 | { |
| 297 | if (direct_mode_enabled) |
| 298 | return; |
| 299 | |
| 300 | /* |
| 301 | * This function gets called by each vCPU, so setting the |
| 302 | * global stimer_message_sint value each time is conceptually |
| 303 | * not ideal, but the value passed in is always the same and |
| 304 | * it avoids introducing yet another interface into this |
| 305 | * clocksource driver just to set the sint in the legacy case. |
| 306 | */ |
| 307 | stimer0_message_sint = sint; |
| 308 | (void)hv_stimer_init(cpu); |
| 309 | } |
| 310 | EXPORT_SYMBOL_GPL(hv_stimer_legacy_init); |
| 311 | |
| 312 | /* |
| 313 | * hv_stimer_legacy_cleanup -- Called from the VMbus driver to |
| 314 | * handle the case when Direct Mode is not enabled, and the |
| 315 | * stimer must be cleaned up early in the CPU offlining |
| 316 | * process. |
| 317 | */ |
| 318 | void hv_stimer_legacy_cleanup(unsigned int cpu) |
| 319 | { |
| 320 | if (direct_mode_enabled) |
| 321 | return; |
| 322 | (void)hv_stimer_cleanup(cpu); |
| 323 | } |
| 324 | EXPORT_SYMBOL_GPL(hv_stimer_legacy_cleanup); |
| 325 | |
Michael Kelley | fd1fea6 | 2019-07-01 04:25:56 +0000 | [diff] [blame] | 326 | /* |
| 327 | * Do a global cleanup of clockevents for the cases of kexec and |
| 328 | * vmbus exit |
| 329 | */ |
| 330 | void hv_stimer_global_cleanup(void) |
| 331 | { |
| 332 | int cpu; |
Michael Kelley | fd1fea6 | 2019-07-01 04:25:56 +0000 | [diff] [blame] | 333 | |
Michael Kelley | 4df4cb9e9 | 2019-11-13 01:11:49 +0000 | [diff] [blame] | 334 | /* |
| 335 | * hv_stime_legacy_cleanup() will stop the stimer if Direct |
| 336 | * Mode is not enabled, and fallback to the LAPIC timer. |
| 337 | */ |
| 338 | for_each_present_cpu(cpu) { |
| 339 | hv_stimer_legacy_cleanup(cpu); |
Michael Kelley | fd1fea6 | 2019-07-01 04:25:56 +0000 | [diff] [blame] | 340 | } |
Michael Kelley | 4df4cb9e9 | 2019-11-13 01:11:49 +0000 | [diff] [blame] | 341 | |
Michael Kelley | ec866be6 | 2021-03-02 13:38:22 -0800 | [diff] [blame] | 342 | if (!hv_clock_event) |
| 343 | return; |
| 344 | |
| 345 | if (direct_mode_enabled) { |
| 346 | cpuhp_remove_state(CPUHP_AP_HYPERV_TIMER_STARTING); |
| 347 | hv_remove_stimer0_irq(); |
| 348 | stimer0_irq = -1; |
| 349 | } |
| 350 | free_percpu(hv_clock_event); |
| 351 | hv_clock_event = NULL; |
| 352 | |
Michael Kelley | fd1fea6 | 2019-07-01 04:25:56 +0000 | [diff] [blame] | 353 | } |
| 354 | EXPORT_SYMBOL_GPL(hv_stimer_global_cleanup); |
Michael Kelley | dd2cb34 | 2019-07-01 04:26:06 +0000 | [diff] [blame] | 355 | |
| 356 | /* |
| 357 | * Code and definitions for the Hyper-V clocksources. Two |
| 358 | * clocksources are defined: one that reads the Hyper-V defined MSR, and |
| 359 | * the other that uses the TSC reference page feature as defined in the |
| 360 | * TLFS. The MSR version is for compatibility with old versions of |
| 361 | * Hyper-V and 32-bit x86. The TSC reference page version is preferred. |
| 362 | */ |
| 363 | |
Boqun Feng | ddc61bb | 2019-11-26 10:17:20 +0800 | [diff] [blame] | 364 | static union { |
| 365 | struct ms_hyperv_tsc_page page; |
| 366 | u8 reserved[PAGE_SIZE]; |
| 367 | } tsc_pg __aligned(PAGE_SIZE); |
Michael Kelley | dd2cb34 | 2019-07-01 04:26:06 +0000 | [diff] [blame] | 368 | |
| 369 | struct ms_hyperv_tsc_page *hv_get_tsc_page(void) |
| 370 | { |
Boqun Feng | ddc61bb | 2019-11-26 10:17:20 +0800 | [diff] [blame] | 371 | return &tsc_pg.page; |
Michael Kelley | dd2cb34 | 2019-07-01 04:26:06 +0000 | [diff] [blame] | 372 | } |
| 373 | EXPORT_SYMBOL_GPL(hv_get_tsc_page); |
| 374 | |
Andrea Parri | 0af3e13 | 2020-01-09 17:06:49 +0100 | [diff] [blame] | 375 | static u64 notrace read_hv_clock_tsc(void) |
Michael Kelley | dd2cb34 | 2019-07-01 04:26:06 +0000 | [diff] [blame] | 376 | { |
Boqun Feng | ddc61bb | 2019-11-26 10:17:20 +0800 | [diff] [blame] | 377 | u64 current_tick = hv_read_tsc_page(hv_get_tsc_page()); |
Michael Kelley | dd2cb34 | 2019-07-01 04:26:06 +0000 | [diff] [blame] | 378 | |
| 379 | if (current_tick == U64_MAX) |
Michael Kelley | f3c5e63 | 2021-03-02 13:38:15 -0800 | [diff] [blame] | 380 | current_tick = hv_get_register(HV_REGISTER_TIME_REF_COUNT); |
Michael Kelley | dd2cb34 | 2019-07-01 04:26:06 +0000 | [diff] [blame] | 381 | |
| 382 | return current_tick; |
| 383 | } |
| 384 | |
Andrea Parri | 0af3e13 | 2020-01-09 17:06:49 +0100 | [diff] [blame] | 385 | static u64 notrace read_hv_clock_tsc_cs(struct clocksource *arg) |
| 386 | { |
| 387 | return read_hv_clock_tsc(); |
| 388 | } |
| 389 | |
Mohammed Gamal | 1f3aed0 | 2020-09-24 17:11:17 +0200 | [diff] [blame] | 390 | static u64 notrace read_hv_sched_clock_tsc(void) |
Michael Kelley | dd2cb34 | 2019-07-01 04:26:06 +0000 | [diff] [blame] | 391 | { |
Yubo Xie | 749da8c | 2020-03-26 19:11:59 -0700 | [diff] [blame] | 392 | return (read_hv_clock_tsc() - hv_sched_clock_offset) * |
| 393 | (NSEC_PER_SEC / HV_CLOCK_HZ); |
Michael Kelley | dd2cb34 | 2019-07-01 04:26:06 +0000 | [diff] [blame] | 394 | } |
| 395 | |
Dexuan Cui | 1349401 | 2019-11-19 23:12:26 -0800 | [diff] [blame] | 396 | static void suspend_hv_clock_tsc(struct clocksource *arg) |
| 397 | { |
| 398 | u64 tsc_msr; |
| 399 | |
| 400 | /* Disable the TSC page */ |
Michael Kelley | f3c5e63 | 2021-03-02 13:38:15 -0800 | [diff] [blame] | 401 | tsc_msr = hv_get_register(HV_REGISTER_REFERENCE_TSC); |
Dexuan Cui | 1349401 | 2019-11-19 23:12:26 -0800 | [diff] [blame] | 402 | tsc_msr &= ~BIT_ULL(0); |
Michael Kelley | f3c5e63 | 2021-03-02 13:38:15 -0800 | [diff] [blame] | 403 | hv_set_register(HV_REGISTER_REFERENCE_TSC, tsc_msr); |
Dexuan Cui | 1349401 | 2019-11-19 23:12:26 -0800 | [diff] [blame] | 404 | } |
| 405 | |
| 406 | |
| 407 | static void resume_hv_clock_tsc(struct clocksource *arg) |
| 408 | { |
| 409 | phys_addr_t phys_addr = virt_to_phys(&tsc_pg); |
| 410 | u64 tsc_msr; |
| 411 | |
| 412 | /* Re-enable the TSC page */ |
Michael Kelley | f3c5e63 | 2021-03-02 13:38:15 -0800 | [diff] [blame] | 413 | tsc_msr = hv_get_register(HV_REGISTER_REFERENCE_TSC); |
Dexuan Cui | 1349401 | 2019-11-19 23:12:26 -0800 | [diff] [blame] | 414 | tsc_msr &= GENMASK_ULL(11, 0); |
| 415 | tsc_msr |= BIT_ULL(0) | (u64)phys_addr; |
Michael Kelley | f3c5e63 | 2021-03-02 13:38:15 -0800 | [diff] [blame] | 416 | hv_set_register(HV_REGISTER_REFERENCE_TSC, tsc_msr); |
Dexuan Cui | 1349401 | 2019-11-19 23:12:26 -0800 | [diff] [blame] | 417 | } |
| 418 | |
#ifdef HAVE_VDSO_CLOCKMODE_HVCLOCK
/*
 * clocksource .enable callback: mark the HVCLOCK vDSO clock mode as
 * used. @cs is unused. Always returns 0.
 */
static int hv_cs_enable(struct clocksource *cs)
{
	vclocks_set_used(VDSO_CLOCKMODE_HVCLOCK);

	return 0;
}
#endif
Thomas Gleixner | eec399d | 2020-02-07 13:38:54 +0100 | [diff] [blame] | 426 | |
Michael Kelley | dd2cb34 | 2019-07-01 04:26:06 +0000 | [diff] [blame] | 427 | static struct clocksource hyperv_cs_tsc = { |
| 428 | .name = "hyperv_clocksource_tsc_page", |
Michael Kelley | 4c78738 | 2021-03-02 13:38:21 -0800 | [diff] [blame] | 429 | .rating = 500, |
Andrea Parri | 0af3e13 | 2020-01-09 17:06:49 +0100 | [diff] [blame] | 430 | .read = read_hv_clock_tsc_cs, |
Michael Kelley | dd2cb34 | 2019-07-01 04:26:06 +0000 | [diff] [blame] | 431 | .mask = CLOCKSOURCE_MASK(64), |
| 432 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, |
Dexuan Cui | 1349401 | 2019-11-19 23:12:26 -0800 | [diff] [blame] | 433 | .suspend= suspend_hv_clock_tsc, |
| 434 | .resume = resume_hv_clock_tsc, |
Vitaly Kuznetsov | 3486d2c | 2021-05-13 09:32:46 +0200 | [diff] [blame] | 435 | #ifdef HAVE_VDSO_CLOCKMODE_HVCLOCK |
Thomas Gleixner | eec399d | 2020-02-07 13:38:54 +0100 | [diff] [blame] | 436 | .enable = hv_cs_enable, |
Michael Kelley | e4ab465 | 2021-03-02 13:38:19 -0800 | [diff] [blame] | 437 | .vdso_clock_mode = VDSO_CLOCKMODE_HVCLOCK, |
| 438 | #else |
| 439 | .vdso_clock_mode = VDSO_CLOCKMODE_NONE, |
| 440 | #endif |
Michael Kelley | dd2cb34 | 2019-07-01 04:26:06 +0000 | [diff] [blame] | 441 | }; |
Michael Kelley | dd2cb34 | 2019-07-01 04:26:06 +0000 | [diff] [blame] | 442 | |
Andrea Parri | 0af3e13 | 2020-01-09 17:06:49 +0100 | [diff] [blame] | 443 | static u64 notrace read_hv_clock_msr(void) |
Michael Kelley | dd2cb34 | 2019-07-01 04:26:06 +0000 | [diff] [blame] | 444 | { |
Michael Kelley | dd2cb34 | 2019-07-01 04:26:06 +0000 | [diff] [blame] | 445 | /* |
| 446 | * Read the partition counter to get the current tick count. This count |
| 447 | * is set to 0 when the partition is created and is incremented in |
| 448 | * 100 nanosecond units. |
| 449 | */ |
Michael Kelley | f3c5e63 | 2021-03-02 13:38:15 -0800 | [diff] [blame] | 450 | return hv_get_register(HV_REGISTER_TIME_REF_COUNT); |
Michael Kelley | dd2cb34 | 2019-07-01 04:26:06 +0000 | [diff] [blame] | 451 | } |
| 452 | |
Andrea Parri | 0af3e13 | 2020-01-09 17:06:49 +0100 | [diff] [blame] | 453 | static u64 notrace read_hv_clock_msr_cs(struct clocksource *arg) |
| 454 | { |
| 455 | return read_hv_clock_msr(); |
| 456 | } |
| 457 | |
Mohammed Gamal | 1f3aed0 | 2020-09-24 17:11:17 +0200 | [diff] [blame] | 458 | static u64 notrace read_hv_sched_clock_msr(void) |
Michael Kelley | dd2cb34 | 2019-07-01 04:26:06 +0000 | [diff] [blame] | 459 | { |
Yubo Xie | 749da8c | 2020-03-26 19:11:59 -0700 | [diff] [blame] | 460 | return (read_hv_clock_msr() - hv_sched_clock_offset) * |
| 461 | (NSEC_PER_SEC / HV_CLOCK_HZ); |
Michael Kelley | dd2cb34 | 2019-07-01 04:26:06 +0000 | [diff] [blame] | 462 | } |
| 463 | |
| 464 | static struct clocksource hyperv_cs_msr = { |
| 465 | .name = "hyperv_clocksource_msr", |
Michael Kelley | 4c78738 | 2021-03-02 13:38:21 -0800 | [diff] [blame] | 466 | .rating = 500, |
Andrea Parri | 0af3e13 | 2020-01-09 17:06:49 +0100 | [diff] [blame] | 467 | .read = read_hv_clock_msr_cs, |
Michael Kelley | dd2cb34 | 2019-07-01 04:26:06 +0000 | [diff] [blame] | 468 | .mask = CLOCKSOURCE_MASK(64), |
| 469 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, |
| 470 | }; |
| 471 | |
Michael Kelley | eb3e1d3 | 2021-03-02 13:38:20 -0800 | [diff] [blame] | 472 | /* |
| 473 | * Reference to pv_ops must be inline so objtool |
| 474 | * detection of noinstr violations can work correctly. |
| 475 | */ |
| 476 | #ifdef CONFIG_GENERIC_SCHED_CLOCK |
| 477 | static __always_inline void hv_setup_sched_clock(void *sched_clock) |
| 478 | { |
| 479 | /* |
| 480 | * We're on an architecture with generic sched clock (not x86/x64). |
| 481 | * The Hyper-V sched clock read function returns nanoseconds, not |
| 482 | * the normal 100ns units of the Hyper-V synthetic clock. |
| 483 | */ |
| 484 | sched_clock_register(sched_clock, 64, NSEC_PER_SEC); |
| 485 | } |
| 486 | #elif defined CONFIG_PARAVIRT |
| 487 | static __always_inline void hv_setup_sched_clock(void *sched_clock) |
| 488 | { |
| 489 | /* We're on x86/x64 *and* using PV ops */ |
Linus Torvalds | 4d480db | 2021-04-26 10:44:16 -0700 | [diff] [blame] | 490 | paravirt_set_sched_clock(sched_clock); |
Michael Kelley | eb3e1d3 | 2021-03-02 13:38:20 -0800 | [diff] [blame] | 491 | } |
| 492 | #else /* !CONFIG_GENERIC_SCHED_CLOCK && !CONFIG_PARAVIRT */ |
| 493 | static __always_inline void hv_setup_sched_clock(void *sched_clock) {} |
| 494 | #endif /* CONFIG_GENERIC_SCHED_CLOCK */ |
| 495 | |
Michael Kelley | dd2cb34 | 2019-07-01 04:26:06 +0000 | [diff] [blame] | 496 | static bool __init hv_init_tsc_clocksource(void) |
| 497 | { |
| 498 | u64 tsc_msr; |
| 499 | phys_addr_t phys_addr; |
| 500 | |
| 501 | if (!(ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE)) |
| 502 | return false; |
| 503 | |
Wei Liu | 7d4163c | 2021-02-03 15:04:23 +0000 | [diff] [blame] | 504 | if (hv_root_partition) |
| 505 | return false; |
| 506 | |
Michael Kelley | 4c78738 | 2021-03-02 13:38:21 -0800 | [diff] [blame] | 507 | /* |
| 508 | * If Hyper-V offers TSC_INVARIANT, then the virtualized TSC correctly |
| 509 | * handles frequency and offset changes due to live migration, |
| 510 | * pause/resume, and other VM management operations. So lower the |
| 511 | * Hyper-V Reference TSC rating, causing the generic TSC to be used. |
| 512 | * TSC_INVARIANT is not offered on ARM64, so the Hyper-V Reference |
| 513 | * TSC will be preferred over the virtualized ARM64 arch counter. |
Michael Kelley | ec866be6 | 2021-03-02 13:38:22 -0800 | [diff] [blame] | 514 | * While the Hyper-V MSR clocksource won't be used since the |
| 515 | * Reference TSC clocksource is present, change its rating as |
| 516 | * well for consistency. |
Michael Kelley | 4c78738 | 2021-03-02 13:38:21 -0800 | [diff] [blame] | 517 | */ |
Michael Kelley | ec866be6 | 2021-03-02 13:38:22 -0800 | [diff] [blame] | 518 | if (ms_hyperv.features & HV_ACCESS_TSC_INVARIANT) { |
Michael Kelley | 4c78738 | 2021-03-02 13:38:21 -0800 | [diff] [blame] | 519 | hyperv_cs_tsc.rating = 250; |
Michael Kelley | ec866be6 | 2021-03-02 13:38:22 -0800 | [diff] [blame] | 520 | hyperv_cs_msr.rating = 250; |
| 521 | } |
Michael Kelley | 4c78738 | 2021-03-02 13:38:21 -0800 | [diff] [blame] | 522 | |
Andrea Parri | 0af3e13 | 2020-01-09 17:06:49 +0100 | [diff] [blame] | 523 | hv_read_reference_counter = read_hv_clock_tsc; |
Boqun Feng | ddc61bb | 2019-11-26 10:17:20 +0800 | [diff] [blame] | 524 | phys_addr = virt_to_phys(hv_get_tsc_page()); |
Michael Kelley | dd2cb34 | 2019-07-01 04:26:06 +0000 | [diff] [blame] | 525 | |
| 526 | /* |
| 527 | * The Hyper-V TLFS specifies to preserve the value of reserved |
| 528 | * bits in registers. So read the existing value, preserve the |
| 529 | * low order 12 bits, and add in the guest physical address |
| 530 | * (which already has at least the low 12 bits set to zero since |
| 531 | * it is page aligned). Also set the "enable" bit, which is bit 0. |
| 532 | */ |
Michael Kelley | f3c5e63 | 2021-03-02 13:38:15 -0800 | [diff] [blame] | 533 | tsc_msr = hv_get_register(HV_REGISTER_REFERENCE_TSC); |
Michael Kelley | dd2cb34 | 2019-07-01 04:26:06 +0000 | [diff] [blame] | 534 | tsc_msr &= GENMASK_ULL(11, 0); |
| 535 | tsc_msr = tsc_msr | 0x1 | (u64)phys_addr; |
Michael Kelley | f3c5e63 | 2021-03-02 13:38:15 -0800 | [diff] [blame] | 536 | hv_set_register(HV_REGISTER_REFERENCE_TSC, tsc_msr); |
Michael Kelley | dd2cb34 | 2019-07-01 04:26:06 +0000 | [diff] [blame] | 537 | |
Michael Kelley | dd2cb34 | 2019-07-01 04:26:06 +0000 | [diff] [blame] | 538 | clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100); |
| 539 | |
Andrea Parri | 0af3e13 | 2020-01-09 17:06:49 +0100 | [diff] [blame] | 540 | hv_sched_clock_offset = hv_read_reference_counter(); |
Tianyu Lan | bd00cd52 | 2019-08-14 20:32:16 +0800 | [diff] [blame] | 541 | hv_setup_sched_clock(read_hv_sched_clock_tsc); |
| 542 | |
Michael Kelley | dd2cb34 | 2019-07-01 04:26:06 +0000 | [diff] [blame] | 543 | return true; |
| 544 | } |
Michael Kelley | dd2cb34 | 2019-07-01 04:26:06 +0000 | [diff] [blame] | 545 | |
| 546 | void __init hv_init_clocksource(void) |
| 547 | { |
| 548 | /* |
| 549 | * Try to set up the TSC page clocksource. If it succeeds, we're |
Ingo Molnar | 4bf07f6 | 2021-03-22 22:39:03 +0100 | [diff] [blame] | 550 | * done. Otherwise, set up the MSR clocksource. At least one of |
Michael Kelley | dd2cb34 | 2019-07-01 04:26:06 +0000 | [diff] [blame] | 551 | * these will always be available except on very old versions of |
| 552 | * Hyper-V on x86. In that case we won't have a Hyper-V |
| 553 | * clocksource, but Linux will still run with a clocksource based |
| 554 | * on the emulated PIT or LAPIC timer. |
| 555 | */ |
| 556 | if (hv_init_tsc_clocksource()) |
| 557 | return; |
| 558 | |
| 559 | if (!(ms_hyperv.features & HV_MSR_TIME_REF_COUNT_AVAILABLE)) |
| 560 | return; |
| 561 | |
Andrea Parri | 0af3e13 | 2020-01-09 17:06:49 +0100 | [diff] [blame] | 562 | hv_read_reference_counter = read_hv_clock_msr; |
Michael Kelley | dd2cb34 | 2019-07-01 04:26:06 +0000 | [diff] [blame] | 563 | clocksource_register_hz(&hyperv_cs_msr, NSEC_PER_SEC/100); |
| 564 | |
Andrea Parri | 0af3e13 | 2020-01-09 17:06:49 +0100 | [diff] [blame] | 565 | hv_sched_clock_offset = hv_read_reference_counter(); |
Tianyu Lan | bd00cd52 | 2019-08-14 20:32:16 +0800 | [diff] [blame] | 566 | hv_setup_sched_clock(read_hv_sched_clock_msr); |
Michael Kelley | dd2cb34 | 2019-07-01 04:26:06 +0000 | [diff] [blame] | 567 | } |
| 568 | EXPORT_SYMBOL_GPL(hv_init_clocksource); |