KVM: x86: support using the vmx preemption timer for tsc deadline timer

The VMX preemption timer can be used to virtualize the TSC deadline timer.
The VMX preemption timer is armed when the vCPU is running, and a VMExit
will happen if the virtual TSC deadline timer expires.

When the vCPU thread is blocked because of HLT, KVM will switch to use
an hrtimer, and then go back to the VMX preemption timer when the vCPU
thread is unblocked.

This solution avoids the complex OS's hrtimer system, and the host
timer interrupt handling cost, replacing them with a little math
(for guest->host TSC and host TSC->preemption timer conversion)
and a cheaper VMexit.  This benefits latency for isolated pCPUs.

[A word about performance... Yunhong reported a 30% reduction in average
 latency from cyclictest.  I made a similar test with tscdeadline_latency
 from kvm-unit-tests, and measured

 - ~20 clock cycles loss (out of ~3200, so less than 1% but still
   statistically significant) in the worst case where the test halts
   just after programming the TSC deadline timer

 - ~800 clock cycles gain (25% reduction in latency) in the best case
   where the test busy waits.

 I removed the VMX bits from Yunhong's patch, to concentrate them in the
 next patch - Paolo]

Signed-off-by: Yunhong Jiang <yunhong.jiang@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index f1cf8a5..fdc05ae 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1343,6 +1343,68 @@
 	local_irq_restore(flags);
 }
 
+bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.apic->lapic_timer.hv_timer_in_use;
+}
+EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use);
+
+void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
+{
+	struct kvm_lapic *apic = vcpu->arch.apic;
+
+	WARN_ON(!apic->lapic_timer.hv_timer_in_use);
+	WARN_ON(swait_active(&vcpu->wq));
+	kvm_x86_ops->cancel_hv_timer(vcpu);
+	apic->lapic_timer.hv_timer_in_use = false;
+	apic_timer_expired(apic);
+}
+EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer);
+
+void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu)
+{
+	struct kvm_lapic *apic = vcpu->arch.apic;
+
+	WARN_ON(apic->lapic_timer.hv_timer_in_use);
+
+	if (apic_lvtt_tscdeadline(apic) &&
+	    !atomic_read(&apic->lapic_timer.pending)) {
+		u64 tscdeadline = apic->lapic_timer.tscdeadline;
+
+		if (!kvm_x86_ops->set_hv_timer(vcpu, tscdeadline)) {
+			apic->lapic_timer.hv_timer_in_use = true;
+			hrtimer_cancel(&apic->lapic_timer.timer);
+
+			/* In case the sw timer triggered in the window */
+			if (atomic_read(&apic->lapic_timer.pending)) {
+				apic->lapic_timer.hv_timer_in_use = false;
+				kvm_x86_ops->cancel_hv_timer(apic->vcpu);
+			}
+		}
+		trace_kvm_hv_timer_state(vcpu->vcpu_id,
+				apic->lapic_timer.hv_timer_in_use);
+	}
+}
+EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_hv_timer);
+
+void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu)
+{
+	struct kvm_lapic *apic = vcpu->arch.apic;
+
+	/* Possibly the TSC deadline timer is not enabled yet */
+	if (!apic->lapic_timer.hv_timer_in_use)
+		return;
+
+	kvm_x86_ops->cancel_hv_timer(vcpu);
+	apic->lapic_timer.hv_timer_in_use = false;
+
+	if (atomic_read(&apic->lapic_timer.pending))
+		return;
+
+	start_sw_tscdeadline(apic);
+}
+EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_sw_timer);
+
 static void start_apic_timer(struct kvm_lapic *apic)
 {
 	ktime_t now;
@@ -1389,7 +1451,16 @@
 			   ktime_to_ns(ktime_add_ns(now,
 					apic->lapic_timer.period)));
 	} else if (apic_lvtt_tscdeadline(apic)) {
-		start_sw_tscdeadline(apic);
+		/* lapic timer in tsc deadline mode */
+		u64 tscdeadline = apic->lapic_timer.tscdeadline;
+
+		if (kvm_x86_ops->set_hv_timer &&
+		    !kvm_x86_ops->set_hv_timer(apic->vcpu, tscdeadline)) {
+			apic->lapic_timer.hv_timer_in_use = true;
+			trace_kvm_hv_timer_state(apic->vcpu->vcpu_id,
+					apic->lapic_timer.hv_timer_in_use);
+		} else
+			start_sw_tscdeadline(apic);
 	}
 }