x86: kvm: rate-limit global clock updates

When we update a vcpu's local clock it may pick up an NTP correction.
We can't wait an indeterminate amount of time for other vcpus to pick
up that correction, so commit 0061d53daf26f introduced a global clock
update. However, we can't request a global clock update on every vcpu
load either (which is what happens if the tsc is marked as unstable).
The solution is to rate-limit the global clock updates. Marcelo
calculated that we should delay the global clock updates no more
than 0.1s as follows:

Assume an NTP correction c is applied to one vcpu, but not the other,
then in n seconds the delta of the vcpu system_timestamps will be
c * n. If we assume a correction of 500ppm (worst-case), then the two
vcpus will diverge 50us in 0.1s, which is a considerable amount.

Signed-off-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

commit: 7e44e4495a398eb553ce561f29f9148f40a3448f [log] [tgz]
author: Andrew Jones <drjones@redhat.com> Fri Feb 28 12:52:54 2014 +0100
committer: Paolo Bonzini <pbonzini@redhat.com> Tue Mar 04 11:50:47 2014 +0100
tree: 874a17c193b6be9621fbc32ba7c7ce633e316658
parent: ccf9844e5d99c1ee9a5b8c4f1332ac5211cbce03 [diff] [blame]
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 773eba7..5ed9293 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c

@@ -1628,14 +1628,21 @@
  * the others.
  *
  * So in those cases, request a kvmclock update for all vcpus.
- * The worst case for a remote vcpu to update its kvmclock
- * is then bounded by maximum nohz sleep latency.
+ * We need to rate-limit these requests though, as they can
+ * considerably slow guests that have a large number of vcpus.
+ * The time for a remote vcpu to update its kvmclock is bound
+ * by the delay we use to rate-limit the updates.
  */
 
-static void kvm_gen_kvmclock_update(struct kvm_vcpu *v)
+#define KVMCLOCK_UPDATE_DELAY msecs_to_jiffies(100)
+
+static void kvmclock_update_fn(struct work_struct *work)
 {
 	int i;
-	struct kvm *kvm = v->kvm;
+	struct delayed_work *dwork = to_delayed_work(work);
+	struct kvm_arch *ka = container_of(dwork, struct kvm_arch,
+					   kvmclock_update_work);
+	struct kvm *kvm = container_of(ka, struct kvm, arch);
 	struct kvm_vcpu *vcpu;
 
 	kvm_for_each_vcpu(i, vcpu, kvm) {
@@ -1644,6 +1651,15 @@
 	}
 }
 
+static void kvm_gen_kvmclock_update(struct kvm_vcpu *v)
+{
+	struct kvm *kvm = v->kvm;
+
+	set_bit(KVM_REQ_CLOCK_UPDATE, &v->requests);
+	schedule_delayed_work(&kvm->arch.kvmclock_update_work,
+					KVMCLOCK_UPDATE_DELAY);
+}
+
 static bool msr_mtrr_valid(unsigned msr)
 {
 	switch (msr) {
@@ -7022,6 +7038,8 @@
 
 	pvclock_update_vm_gtod_copy(kvm);
 
+	INIT_DELAYED_WORK(&kvm->arch.kvmclock_update_work, kvmclock_update_fn);
+
 	return 0;
 }
 
@@ -7059,6 +7077,7 @@
 
 void kvm_arch_sync_events(struct kvm *kvm)
 {
+	cancel_delayed_work_sync(&kvm->arch.kvmclock_update_work);
 	kvm_free_all_assigned_devices(kvm);
 	kvm_free_pit(kvm);
 }
commit	7e44e4495a398eb553ce561f29f9148f40a3448f	[log] [tgz]
author	Andrew Jones <drjones@redhat.com>	Fri Feb 28 12:52:54 2014 +0100
committer	Paolo Bonzini <pbonzini@redhat.com>	Tue Mar 04 11:50:47 2014 +0100
tree	874a17c193b6be9621fbc32ba7c7ce633e316658
parent	ccf9844e5d99c1ee9a5b8c4f1332ac5211cbce03 [diff] [blame]