KVM: MMU: load PDPTRs outside mmu_lock

On SVM, reading PDPTRs might access guest memory, which might fault
and thus might sleep.  On the other hand, it is not possible to
release the lock after make_mmu_pages_available has been called.

Therefore, push the call to make_mmu_pages_available and the
mmu_lock critical section within mmu_alloc_direct_roots and
mmu_alloc_shadow_roots.

Reported-by: Wanpeng Li <wanpengli@tencent.com>
Co-developed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 7a99e59..e2a43a8 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3245,6 +3245,12 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
 	u8 shadow_root_level = mmu->shadow_root_level;
 	hpa_t root;
 	unsigned i;
+	int r;
+
+	write_lock(&vcpu->kvm->mmu_lock);
+	r = make_mmu_pages_available(vcpu);
+	if (r < 0)
+		goto out_unlock;
 
 	if (is_tdp_mmu_enabled(vcpu->kvm)) {
 		root = kvm_tdp_mmu_get_vcpu_root_hpa(vcpu);
@@ -3253,8 +3259,10 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
 		root = mmu_alloc_root(vcpu, 0, 0, shadow_root_level, true);
 		mmu->root_hpa = root;
 	} else if (shadow_root_level == PT32E_ROOT_LEVEL) {
-		if (WARN_ON_ONCE(!mmu->pae_root))
-			return -EIO;
+		if (WARN_ON_ONCE(!mmu->pae_root)) {
+			r = -EIO;
+			goto out_unlock;
+		}
 
 		for (i = 0; i < 4; ++i) {
 			WARN_ON_ONCE(IS_VALID_PAE_ROOT(mmu->pae_root[i]));
@@ -3267,13 +3275,15 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
 		mmu->root_hpa = __pa(mmu->pae_root);
 	} else {
 		WARN_ONCE(1, "Bad TDP root level = %d\n", shadow_root_level);
-		return -EIO;
+		r = -EIO;
+		goto out_unlock;
 	}
 
 	/* root_pgd is ignored for direct MMUs. */
 	mmu->root_pgd = 0;
-
-	return 0;
+out_unlock:
+	write_unlock(&vcpu->kvm->mmu_lock);
+	return r;
 }
 
 static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
@@ -3282,7 +3292,8 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 	u64 pdptrs[4], pm_mask;
 	gfn_t root_gfn, root_pgd;
 	hpa_t root;
-	int i;
+	unsigned i;
+	int r;
 
 	root_pgd = mmu->get_guest_pgd(vcpu);
 	root_gfn = root_pgd >> PAGE_SHIFT;
@@ -3290,6 +3301,10 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 	if (mmu_check_root(vcpu, root_gfn))
 		return 1;
 
+	/*
+	 * On SVM, reading PDPTRs might access guest memory, which might fault
+	 * and thus might sleep.  Grab the PDPTRs before acquiring mmu_lock.
+	 */
 	if (mmu->root_level == PT32E_ROOT_LEVEL) {
 		for (i = 0; i < 4; ++i) {
 			pdptrs[i] = mmu->get_pdptr(vcpu, i);
@@ -3301,6 +3316,11 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 		}
 	}
 
+	write_lock(&vcpu->kvm->mmu_lock);
+	r = make_mmu_pages_available(vcpu);
+	if (r < 0)
+		goto out_unlock;
+
 	/*
 	 * Do we shadow a long mode page table? If so we need to
 	 * write-protect the guests page table root.
@@ -3312,8 +3332,10 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 		goto set_root_pgd;
 	}
 
-	if (WARN_ON_ONCE(!mmu->pae_root))
-		return -EIO;
+	if (WARN_ON_ONCE(!mmu->pae_root)) {
+		r = -EIO;
+		goto out_unlock;
+	}
 
 	/*
 	 * We shadow a 32 bit page table. This may be a legacy 2-level
@@ -3324,8 +3346,10 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 	if (mmu->shadow_root_level == PT64_ROOT_4LEVEL) {
 		pm_mask |= PT_ACCESSED_MASK | PT_WRITABLE_MASK | PT_USER_MASK;
 
-		if (WARN_ON_ONCE(!mmu->lm_root))
-			return -EIO;
+		if (WARN_ON_ONCE(!mmu->lm_root)) {
+			r = -EIO;
+			goto out_unlock;
+		}
 
 		mmu->lm_root[0] = __pa(mmu->pae_root) | pm_mask;
 	}
@@ -3353,6 +3377,8 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 
 set_root_pgd:
 	mmu->root_pgd = root_pgd;
+out_unlock:
+	write_unlock(&vcpu->kvm->mmu_lock);
 
 	return 0;
 }
@@ -4853,14 +4879,10 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu)
 	r = mmu_alloc_special_roots(vcpu);
 	if (r)
 		goto out;
-	write_lock(&vcpu->kvm->mmu_lock);
-	if (make_mmu_pages_available(vcpu))
-		r = -ENOSPC;
-	else if (vcpu->arch.mmu->direct_map)
+	if (vcpu->arch.mmu->direct_map)
 		r = mmu_alloc_direct_roots(vcpu);
 	else
 		r = mmu_alloc_shadow_roots(vcpu);
-	write_unlock(&vcpu->kvm->mmu_lock);
 	if (r)
 		goto out;