KVM: switch to get_user_pages_fast

Convert gfn_to_pfn to use get_user_pages_fast, which can do lockless
pagetable lookups on x86. Kernel compilation on a 4-way guest is 3.7%
faster on VMX.
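
The conversion itself lives in virt/kvm/kvm_main.c and is not part of
the hunks below. As a rough sketch only (assuming the usual
gfn_to_hva()/bad_page helpers of this era; the exact error handling is
illustrative, not the real hunk), the lockless lookup path looks like:

    static pfn_t example_gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
    {
            struct page *page[1];
            unsigned long addr;
            int npages;

            addr = gfn_to_hva(kvm, gfn);
            if (kvm_is_error_hva(addr)) {
                    get_page(bad_page);
                    return page_to_pfn(bad_page);
            }

            /* lockless pagetable walk: no mmap_sem taken here */
            npages = get_user_pages_fast(addr, 1, 1, page);
            if (npages != 1) {
                    get_page(bad_page);
                    return page_to_pfn(bad_page);
            }

            return page_to_pfn(page[0]);
    }

Callers such as FNAME(page_fault) therefore no longer need to hold
mmap_sem around gfn_to_page()/gfn_to_pfn(), which is what the hunks
below remove.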

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>

diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index b671f61..6dd08e0 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -102,14 +102,10 @@
 	pt_element_t *table;
 	struct page *page;
 
-	down_read(&current->mm->mmap_sem);
 	page = gfn_to_page(kvm, table_gfn);
-	up_read(&current->mm->mmap_sem);
 
 	table = kmap_atomic(page, KM_USER0);
-
 	ret = CMPXCHG(&table[index], orig_pte, new_pte);
-
 	kunmap_atomic(table, KM_USER0);
 
 	kvm_release_page_dirty(page);
@@ -418,7 +414,6 @@
 		return 0;
 	}
 
-	down_read(&current->mm->mmap_sem);
 	if (walker.level == PT_DIRECTORY_LEVEL) {
 		gfn_t large_gfn;
 		large_gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE-1);
@@ -428,9 +423,8 @@
 		}
 	}
 	mmu_seq = vcpu->kvm->mmu_notifier_seq;
-	/* implicit mb(), we'll read before PT lock is unlocked */
+	smp_rmb();
 	pfn = gfn_to_pfn(vcpu->kvm, walker.gfn);
-	up_read(&current->mm->mmap_sem);
 
 	/* mmio */
 	if (is_error_pfn(pfn)) {
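
The smp_rmb() introduced above replaces the ordering that the removed
mmap_sem critical section used to provide implicitly: mmu_notifier_seq
must be read before the page is looked up, so that a racing mmu
notifier invalidation is caught by the retry check taken later under
mmu_lock. A sketch of that pairing on the fault side, assuming the
stock mmu_notifier_retry() helper of this era (the real code uses a
goto to a common unlock path):

    mmu_seq = vcpu->kvm->mmu_notifier_seq;
    smp_rmb();                      /* read seq before pinning the page */
    pfn = gfn_to_pfn(vcpu->kvm, walker.gfn);

    spin_lock(&vcpu->kvm->mmu_lock);
    if (mmu_notifier_retry(vcpu, mmu_seq)) {
            /* an invalidate ran in between; drop the page and retry */
            kvm_release_pfn_clean(pfn);
            spin_unlock(&vcpu->kvm->mmu_lock);
            return 0;
    }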