KVM: MMU: Optimize prefetch_page()
Instead of reading each pte individually, read 256 bytes worth of ptes and
batch process them.
Signed-off-by: Avi Kivity <avi@qumranet.com>
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 934c7b6..4d91822 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -460,8 +460,9 @@
static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu,
struct kvm_mmu_page *sp)
{
- int i, offset = 0, r = 0;
- pt_element_t pt;
+ int i, j, offset, r;
+ pt_element_t pt[256 / sizeof(pt_element_t)];
+ gpa_t pte_gpa;
if (sp->role.metaphysical
|| (PTTYPE == 32 && sp->role.level > PT_PAGE_TABLE_LEVEL)) {
@@ -469,19 +470,20 @@
return;
}
- if (PTTYPE == 32)
+ pte_gpa = gfn_to_gpa(sp->gfn);
+ if (PTTYPE == 32) {
offset = sp->role.quadrant << PT64_LEVEL_BITS;
+ pte_gpa += offset * sizeof(pt_element_t);
+ }
- for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
- gpa_t pte_gpa = gfn_to_gpa(sp->gfn);
- pte_gpa += (i+offset) * sizeof(pt_element_t);
-
- r = kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &pt,
- sizeof(pt_element_t));
- if (r || is_present_pte(pt))
- sp->spt[i] = shadow_trap_nonpresent_pte;
- else
- sp->spt[i] = shadow_notrap_nonpresent_pte;
+ for (i = 0; i < PT64_ENT_PER_PAGE; i += ARRAY_SIZE(pt)) {
+ r = kvm_read_guest_atomic(vcpu->kvm, pte_gpa, pt, sizeof pt);
+ pte_gpa += ARRAY_SIZE(pt) * sizeof(pt_element_t);
+ for (j = 0; j < ARRAY_SIZE(pt); ++j)
+ if (r || is_present_pte(pt[j]))
+ sp->spt[i+j] = shadow_trap_nonpresent_pte;
+ else
+ sp->spt[i+j] = shadow_notrap_nonpresent_pte;
}
}