KVM: PPC: Keep page physical addresses in per-slot arrays
This allocates an array for each memory slot that is added to store
the physical addresses of the pages in the slot. This array is
vmalloc'd and accessed in kvmppc_h_enter using real_vmalloc_addr().
This allows us to remove the ram_pginfo field from the kvm_arch
struct, and removes the 64GB guest RAM limit that we had.
We use the low-order bits of the array entries to store a flag
indicating that we have done get_page on the corresponding page,
and therefore need to call put_page when we are finished with the
page. Currently this is set for all pages except those in our
special RMO regions.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Avi Kivity <avi@redhat.com>
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 629df2e..7a17ab5 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -38,6 +38,7 @@
#define KVM_MEMORY_SLOTS 32
/* memory slots that does not exposed to userspace */
#define KVM_PRIVATE_MEM_SLOTS 4
+#define KVM_MEM_SLOTS_NUM (KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS)
#ifdef CONFIG_KVM_MMIO
#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
@@ -175,25 +176,27 @@
unsigned long guest_rpte;
};
+/* Low-order bits in kvm->arch.slot_phys[][] */
+#define KVMPPC_GOT_PAGE 0x80
+
struct kvm_arch {
#ifdef CONFIG_KVM_BOOK3S_64_HV
unsigned long hpt_virt;
struct revmap_entry *revmap;
- unsigned long ram_npages;
unsigned long ram_psize;
unsigned long ram_porder;
- struct kvmppc_pginfo *ram_pginfo;
unsigned int lpid;
unsigned int host_lpid;
unsigned long host_lpcr;
unsigned long sdr1;
unsigned long host_sdr1;
int tlbie_lock;
- int n_rma_pages;
unsigned long lpcr;
unsigned long rmor;
struct kvmppc_rma_info *rma;
struct list_head spapr_tce_tables;
+ unsigned long *slot_phys[KVM_MEM_SLOTS_NUM];
+ int slot_npages[KVM_MEM_SLOTS_NUM];
unsigned short last_vcpu[NR_CPUS];
struct kvmppc_vcore *vcores[KVM_MAX_VCORES];
#endif /* CONFIG_KVM_BOOK3S_64_HV */
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 80ece8d..e4c6069 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -98,16 +98,16 @@
void kvmppc_map_vrma(struct kvm *kvm, struct kvm_userspace_memory_region *mem)
{
unsigned long i;
- unsigned long npages = kvm->arch.ram_npages;
- unsigned long pfn;
+ unsigned long npages;
+ unsigned long pa;
unsigned long *hpte;
unsigned long hash;
unsigned long porder = kvm->arch.ram_porder;
struct revmap_entry *rev;
- struct kvmppc_pginfo *pginfo = kvm->arch.ram_pginfo;
+ unsigned long *physp;
- if (!pginfo)
- return;
+ physp = kvm->arch.slot_phys[mem->slot];
+ npages = kvm->arch.slot_npages[mem->slot];
/* VRMA can't be > 1TB */
if (npages > 1ul << (40 - porder))
@@ -117,9 +117,10 @@
npages = HPT_NPTEG;
for (i = 0; i < npages; ++i) {
- pfn = pginfo[i].pfn;
- if (!pfn)
+ pa = physp[i];
+ if (!pa)
break;
+ pa &= PAGE_MASK;
/* can't use hpt_hash since va > 64 bits */
hash = (i ^ (VRMA_VSID ^ (VRMA_VSID << 25))) & HPT_HASH_MASK;
/*
@@ -131,8 +132,7 @@
hash = (hash << 3) + 7;
hpte = (unsigned long *) (kvm->arch.hpt_virt + (hash << 4));
/* HPTE low word - RPN, protection, etc. */
- hpte[1] = (pfn << PAGE_SHIFT) | HPTE_R_R | HPTE_R_C |
- HPTE_R_M | PP_RWXX;
+ hpte[1] = pa | HPTE_R_R | HPTE_R_C | HPTE_R_M | PP_RWXX;
smp_wmb();
hpte[0] = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) |
(i << (VRMA_PAGE_ORDER - 16)) | HPTE_V_BOLTED |
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index b1e3b9c..a84fedc 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -50,14 +50,6 @@
#include <linux/vmalloc.h>
#include <linux/highmem.h>
-/*
- * For now, limit memory to 64GB and require it to be large pages.
- * This value is chosen because it makes the ram_pginfo array be
- * 64kB in size, which is about as large as we want to be trying
- * to allocate with kmalloc.
- */
-#define MAX_MEM_ORDER 36
-
#define LARGE_PAGE_ORDER 24 /* 16MB pages */
/* #define EXIT_DEBUG */
@@ -147,10 +139,12 @@
unsigned long vcpuid, unsigned long vpa)
{
struct kvm *kvm = vcpu->kvm;
- unsigned long pg_index, ra, len;
+ unsigned long gfn, pg_index, ra, len;
unsigned long pg_offset;
void *va;
struct kvm_vcpu *tvcpu;
+ struct kvm_memory_slot *memslot;
+ unsigned long *physp;
tvcpu = kvmppc_find_vcpu(kvm, vcpuid);
if (!tvcpu)
@@ -164,14 +158,20 @@
if (vpa & 0x7f)
return H_PARAMETER;
/* registering new area; convert logical addr to real */
- pg_index = vpa >> kvm->arch.ram_porder;
+ gfn = vpa >> PAGE_SHIFT;
+ memslot = gfn_to_memslot(kvm, gfn);
+ if (!memslot || !(memslot->flags & KVM_MEMSLOT_INVALID))
+ return H_PARAMETER;
+ physp = kvm->arch.slot_phys[memslot->id];
+ if (!physp)
+ return H_PARAMETER;
+ pg_index = (gfn - memslot->base_gfn) >>
+ (kvm->arch.ram_porder - PAGE_SHIFT);
pg_offset = vpa & (kvm->arch.ram_psize - 1);
- if (pg_index >= kvm->arch.ram_npages)
+ ra = physp[pg_index];
+ if (!ra)
return H_PARAMETER;
- if (kvm->arch.ram_pginfo[pg_index].pfn == 0)
- return H_PARAMETER;
- ra = kvm->arch.ram_pginfo[pg_index].pfn << PAGE_SHIFT;
- ra |= pg_offset;
+ ra = (ra & PAGE_MASK) | pg_offset;
va = __va(ra);
if (flags <= 1)
len = *(unsigned short *)(va + 4);
@@ -1075,12 +1075,11 @@
struct kvm_userspace_memory_region *mem)
{
unsigned long psize, porder;
- unsigned long i, npages, totalpages;
- unsigned long pg_ix;
- struct kvmppc_pginfo *pginfo;
+ unsigned long i, npages;
unsigned long hva;
struct kvmppc_rma_info *ri = NULL;
struct page *page;
+ unsigned long *phys;
/* For now, only allow 16MB pages */
porder = LARGE_PAGE_ORDER;
@@ -1092,20 +1091,21 @@
return -EINVAL;
}
+ /* Allocate a slot_phys array */
npages = mem->memory_size >> porder;
- totalpages = (mem->guest_phys_addr + mem->memory_size) >> porder;
-
- /* More memory than we have space to track? */
- if (totalpages > (1ul << (MAX_MEM_ORDER - LARGE_PAGE_ORDER)))
- return -EINVAL;
+ phys = kvm->arch.slot_phys[mem->slot];
+ if (!phys) {
+ phys = vzalloc(npages * sizeof(unsigned long));
+ if (!phys)
+ return -ENOMEM;
+ kvm->arch.slot_phys[mem->slot] = phys;
+ kvm->arch.slot_npages[mem->slot] = npages;
+ }
/* Do we already have an RMA registered? */
if (mem->guest_phys_addr == 0 && kvm->arch.rma)
return -EINVAL;
- if (totalpages > kvm->arch.ram_npages)
- kvm->arch.ram_npages = totalpages;
-
/* Is this one of our preallocated RMAs? */
if (mem->guest_phys_addr == 0) {
struct vm_area_struct *vma;
@@ -1138,7 +1138,6 @@
}
atomic_inc(&ri->use_count);
kvm->arch.rma = ri;
- kvm->arch.n_rma_pages = rma_size >> porder;
/* Update LPCR and RMOR */
lpcr = kvm->arch.lpcr;
@@ -1162,12 +1161,9 @@
ri->base_pfn << PAGE_SHIFT, rma_size, lpcr);
}
- pg_ix = mem->guest_phys_addr >> porder;
- pginfo = kvm->arch.ram_pginfo + pg_ix;
- for (i = 0; i < npages; ++i, ++pg_ix) {
- if (ri && pg_ix < kvm->arch.n_rma_pages) {
- pginfo[i].pfn = ri->base_pfn +
- (pg_ix << (porder - PAGE_SHIFT));
+ for (i = 0; i < npages; ++i) {
+ if (ri && i < ri->npages) {
+ phys[i] = (ri->base_pfn << PAGE_SHIFT) + (i << porder);
continue;
}
hva = mem->userspace_addr + (i << porder);
@@ -1183,7 +1179,7 @@
hva, compound_order(page));
goto err;
}
- pginfo[i].pfn = page_to_pfn(page);
+ phys[i] = (page_to_pfn(page) << PAGE_SHIFT) | KVMPPC_GOT_PAGE;
}
return 0;
@@ -1192,6 +1188,28 @@
return -EINVAL;
}
+static void unpin_slot(struct kvm *kvm, int slot_id)
+{
+ unsigned long *physp;
+ unsigned long j, npages, pfn;
+ struct page *page;
+
+ physp = kvm->arch.slot_phys[slot_id];
+ npages = kvm->arch.slot_npages[slot_id];
+ if (physp) {
+ for (j = 0; j < npages; j++) {
+ if (!(physp[j] & KVMPPC_GOT_PAGE))
+ continue;
+ pfn = physp[j] >> PAGE_SHIFT;
+ page = pfn_to_page(pfn);
+ SetPageDirty(page);
+ put_page(page);
+ }
+ vfree(physp);
+ kvm->arch.slot_phys[slot_id] = NULL;
+ }
+}
+
void kvmppc_core_commit_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem)
{
@@ -1203,8 +1221,6 @@
int kvmppc_core_init_vm(struct kvm *kvm)
{
long r;
- unsigned long npages = 1ul << (MAX_MEM_ORDER - LARGE_PAGE_ORDER);
- long err = -ENOMEM;
unsigned long lpcr;
/* Allocate hashed page table */
@@ -1214,19 +1230,9 @@
INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
- kvm->arch.ram_pginfo = kzalloc(npages * sizeof(struct kvmppc_pginfo),
- GFP_KERNEL);
- if (!kvm->arch.ram_pginfo) {
- pr_err("kvmppc_core_init_vm: couldn't alloc %lu bytes\n",
- npages * sizeof(struct kvmppc_pginfo));
- goto out_free;
- }
-
- kvm->arch.ram_npages = 0;
kvm->arch.ram_psize = 1ul << LARGE_PAGE_ORDER;
kvm->arch.ram_porder = LARGE_PAGE_ORDER;
kvm->arch.rma = NULL;
- kvm->arch.n_rma_pages = 0;
kvm->arch.host_sdr1 = mfspr(SPRN_SDR1);
@@ -1249,25 +1255,15 @@
kvm->arch.lpcr = lpcr;
return 0;
-
- out_free:
- kvmppc_free_hpt(kvm);
- return err;
}
void kvmppc_core_destroy_vm(struct kvm *kvm)
{
- struct kvmppc_pginfo *pginfo;
unsigned long i;
- if (kvm->arch.ram_pginfo) {
- pginfo = kvm->arch.ram_pginfo;
- kvm->arch.ram_pginfo = NULL;
- for (i = kvm->arch.n_rma_pages; i < kvm->arch.ram_npages; ++i)
- if (pginfo[i].pfn)
- put_page(pfn_to_page(pginfo[i].pfn));
- kfree(pginfo);
- }
+ for (i = 0; i < KVM_MEM_SLOTS_NUM; i++)
+ unpin_slot(kvm, i);
+
if (kvm->arch.rma) {
kvm_release_rma(kvm->arch.rma);
kvm->arch.rma = NULL;
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 6148493..84dae82 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -20,6 +20,25 @@
#include <asm/synch.h>
#include <asm/ppc-opcode.h>
+/*
+ * Since this file is built in even if KVM is a module, we need
+ * a local copy of this function for the case where kvm_main.c is
+ * modular.
+ */
+static struct kvm_memory_slot *builtin_gfn_to_memslot(struct kvm *kvm,
+ gfn_t gfn)
+{
+ struct kvm_memslots *slots;
+ struct kvm_memory_slot *memslot;
+
+ slots = kvm_memslots(kvm);
+ kvm_for_each_memslot(memslot, slots)
+ if (gfn >= memslot->base_gfn &&
+ gfn < memslot->base_gfn + memslot->npages)
+ return memslot;
+ return NULL;
+}
+
/* Translate address of a vmalloc'd thing to a linear map address */
static void *real_vmalloc_addr(void *x)
{
@@ -59,10 +78,12 @@
{
unsigned long porder;
struct kvm *kvm = vcpu->kvm;
- unsigned long i, lpn, pa;
+ unsigned long i, gfn, lpn, pa;
unsigned long *hpte;
struct revmap_entry *rev;
unsigned long g_ptel = ptel;
+ struct kvm_memory_slot *memslot;
+ unsigned long *physp;
/* only handle 4k, 64k and 16M pages for now */
porder = 12;
@@ -80,12 +101,24 @@
} else
return H_PARAMETER;
}
- lpn = (ptel & HPTE_R_RPN) >> kvm->arch.ram_porder;
- if (lpn >= kvm->arch.ram_npages || porder > kvm->arch.ram_porder)
+ if (porder > kvm->arch.ram_porder)
return H_PARAMETER;
- pa = kvm->arch.ram_pginfo[lpn].pfn << PAGE_SHIFT;
+
+ gfn = ((ptel & HPTE_R_RPN) & ~((1ul << porder) - 1)) >> PAGE_SHIFT;
+ memslot = builtin_gfn_to_memslot(kvm, gfn);
+ if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID)))
+ return H_PARAMETER;
+ physp = kvm->arch.slot_phys[memslot->id];
+ if (!physp)
+ return H_PARAMETER;
+
+ lpn = (gfn - memslot->base_gfn) >> (kvm->arch.ram_porder - PAGE_SHIFT);
+ physp = real_vmalloc_addr(physp + lpn);
+ pa = *physp;
if (!pa)
return H_PARAMETER;
+ pa &= PAGE_MASK;
+
/* Check WIMG */
if ((ptel & HPTE_R_WIMG) != HPTE_R_M &&
(ptel & HPTE_R_WIMG) != (HPTE_R_W | HPTE_R_I | HPTE_R_M))