// SPDX-License-Identifier: GPL-2.0
/*
 * Secure pages management: Migration of pages between normal and secure
 * memory of KVM guests.
 *
 * Copyright 2018 Bharata B Rao, IBM Corp. <bharata@linux.ibm.com>
 */

/*
 * A pseries guest can be run as a secure guest on Ultravisor-enabled
 * POWER platforms. On such platforms, this driver will be used to manage
 * the movement of guest pages between the normal memory managed by
 * hypervisor (HV) and secure memory managed by Ultravisor (UV).
 *
 * The page-in or page-out requests from UV will come to HV as hcalls and
 * HV will call back into UV via ultracalls to satisfy these page requests.
 *
 * Private ZONE_DEVICE memory equal to the amount of secure memory
 * available in the platform for running secure guests is hotplugged.
 * Whenever a page belonging to the guest becomes secure, a page from this
 * private device memory is used to represent and track that secure page
 * on the HV side. Some pages (like virtio buffers, VPA pages etc.) are
 * shared between UV and HV. However, such pages aren't represented by
 * device private memory and mappings to shared memory exist in both
 * UV and HV page tables.
 */

/*
 * Notes on locking
 *
 * kvm->arch.uvmem_lock is a per-guest lock that prevents concurrent
 * page-in and page-out requests for the same GPA. Concurrent accesses
 * can either come via UV (guest vCPUs requesting the same page)
 * or when HV and the guest simultaneously access the same page.
 * This mutex serializes the migration of a page from HV (normal) to
 * UV (secure) and vice versa. So the serialization points are around
 * migrate_vma routines and page-in/out routines.
 *
 * The per-guest mutex comes with a cost though. Mainly, it serializes the
 * fault path, as page-out can occur when HV faults on accessing secure
 * guest pages. Currently UV issues page-in requests for all the guest
 * PFNs one at a time during early boot (UV_ESM uvcall), so this is
 * not a cause for concern. Also, currently the number of page-outs caused
 * by HV touching secure pages is very low. If and when UV supports
 * overcommitting, then we might see concurrent guest-driven page-outs.
 *
 * Locking order
 *
 * 1. kvm->srcu - Protects KVM memslots
 * 2. kvm->mm->mmap_sem - find_vma, migrate_vma_pages and helpers, ksm_madvise
 * 3. kvm->arch.uvmem_lock - protects read/writes to uvmem slots thus acting
 *    as sync-points for page-in/out
 */

/*
 * Notes on page size
 *
 * Currently UV uses 2MB mappings internally, but will issue H_SVM_PAGE_IN
 * and H_SVM_PAGE_OUT hcalls at PAGE_SIZE (64K) granularity. HV tracks
 * secure GPAs at 64K page size and maintains one device PFN for each
 * 64K secure GPA. UV_PAGE_IN and UV_PAGE_OUT calls by HV are also issued
 * for one 64K page at a time.
 *
 * HV faulting on secure pages: When HV touches any secure page, it
 * faults and issues a UV_PAGE_OUT request with 64K page size. Currently
 * UV splits and remaps the 2MB page if necessary and copies out the
 * required 64K page contents.
 *
 * Shared pages: Whenever the guest shares a secure page, UV will split and
 * remap the 2MB page if required and issue H_SVM_PAGE_IN with 64K page size.
 *
 * HV invalidating a page: When a regular page belonging to a secure
 * guest gets unmapped, HV informs UV with a UV_PAGE_INVAL of 64K
 * page size. Using 64K page size is correct here because any non-secure
 * page will essentially be of 64K page size. Splitting by UV during sharing
 * and page-out ensures this.
 *
 * Page fault handling: When HV handles a page fault of a page belonging
 * to a secure guest, it sends that to UV with a 64K UV_PAGE_IN request.
 * Using 64K size is correct here too as UV would have split the 2MB page
 * into 64K mappings and would have done page-outs earlier.
 *
 * In summary, the current secure pages handling code in HV assumes
 * 64K page size and in fact fails any page-in/page-out requests of
 * non-64K size upfront. If and when UV starts supporting multiple
 * page-sizes, we need to break this assumption.
 */

#include <linux/pagemap.h>
#include <linux/migrate.h>
#include <linux/kvm_host.h>
#include <linux/ksm.h>
#include <asm/ultravisor.h>
#include <asm/mman.h>
#include <asm/kvm_ppc.h>

static struct dev_pagemap kvmppc_uvmem_pgmap;
static unsigned long *kvmppc_uvmem_bitmap;
static DEFINE_SPINLOCK(kvmppc_uvmem_bitmap_lock);

#define KVMPPC_UVMEM_PFN	(1UL << 63)

struct kvmppc_uvmem_slot {
	struct list_head list;
	unsigned long nr_pfns;
	unsigned long base_pfn;
	unsigned long *pfns;
};

struct kvmppc_uvmem_page_pvt {
	struct kvm *kvm;
	unsigned long gpa;
	bool skip_page_out;
};

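/*
 * Allocate and register a per-memslot array that records, for each GFN
 * in the slot, the device PFN that backs it while the page is secure.
 */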
int kvmppc_uvmem_slot_init(struct kvm *kvm, const struct kvm_memory_slot *slot)
{
	struct kvmppc_uvmem_slot *p;

	p = kzalloc(sizeof(*p), GFP_KERNEL);
	if (!p)
		return -ENOMEM;
	p->pfns = vzalloc(array_size(slot->npages, sizeof(*p->pfns)));
	if (!p->pfns) {
		kfree(p);
		return -ENOMEM;
	}
	p->nr_pfns = slot->npages;
	p->base_pfn = slot->base_gfn;

	mutex_lock(&kvm->arch.uvmem_lock);
	list_add(&p->list, &kvm->arch.uvmem_pfns);
	mutex_unlock(&kvm->arch.uvmem_lock);

	return 0;
}

/*
 * All device PFNs are already released by the time we come here.
 */
void kvmppc_uvmem_slot_free(struct kvm *kvm, const struct kvm_memory_slot *slot)
{
	struct kvmppc_uvmem_slot *p, *next;

	mutex_lock(&kvm->arch.uvmem_lock);
	list_for_each_entry_safe(p, next, &kvm->arch.uvmem_pfns, list) {
		if (p->base_pfn == slot->base_gfn) {
			vfree(p->pfns);
			list_del(&p->list);
			kfree(p);
			break;
		}
	}
	mutex_unlock(&kvm->arch.uvmem_lock);
}

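/*
 * Record that @gfn is now backed by the device PFN @uvmem_pfn, i.e. the
 * corresponding guest page has been moved to secure memory.
 *
 * Called with kvm->arch.uvmem_lock held (see kvmppc_uvmem_get_page()).
 */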
static void kvmppc_uvmem_pfn_insert(unsigned long gfn, unsigned long uvmem_pfn,
				    struct kvm *kvm)
{
	struct kvmppc_uvmem_slot *p;

	list_for_each_entry(p, &kvm->arch.uvmem_pfns, list) {
		if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) {
			unsigned long index = gfn - p->base_pfn;

			p->pfns[index] = uvmem_pfn | KVMPPC_UVMEM_PFN;
			return;
		}
	}
}

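/*
 * Drop the device PFN tracked for @gfn, typically when the secure page
 * is released and the GFN is backed by normal memory again.
 *
 * Called with kvm->arch.uvmem_lock held (see kvmppc_uvmem_page_free()).
 */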
static void kvmppc_uvmem_pfn_remove(unsigned long gfn, struct kvm *kvm)
{
	struct kvmppc_uvmem_slot *p;

	list_for_each_entry(p, &kvm->arch.uvmem_pfns, list) {
		if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) {
			p->pfns[gfn - p->base_pfn] = 0;
			return;
		}
	}
}

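/*
 * Return true if @gfn is currently backed by a device PFN (i.e. the page
 * is secure) and, if @uvmem_pfn is non-NULL, return that PFN through it.
 */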
static bool kvmppc_gfn_is_uvmem_pfn(unsigned long gfn, struct kvm *kvm,
				    unsigned long *uvmem_pfn)
{
	struct kvmppc_uvmem_slot *p;

	list_for_each_entry(p, &kvm->arch.uvmem_pfns, list) {
		if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) {
			unsigned long index = gfn - p->base_pfn;

			if (p->pfns[index] & KVMPPC_UVMEM_PFN) {
				if (uvmem_pfn)
					*uvmem_pfn = p->pfns[index] &
						     ~KVMPPC_UVMEM_PFN;
				return true;
			} else
				return false;
		}
	}
	return false;
}

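/*
 * H_SVM_INIT_START: Begin the guest's transition to secure mode.
 *
 * Set up uvmem slots for all of the guest's memslots and register each
 * of them with the Ultravisor.
 */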
unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
{
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int ret = H_SUCCESS;
	int srcu_idx;

	if (!kvmppc_uvmem_bitmap)
		return H_UNSUPPORTED;

	/* Only radix guests can be secure guests */
	if (!kvm_is_radix(kvm))
		return H_UNSUPPORTED;

	srcu_idx = srcu_read_lock(&kvm->srcu);
	slots = kvm_memslots(kvm);
	kvm_for_each_memslot(memslot, slots) {
		if (kvmppc_uvmem_slot_init(kvm, memslot)) {
			ret = H_PARAMETER;
			goto out;
		}
		ret = uv_register_mem_slot(kvm->arch.lpid,
					   memslot->base_gfn << PAGE_SHIFT,
					   memslot->npages * PAGE_SIZE,
					   0, memslot->id);
		if (ret < 0) {
			kvmppc_uvmem_slot_free(kvm, memslot);
			ret = H_PARAMETER;
			goto out;
		}
	}
	kvm->arch.secure_guest |= KVMPPC_SECURE_INIT_START;
out:
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	return ret;
}

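/*
 * H_SVM_INIT_DONE: Mark the guest's transition to secure mode as complete.
 * Fails if H_SVM_INIT_START was never issued for this guest.
 */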
unsigned long kvmppc_h_svm_init_done(struct kvm *kvm)
{
	if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START))
		return H_UNSUPPORTED;

	kvm->arch.secure_guest |= KVMPPC_SECURE_INIT_DONE;
	pr_info("LPID %d went secure\n", kvm->arch.lpid);
	return H_SUCCESS;
}

/*
 * Drop the device pages that we maintain for the secure guest
 *
 * We first mark the pages to be skipped from UV_PAGE_OUT when there
 * is an HV-side fault on these pages. Next we *get* these pages, forcing
 * a fault on them, and do fault-time migration to replace the device PTEs
 * in the QEMU page table with normal PTEs from newly allocated pages.
 */
void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free,
			     struct kvm *kvm, bool skip_page_out)
{
	int i;
	struct kvmppc_uvmem_page_pvt *pvt;
	unsigned long pfn, uvmem_pfn;
	unsigned long gfn = free->base_gfn;

	for (i = free->npages; i; --i, ++gfn) {
		struct page *uvmem_page;

		mutex_lock(&kvm->arch.uvmem_lock);
		if (!kvmppc_gfn_is_uvmem_pfn(gfn, kvm, &uvmem_pfn)) {
			mutex_unlock(&kvm->arch.uvmem_lock);
			continue;
		}

		uvmem_page = pfn_to_page(uvmem_pfn);
		pvt = uvmem_page->zone_device_data;
		pvt->skip_page_out = skip_page_out;
		mutex_unlock(&kvm->arch.uvmem_lock);

		pfn = gfn_to_pfn(kvm, gfn);
		if (is_error_noslot_pfn(pfn))
			continue;
		kvm_release_pfn_clean(pfn);
	}
}

/*
 * Get a free device PFN from the pool
 *
 * Called when a normal page is moved to secure memory (UV_PAGE_IN). The
 * device PFN will be used to keep track of the secure page on the HV side.
 *
 * Called with kvm->arch.uvmem_lock held
 */
static struct page *kvmppc_uvmem_get_page(unsigned long gpa, struct kvm *kvm)
{
	struct page *dpage = NULL;
	unsigned long bit, uvmem_pfn;
	struct kvmppc_uvmem_page_pvt *pvt;
	unsigned long pfn_last, pfn_first;

	pfn_first = kvmppc_uvmem_pgmap.res.start >> PAGE_SHIFT;
	pfn_last = pfn_first +
		   (resource_size(&kvmppc_uvmem_pgmap.res) >> PAGE_SHIFT);

	spin_lock(&kvmppc_uvmem_bitmap_lock);
	bit = find_first_zero_bit(kvmppc_uvmem_bitmap,
				  pfn_last - pfn_first);
	if (bit >= (pfn_last - pfn_first))
		goto out;
	bitmap_set(kvmppc_uvmem_bitmap, bit, 1);
	spin_unlock(&kvmppc_uvmem_bitmap_lock);

	pvt = kzalloc(sizeof(*pvt), GFP_KERNEL);
	if (!pvt)
		goto out_clear;

	uvmem_pfn = bit + pfn_first;
	kvmppc_uvmem_pfn_insert(gpa >> PAGE_SHIFT, uvmem_pfn, kvm);

	pvt->gpa = gpa;
	pvt->kvm = kvm;

	dpage = pfn_to_page(uvmem_pfn);
	dpage->zone_device_data = pvt;
	get_page(dpage);
	lock_page(dpage);
	return dpage;
out_clear:
	spin_lock(&kvmppc_uvmem_bitmap_lock);
	bitmap_clear(kvmppc_uvmem_bitmap, bit, 1);
out:
	spin_unlock(&kvmppc_uvmem_bitmap_lock);
	return NULL;
}

/*
 * Allocate a PFN from the private device memory pool and copy the page
 * from normal memory to secure memory using the UV_PAGE_IN uvcall.
 */
static int
kvmppc_svm_page_in(struct vm_area_struct *vma, unsigned long start,
		   unsigned long end, unsigned long gpa, struct kvm *kvm,
		   unsigned long page_shift, bool *downgrade)
{
	unsigned long src_pfn, dst_pfn = 0;
	struct migrate_vma mig;
	struct page *spage;
	unsigned long pfn;
	struct page *dpage;
	int ret = 0;

	memset(&mig, 0, sizeof(mig));
	mig.vma = vma;
	mig.start = start;
	mig.end = end;
	mig.src = &src_pfn;
	mig.dst = &dst_pfn;

	/*
	 * We come here with the mmap_sem write lock held just for
	 * ksm_madvise(); otherwise we only need the read lock.
	 * Hence downgrade to the read lock once ksm_madvise() is done.
	 */
	ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
			  MADV_UNMERGEABLE, &vma->vm_flags);
	downgrade_write(&kvm->mm->mmap_sem);
	*downgrade = true;
	if (ret)
		return ret;

	ret = migrate_vma_setup(&mig);
	if (ret)
		return ret;

	if (!(*mig.src & MIGRATE_PFN_MIGRATE)) {
		ret = -1;
		goto out_finalize;
	}

	dpage = kvmppc_uvmem_get_page(gpa, kvm);
	if (!dpage) {
		ret = -1;
		goto out_finalize;
	}

	pfn = *mig.src >> MIGRATE_PFN_SHIFT;
	spage = migrate_pfn_to_page(*mig.src);
	if (spage)
		uv_page_in(kvm->arch.lpid, pfn << page_shift, gpa, 0,
			   page_shift);

	*mig.dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
	migrate_vma_pages(&mig);
out_finalize:
	migrate_vma_finalize(&mig);
	return ret;
}

/*
 * Shares the page with HV, thus making it a normal page.
 *
 * - If the page is already secure, then provision a new page and share
 * - If the page is a normal page, share the existing page
 *
 * In the former case, uses the dev_pagemap_ops.migrate_to_ram handler
 * to unmap the device page from QEMU's page tables.
 */
static unsigned long
kvmppc_share_page(struct kvm *kvm, unsigned long gpa, unsigned long page_shift)
{
	int ret = H_PARAMETER;
	struct page *uvmem_page;
	struct kvmppc_uvmem_page_pvt *pvt;
	unsigned long pfn;
	unsigned long gfn = gpa >> page_shift;
	int srcu_idx;
	unsigned long uvmem_pfn;

	srcu_idx = srcu_read_lock(&kvm->srcu);
	mutex_lock(&kvm->arch.uvmem_lock);
	if (kvmppc_gfn_is_uvmem_pfn(gfn, kvm, &uvmem_pfn)) {
		uvmem_page = pfn_to_page(uvmem_pfn);
		pvt = uvmem_page->zone_device_data;
		pvt->skip_page_out = true;
	}

retry:
	mutex_unlock(&kvm->arch.uvmem_lock);
	pfn = gfn_to_pfn(kvm, gfn);
	if (is_error_noslot_pfn(pfn))
		goto out;

	mutex_lock(&kvm->arch.uvmem_lock);
	if (kvmppc_gfn_is_uvmem_pfn(gfn, kvm, &uvmem_pfn)) {
		uvmem_page = pfn_to_page(uvmem_pfn);
		pvt = uvmem_page->zone_device_data;
		pvt->skip_page_out = true;
		kvm_release_pfn_clean(pfn);
		goto retry;
	}

	if (!uv_page_in(kvm->arch.lpid, pfn << page_shift, gpa, 0, page_shift))
		ret = H_SUCCESS;
	kvm_release_pfn_clean(pfn);
	mutex_unlock(&kvm->arch.uvmem_lock);
out:
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	return ret;
}

/*
 * H_SVM_PAGE_IN: Move a page from normal memory to secure memory.
 *
 * The H_PAGE_IN_SHARED flag makes the page shared, which means that the
 * same memory is visible to both UV and HV.
 */
unsigned long
kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa,
		     unsigned long flags, unsigned long page_shift)
{
	bool downgrade = false;
	unsigned long start, end;
	struct vm_area_struct *vma;
	int srcu_idx;
	unsigned long gfn = gpa >> page_shift;
	int ret;

	if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START))
		return H_UNSUPPORTED;

	if (page_shift != PAGE_SHIFT)
		return H_P3;

	if (flags & ~H_PAGE_IN_SHARED)
		return H_P2;

	if (flags & H_PAGE_IN_SHARED)
		return kvmppc_share_page(kvm, gpa, page_shift);

	ret = H_PARAMETER;
	srcu_idx = srcu_read_lock(&kvm->srcu);
	down_write(&kvm->mm->mmap_sem);

	start = gfn_to_hva(kvm, gfn);
	if (kvm_is_error_hva(start))
		goto out;

	mutex_lock(&kvm->arch.uvmem_lock);
	/* Fail the page-in request of an already paged-in page */
	if (kvmppc_gfn_is_uvmem_pfn(gfn, kvm, NULL))
		goto out_unlock;

	end = start + (1UL << page_shift);
	vma = find_vma_intersection(kvm->mm, start, end);
	if (!vma || vma->vm_start > start || vma->vm_end < end)
		goto out_unlock;

	if (!kvmppc_svm_page_in(vma, start, end, gpa, kvm, page_shift,
				&downgrade))
		ret = H_SUCCESS;
out_unlock:
	mutex_unlock(&kvm->arch.uvmem_lock);
out:
	if (downgrade)
		up_read(&kvm->mm->mmap_sem);
	else
		up_write(&kvm->mm->mmap_sem);
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	return ret;
}

/*
 * Provision a new page on the HV side and copy over the contents
 * from secure memory using the UV_PAGE_OUT uvcall.
 */
static int
kvmppc_svm_page_out(struct vm_area_struct *vma, unsigned long start,
		    unsigned long end, unsigned long page_shift,
		    struct kvm *kvm, unsigned long gpa)
{
	unsigned long src_pfn, dst_pfn = 0;
	struct migrate_vma mig;
	struct page *dpage, *spage;
	struct kvmppc_uvmem_page_pvt *pvt;
	unsigned long pfn;
	int ret = U_SUCCESS;

	memset(&mig, 0, sizeof(mig));
	mig.vma = vma;
	mig.start = start;
	mig.end = end;
	mig.src = &src_pfn;
	mig.dst = &dst_pfn;

	mutex_lock(&kvm->arch.uvmem_lock);
	/* The requested page is already paged-out, nothing to do */
	if (!kvmppc_gfn_is_uvmem_pfn(gpa >> page_shift, kvm, NULL))
		goto out;

	ret = migrate_vma_setup(&mig);
	if (ret)
		goto out;

	spage = migrate_pfn_to_page(*mig.src);
	if (!spage || !(*mig.src & MIGRATE_PFN_MIGRATE))
		goto out_finalize;

	if (!is_zone_device_page(spage))
		goto out_finalize;

	dpage = alloc_page_vma(GFP_HIGHUSER, vma, start);
	if (!dpage) {
		ret = -1;
		goto out_finalize;
	}

	lock_page(dpage);
	pvt = spage->zone_device_data;
	pfn = page_to_pfn(dpage);

	/*
	 * This function is used in two cases:
	 * - When HV touches a secure page, for which we do UV_PAGE_OUT
	 * - When a secure page is converted to a shared page, we *get*
	 *   the page to essentially unmap the device page. In this
	 *   case we skip page-out.
	 */
	if (!pvt->skip_page_out)
		ret = uv_page_out(kvm->arch.lpid, pfn << page_shift,
				  gpa, 0, page_shift);

	if (ret == U_SUCCESS)
		*mig.dst = migrate_pfn(pfn) | MIGRATE_PFN_LOCKED;
	else {
		unlock_page(dpage);
		__free_page(dpage);
		goto out_finalize;
	}

	migrate_vma_pages(&mig);
out_finalize:
	migrate_vma_finalize(&mig);
out:
	mutex_unlock(&kvm->arch.uvmem_lock);
	return ret;
}

/*
 * Fault handler callback that gets called when HV touches any page that
 * has been moved to secure memory. We ask UV to give back the page by
 * issuing a UV_PAGE_OUT uvcall.
 *
 * This eventually results in dropping of the device PFN and the newly
 * provisioned page/PFN gets populated in QEMU page tables.
 */
static vm_fault_t kvmppc_uvmem_migrate_to_ram(struct vm_fault *vmf)
{
	struct kvmppc_uvmem_page_pvt *pvt = vmf->page->zone_device_data;

	if (kvmppc_svm_page_out(vmf->vma, vmf->address,
				vmf->address + PAGE_SIZE, PAGE_SHIFT,
				pvt->kvm, pvt->gpa))
		return VM_FAULT_SIGBUS;
	else
		return 0;
}

/*
 * Release the device PFN back to the pool
 *
 * Gets called when a secure page becomes a normal page during H_SVM_PAGE_OUT.
 * Gets called with kvm->arch.uvmem_lock held.
 */
static void kvmppc_uvmem_page_free(struct page *page)
{
	unsigned long pfn = page_to_pfn(page) -
			(kvmppc_uvmem_pgmap.res.start >> PAGE_SHIFT);
	struct kvmppc_uvmem_page_pvt *pvt;

	spin_lock(&kvmppc_uvmem_bitmap_lock);
	bitmap_clear(kvmppc_uvmem_bitmap, pfn, 1);
	spin_unlock(&kvmppc_uvmem_bitmap_lock);

	pvt = page->zone_device_data;
	page->zone_device_data = NULL;
	kvmppc_uvmem_pfn_remove(pvt->gpa >> PAGE_SHIFT, pvt->kvm);
	kfree(pvt);
}

static const struct dev_pagemap_ops kvmppc_uvmem_ops = {
	.page_free = kvmppc_uvmem_page_free,
	.migrate_to_ram = kvmppc_uvmem_migrate_to_ram,
};

/*
 * H_SVM_PAGE_OUT: Move a page from secure memory to normal memory.
 */
unsigned long
kvmppc_h_svm_page_out(struct kvm *kvm, unsigned long gpa,
		      unsigned long flags, unsigned long page_shift)
{
	unsigned long gfn = gpa >> page_shift;
	unsigned long start, end;
	struct vm_area_struct *vma;
	int srcu_idx;
	int ret;

	if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START))
		return H_UNSUPPORTED;

	if (page_shift != PAGE_SHIFT)
		return H_P3;

	if (flags)
		return H_P2;

	ret = H_PARAMETER;
	srcu_idx = srcu_read_lock(&kvm->srcu);
	down_read(&kvm->mm->mmap_sem);
	start = gfn_to_hva(kvm, gfn);
	if (kvm_is_error_hva(start))
		goto out;

	end = start + (1UL << page_shift);
	vma = find_vma_intersection(kvm->mm, start, end);
	if (!vma || vma->vm_start > start || vma->vm_end < end)
		goto out;

	if (!kvmppc_svm_page_out(vma, start, end, page_shift, kvm, gpa))
		ret = H_SUCCESS;
out:
	up_read(&kvm->mm->mmap_sem);
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	return ret;
}

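/*
 * Hand over the contents of a normal (non-secure) guest page to UV via
 * UV_PAGE_IN. If the GFN is already backed by a device PFN, the page is
 * already secure and there is nothing to do.
 */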
int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned long gfn)
{
	unsigned long pfn;
	int ret = U_SUCCESS;

	pfn = gfn_to_pfn(kvm, gfn);
	if (is_error_noslot_pfn(pfn))
		return -EFAULT;

	mutex_lock(&kvm->arch.uvmem_lock);
	if (kvmppc_gfn_is_uvmem_pfn(gfn, kvm, NULL))
		goto out;

	ret = uv_page_in(kvm->arch.lpid, pfn << PAGE_SHIFT, gfn << PAGE_SHIFT,
			 0, PAGE_SHIFT);
out:
	kvm_release_pfn_clean(pfn);
	mutex_unlock(&kvm->arch.uvmem_lock);
	return (ret == U_SUCCESS) ? RESUME_GUEST : -EFAULT;
}

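/*
 * Return the total amount of secure memory advertised by the platform,
 * obtained by summing up the sizes listed in the "secure-memory-ranges"
 * property of the "ibm,uv-firmware" device tree node. Each range is four
 * cells: a 2-cell address followed by a 2-cell size.
 */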
static u64 kvmppc_get_secmem_size(void)
{
	struct device_node *np;
	int i, len;
	const __be32 *prop;
	u64 size = 0;

	np = of_find_compatible_node(NULL, NULL, "ibm,uv-firmware");
	if (!np)
		goto out;

	prop = of_get_property(np, "secure-memory-ranges", &len);
	if (!prop)
		goto out_put;

	for (i = 0; i < len / (sizeof(*prop) * 4); i++)
		size += of_read_number(prop + (i * 4) + 2, 2);

out_put:
	of_node_put(np);
out:
	return size;
}

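/*
 * Reserve an IO memory region as large as the platform's secure memory
 * and remap it as device-private (ZONE_DEVICE) memory. Also allocate the
 * bitmap used to hand out device PFNs for secure guest pages.
 */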
int kvmppc_uvmem_init(void)
{
	int ret = 0;
	unsigned long size;
	struct resource *res;
	void *addr;
	unsigned long pfn_last, pfn_first;

	size = kvmppc_get_secmem_size();
	if (!size) {
		/*
		 * Don't fail the initialization of the kvm-hv module if
		 * the platform doesn't export the ibm,uv-firmware node.
		 * Let normal guests run on such a PEF-disabled platform.
		 */
		pr_info("KVMPPC-UVMEM: No support for secure guests\n");
		goto out;
	}

	res = request_free_mem_region(&iomem_resource, size, "kvmppc_uvmem");
	if (IS_ERR(res)) {
		ret = PTR_ERR(res);
		goto out;
	}

	kvmppc_uvmem_pgmap.type = MEMORY_DEVICE_PRIVATE;
	kvmppc_uvmem_pgmap.res = *res;
	kvmppc_uvmem_pgmap.ops = &kvmppc_uvmem_ops;
	addr = memremap_pages(&kvmppc_uvmem_pgmap, NUMA_NO_NODE);
	if (IS_ERR(addr)) {
		ret = PTR_ERR(addr);
		goto out_free_region;
	}

	pfn_first = res->start >> PAGE_SHIFT;
	pfn_last = pfn_first + (resource_size(res) >> PAGE_SHIFT);
	kvmppc_uvmem_bitmap = kcalloc(BITS_TO_LONGS(pfn_last - pfn_first),
				      sizeof(unsigned long), GFP_KERNEL);
	if (!kvmppc_uvmem_bitmap) {
		ret = -ENOMEM;
		goto out_unmap;
	}

	pr_info("KVMPPC-UVMEM: Secure Memory size 0x%lx\n", size);
	return ret;
out_unmap:
	memunmap_pages(&kvmppc_uvmem_pgmap);
out_free_region:
	release_mem_region(res->start, size);
out:
	return ret;
}

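/* Tear down the device-private memory region set up by kvmppc_uvmem_init(). */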
void kvmppc_uvmem_free(void)
{
	memunmap_pages(&kvmppc_uvmem_pgmap);
	release_mem_region(kvmppc_uvmem_pgmap.res.start,
			   resource_size(&kvmppc_uvmem_pgmap.res));
	kfree(kvmppc_uvmem_bitmap);
}