// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2012 - Virtual Open Systems and Columbia University
 * Author: Christoffer Dall <c.dall@virtualopensystems.com>
 */
Christoffer Dall342cd0a2013-01-20 18:28:06 -05006
7#include <linux/mman.h>
8#include <linux/kvm_host.h>
9#include <linux/io.h>
Christoffer Dallad361f02012-11-01 17:14:45 +010010#include <linux/hugetlb.h>
James Morse196f8782017-06-20 17:11:48 +010011#include <linux/sched/signal.h>
Christoffer Dall45e96ea2013-01-20 18:43:58 -050012#include <trace/events/kvm.h>
Christoffer Dall342cd0a2013-01-20 18:28:06 -050013#include <asm/pgalloc.h>
Christoffer Dall94f8e642013-01-20 18:28:12 -050014#include <asm/cacheflush.h>
Christoffer Dall342cd0a2013-01-20 18:28:06 -050015#include <asm/kvm_arm.h>
16#include <asm/kvm_mmu.h>
James Morse0db5e022019-01-29 18:48:49 +000017#include <asm/kvm_ras.h>
Christoffer Dalld5d81842013-01-20 18:28:07 -050018#include <asm/kvm_asm.h>
Christoffer Dall94f8e642013-01-20 18:28:12 -050019#include <asm/kvm_emulate.h>
Marc Zyngier1e947ba2015-01-29 11:59:54 +000020#include <asm/virt.h>
Christoffer Dalld5d81842013-01-20 18:28:07 -050021
22#include "trace.h"
Christoffer Dall342cd0a2013-01-20 18:28:06 -050023
Marc Zyngier5a677ce2013-04-12 19:12:06 +010024static pgd_t *boot_hyp_pgd;
Marc Zyngier2fb41052013-04-12 19:12:03 +010025static pgd_t *hyp_pgd;
Ard Biesheuvele4c5a682015-03-19 16:42:28 +000026static pgd_t *merged_hyp_pgd;
Christoffer Dall342cd0a2013-01-20 18:28:06 -050027static DEFINE_MUTEX(kvm_hyp_pgd_mutex);
28
Marc Zyngier5a677ce2013-04-12 19:12:06 +010029static unsigned long hyp_idmap_start;
30static unsigned long hyp_idmap_end;
31static phys_addr_t hyp_idmap_vector;
32
Marc Zyngiere3f019b2017-12-04 17:04:38 +000033static unsigned long io_map_base;
34
/* Page allocation order needed to hold PTRS_PER_PGD pgd_t entries. */
#define hyp_pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t))

/* Flag bits; KVM_S2PTE_FLAG_IS_IOMAP is the bit tested by is_iomap(). */
#define KVM_S2PTE_FLAG_IS_IOMAP		(1UL << 0)
#define KVM_S2_FLAG_LOGGING_ACTIVE	(1UL << 1)
39
Marc Zyngier6d674e22019-12-11 16:56:48 +000040static bool is_iomap(unsigned long flags)
41{
42 return flags & KVM_S2PTE_FLAG_IS_IOMAP;
43}
44
Mario Smarduch15a49a42015-01-15 15:58:58 -080045static bool memslot_is_logging(struct kvm_memory_slot *memslot)
46{
Mario Smarduch15a49a42015-01-15 15:58:58 -080047 return memslot->dirty_bitmap && !(memslot->flags & KVM_MEM_READONLY);
Mario Smarduch72760302015-01-15 15:59:01 -080048}
49
50/**
51 * kvm_flush_remote_tlbs() - flush all VM TLB entries for v7/8
52 * @kvm: pointer to kvm structure.
53 *
54 * Interface to HYP function to flush all VM TLB entries
55 */
56void kvm_flush_remote_tlbs(struct kvm *kvm)
57{
58 kvm_call_hyp(__kvm_tlb_flush_vmid, kvm);
Mario Smarduch15a49a42015-01-15 15:58:58 -080059}
Christoffer Dallad361f02012-11-01 17:14:45 +010060
Marc Zyngier48762762013-01-28 15:27:00 +000061static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
Christoffer Dalld5d81842013-01-20 18:28:07 -050062{
Suzuki K Poulose8684e702016-03-22 17:14:25 +000063 kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
Christoffer Dalld5d81842013-01-20 18:28:07 -050064}
65
/*
 * D-Cache management functions. They take the page table entries by
 * value, as they are flushing the cache using the kernel mapping (or
 * kmap on 32bit).
 */
71static void kvm_flush_dcache_pte(pte_t pte)
72{
73 __kvm_flush_dcache_pte(pte);
74}
75
76static void kvm_flush_dcache_pmd(pmd_t pmd)
77{
78 __kvm_flush_dcache_pmd(pmd);
79}
80
81static void kvm_flush_dcache_pud(pud_t pud)
82{
83 __kvm_flush_dcache_pud(pud);
84}
85
Ard Biesheuvele6fab542015-11-10 15:11:20 +010086static bool kvm_is_device_pfn(unsigned long pfn)
87{
88 return !pfn_valid(pfn);
89}
90
Mario Smarduch15a49a42015-01-15 15:58:58 -080091/**
92 * stage2_dissolve_pmd() - clear and flush huge PMD entry
93 * @kvm: pointer to kvm structure.
94 * @addr: IPA
95 * @pmd: pmd pointer for IPA
96 *
Zenghui Yu8324c3d2019-03-25 08:02:05 +000097 * Function clears a PMD entry, flushes addr 1st and 2nd stage TLBs.
Mario Smarduch15a49a42015-01-15 15:58:58 -080098 */
99static void stage2_dissolve_pmd(struct kvm *kvm, phys_addr_t addr, pmd_t *pmd)
100{
Suzuki K Poulosebbb3b6b2016-03-01 12:00:39 +0000101 if (!pmd_thp_or_huge(*pmd))
Mario Smarduch15a49a42015-01-15 15:58:58 -0800102 return;
103
104 pmd_clear(pmd);
105 kvm_tlb_flush_vmid_ipa(kvm, addr);
106 put_page(virt_to_page(pmd));
107}
108
Punit Agrawalb8e0ba72018-12-11 17:10:41 +0000109/**
110 * stage2_dissolve_pud() - clear and flush huge PUD entry
111 * @kvm: pointer to kvm structure.
112 * @addr: IPA
113 * @pud: pud pointer for IPA
114 *
Zenghui Yu8324c3d2019-03-25 08:02:05 +0000115 * Function clears a PUD entry, flushes addr 1st and 2nd stage TLBs.
Punit Agrawalb8e0ba72018-12-11 17:10:41 +0000116 */
117static void stage2_dissolve_pud(struct kvm *kvm, phys_addr_t addr, pud_t *pudp)
118{
119 if (!stage2_pud_huge(kvm, *pudp))
120 return;
121
122 stage2_pud_clear(kvm, pudp);
123 kvm_tlb_flush_vmid_ipa(kvm, addr);
124 put_page(virt_to_page(pudp));
125}
126
Christoffer Dalld5d81842013-01-20 18:28:07 -0500127static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
128 int min, int max)
129{
130 void *page;
131
132 BUG_ON(max > KVM_NR_MEM_OBJS);
133 if (cache->nobjs >= min)
134 return 0;
135 while (cache->nobjs < max) {
Mike Rapoport50f11a82019-07-11 20:58:02 -0700136 page = (void *)__get_free_page(GFP_PGTABLE_USER);
Christoffer Dalld5d81842013-01-20 18:28:07 -0500137 if (!page)
138 return -ENOMEM;
139 cache->objects[cache->nobjs++] = page;
140 }
141 return 0;
142}
143
144static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc)
145{
146 while (mc->nobjs)
147 free_page((unsigned long)mc->objects[--mc->nobjs]);
148}
149
150static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
151{
152 void *p;
153
154 BUG_ON(!mc || !mc->nobjs);
155 p = mc->objects[--mc->nobjs];
156 return p;
157}
158
Suzuki K Poulose7a1c8312016-03-23 12:08:02 +0000159static void clear_stage2_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr)
Marc Zyngier979acd52013-08-06 13:05:48 +0100160{
Mike Rapoporte9f63762020-06-04 16:46:23 -0700161 p4d_t *p4d_table __maybe_unused = stage2_p4d_offset(kvm, pgd, 0UL);
Suzuki K Poulosee55cac52018-09-26 17:32:44 +0100162 stage2_pgd_clear(kvm, pgd);
Christoffer Dall4f853a72014-05-09 23:31:31 +0200163 kvm_tlb_flush_vmid_ipa(kvm, addr);
Mike Rapoporte9f63762020-06-04 16:46:23 -0700164 stage2_p4d_free(kvm, p4d_table);
Christoffer Dall4f853a72014-05-09 23:31:31 +0200165 put_page(virt_to_page(pgd));
Marc Zyngier979acd52013-08-06 13:05:48 +0100166}
167
Mike Rapoporte9f63762020-06-04 16:46:23 -0700168static void clear_stage2_p4d_entry(struct kvm *kvm, p4d_t *p4d, phys_addr_t addr)
169{
170 pud_t *pud_table __maybe_unused = stage2_pud_offset(kvm, p4d, 0);
171 stage2_p4d_clear(kvm, p4d);
172 kvm_tlb_flush_vmid_ipa(kvm, addr);
173 stage2_pud_free(kvm, pud_table);
174 put_page(virt_to_page(p4d));
175}
176
Suzuki K Poulose7a1c8312016-03-23 12:08:02 +0000177static void clear_stage2_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr)
Christoffer Dall342cd0a2013-01-20 18:28:06 -0500178{
Suzuki K Poulosee55cac52018-09-26 17:32:44 +0100179 pmd_t *pmd_table __maybe_unused = stage2_pmd_offset(kvm, pud, 0);
180 VM_BUG_ON(stage2_pud_huge(kvm, *pud));
181 stage2_pud_clear(kvm, pud);
Christoffer Dall4f853a72014-05-09 23:31:31 +0200182 kvm_tlb_flush_vmid_ipa(kvm, addr);
Suzuki K Poulosee55cac52018-09-26 17:32:44 +0100183 stage2_pmd_free(kvm, pmd_table);
Marc Zyngier4f728272013-04-12 19:12:05 +0100184 put_page(virt_to_page(pud));
185}
Christoffer Dall342cd0a2013-01-20 18:28:06 -0500186
Suzuki K Poulose7a1c8312016-03-23 12:08:02 +0000187static void clear_stage2_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)
Marc Zyngier4f728272013-04-12 19:12:05 +0100188{
Christoffer Dall4f853a72014-05-09 23:31:31 +0200189 pte_t *pte_table = pte_offset_kernel(pmd, 0);
Suzuki K Poulosebbb3b6b2016-03-01 12:00:39 +0000190 VM_BUG_ON(pmd_thp_or_huge(*pmd));
Christoffer Dall4f853a72014-05-09 23:31:31 +0200191 pmd_clear(pmd);
192 kvm_tlb_flush_vmid_ipa(kvm, addr);
Anshuman Khandual14b94d02019-03-12 18:55:45 +0530193 free_page((unsigned long)pte_table);
Marc Zyngier4f728272013-04-12 19:12:05 +0100194 put_page(virt_to_page(pmd));
195}
196
Marc Zyngier88dc25e82018-05-25 12:23:11 +0100197static inline void kvm_set_pte(pte_t *ptep, pte_t new_pte)
198{
199 WRITE_ONCE(*ptep, new_pte);
200 dsb(ishst);
201}
202
203static inline void kvm_set_pmd(pmd_t *pmdp, pmd_t new_pmd)
204{
205 WRITE_ONCE(*pmdp, new_pmd);
206 dsb(ishst);
207}
208
Marc Zyngier0db9dd82018-06-27 15:51:05 +0100209static inline void kvm_pmd_populate(pmd_t *pmdp, pte_t *ptep)
210{
211 kvm_set_pmd(pmdp, kvm_mk_pmd(ptep));
212}
213
214static inline void kvm_pud_populate(pud_t *pudp, pmd_t *pmdp)
215{
216 WRITE_ONCE(*pudp, kvm_mk_pud(pmdp));
217 dsb(ishst);
218}
219
Mike Rapoporte9f63762020-06-04 16:46:23 -0700220static inline void kvm_p4d_populate(p4d_t *p4dp, pud_t *pudp)
Marc Zyngier0db9dd82018-06-27 15:51:05 +0100221{
Mike Rapoporte9f63762020-06-04 16:46:23 -0700222 WRITE_ONCE(*p4dp, kvm_mk_p4d(pudp));
Marc Zyngier0db9dd82018-06-27 15:51:05 +0100223 dsb(ishst);
224}
225
Mike Rapoporte9f63762020-06-04 16:46:23 -0700226static inline void kvm_pgd_populate(pgd_t *pgdp, p4d_t *p4dp)
227{
228#ifndef __PAGETABLE_P4D_FOLDED
229 WRITE_ONCE(*pgdp, kvm_mk_pgd(p4dp));
230 dsb(ishst);
231#endif
232}
233
/*
 * Unmapping vs dcache management:
 *
 * If a guest maps certain memory pages as uncached, all writes will
 * bypass the data cache and go directly to RAM.  However, the CPUs
 * can still speculate reads (not writes) and fill cache lines with
 * data.
 *
 * Those cache lines will be *clean* cache lines though, so a
 * clean+invalidate operation is equivalent to an invalidate
 * operation, because no cache lines are marked dirty.
 *
 * Those clean cache lines could be filled prior to an uncached write
 * by the guest, and the cache coherent IO subsystem would therefore
 * end up writing old data to disk.
 *
 * This is why right after unmapping a page/section and invalidating
 * the corresponding TLBs, we call kvm_flush_dcache_p*() to make sure
 * the IO subsystem will never hit in the cache.
 *
 * This is all avoided on systems that have ARM64_HAS_STAGE2_FWB, as
 * we then fully enforce cacheability of RAM, no matter what the guest
 * does.
 */
Suzuki K Poulose7a1c8312016-03-23 12:08:02 +0000258static void unmap_stage2_ptes(struct kvm *kvm, pmd_t *pmd,
Christoffer Dall4f853a72014-05-09 23:31:31 +0200259 phys_addr_t addr, phys_addr_t end)
Marc Zyngier4f728272013-04-12 19:12:05 +0100260{
Christoffer Dall4f853a72014-05-09 23:31:31 +0200261 phys_addr_t start_addr = addr;
262 pte_t *pte, *start_pte;
263
264 start_pte = pte = pte_offset_kernel(pmd, addr);
265 do {
266 if (!pte_none(*pte)) {
Marc Zyngier363ef892014-12-19 16:48:06 +0000267 pte_t old_pte = *pte;
268
Christoffer Dall4f853a72014-05-09 23:31:31 +0200269 kvm_set_pte(pte, __pte(0));
Christoffer Dall4f853a72014-05-09 23:31:31 +0200270 kvm_tlb_flush_vmid_ipa(kvm, addr);
Marc Zyngier363ef892014-12-19 16:48:06 +0000271
272 /* No need to invalidate the cache for device mappings */
Ard Biesheuvel0de58f82015-12-03 09:25:22 +0100273 if (!kvm_is_device_pfn(pte_pfn(old_pte)))
Marc Zyngier363ef892014-12-19 16:48:06 +0000274 kvm_flush_dcache_pte(old_pte);
275
276 put_page(virt_to_page(pte));
Christoffer Dall4f853a72014-05-09 23:31:31 +0200277 }
278 } while (pte++, addr += PAGE_SIZE, addr != end);
279
Suzuki K Poulosee55cac52018-09-26 17:32:44 +0100280 if (stage2_pte_table_empty(kvm, start_pte))
Suzuki K Poulose7a1c8312016-03-23 12:08:02 +0000281 clear_stage2_pmd_entry(kvm, pmd, start_addr);
Christoffer Dall342cd0a2013-01-20 18:28:06 -0500282}
283
Suzuki K Poulose7a1c8312016-03-23 12:08:02 +0000284static void unmap_stage2_pmds(struct kvm *kvm, pud_t *pud,
Christoffer Dall4f853a72014-05-09 23:31:31 +0200285 phys_addr_t addr, phys_addr_t end)
Christoffer Dall342cd0a2013-01-20 18:28:06 -0500286{
Christoffer Dall4f853a72014-05-09 23:31:31 +0200287 phys_addr_t next, start_addr = addr;
288 pmd_t *pmd, *start_pmd;
Marc Zyngier000d3992013-03-05 02:43:17 +0000289
Suzuki K Poulosee55cac52018-09-26 17:32:44 +0100290 start_pmd = pmd = stage2_pmd_offset(kvm, pud, addr);
Christoffer Dall4f853a72014-05-09 23:31:31 +0200291 do {
Suzuki K Poulosee55cac52018-09-26 17:32:44 +0100292 next = stage2_pmd_addr_end(kvm, addr, end);
Christoffer Dall4f853a72014-05-09 23:31:31 +0200293 if (!pmd_none(*pmd)) {
Suzuki K Poulosebbb3b6b2016-03-01 12:00:39 +0000294 if (pmd_thp_or_huge(*pmd)) {
Marc Zyngier363ef892014-12-19 16:48:06 +0000295 pmd_t old_pmd = *pmd;
296
Christoffer Dall4f853a72014-05-09 23:31:31 +0200297 pmd_clear(pmd);
298 kvm_tlb_flush_vmid_ipa(kvm, addr);
Marc Zyngier363ef892014-12-19 16:48:06 +0000299
300 kvm_flush_dcache_pmd(old_pmd);
301
Christoffer Dall4f853a72014-05-09 23:31:31 +0200302 put_page(virt_to_page(pmd));
303 } else {
Suzuki K Poulose7a1c8312016-03-23 12:08:02 +0000304 unmap_stage2_ptes(kvm, pmd, addr, next);
Marc Zyngier4f728272013-04-12 19:12:05 +0100305 }
306 }
Christoffer Dall4f853a72014-05-09 23:31:31 +0200307 } while (pmd++, addr = next, addr != end);
Marc Zyngier4f728272013-04-12 19:12:05 +0100308
Suzuki K Poulosee55cac52018-09-26 17:32:44 +0100309 if (stage2_pmd_table_empty(kvm, start_pmd))
Suzuki K Poulose7a1c8312016-03-23 12:08:02 +0000310 clear_stage2_pud_entry(kvm, pud, start_addr);
Christoffer Dall4f853a72014-05-09 23:31:31 +0200311}
312
Mike Rapoporte9f63762020-06-04 16:46:23 -0700313static void unmap_stage2_puds(struct kvm *kvm, p4d_t *p4d,
Christoffer Dall4f853a72014-05-09 23:31:31 +0200314 phys_addr_t addr, phys_addr_t end)
315{
316 phys_addr_t next, start_addr = addr;
317 pud_t *pud, *start_pud;
318
Mike Rapoporte9f63762020-06-04 16:46:23 -0700319 start_pud = pud = stage2_pud_offset(kvm, p4d, addr);
Christoffer Dall4f853a72014-05-09 23:31:31 +0200320 do {
Suzuki K Poulosee55cac52018-09-26 17:32:44 +0100321 next = stage2_pud_addr_end(kvm, addr, end);
322 if (!stage2_pud_none(kvm, *pud)) {
323 if (stage2_pud_huge(kvm, *pud)) {
Marc Zyngier363ef892014-12-19 16:48:06 +0000324 pud_t old_pud = *pud;
325
Suzuki K Poulosee55cac52018-09-26 17:32:44 +0100326 stage2_pud_clear(kvm, pud);
Christoffer Dall4f853a72014-05-09 23:31:31 +0200327 kvm_tlb_flush_vmid_ipa(kvm, addr);
Marc Zyngier363ef892014-12-19 16:48:06 +0000328 kvm_flush_dcache_pud(old_pud);
Christoffer Dall4f853a72014-05-09 23:31:31 +0200329 put_page(virt_to_page(pud));
330 } else {
Suzuki K Poulose7a1c8312016-03-23 12:08:02 +0000331 unmap_stage2_pmds(kvm, pud, addr, next);
Christoffer Dall4f853a72014-05-09 23:31:31 +0200332 }
333 }
334 } while (pud++, addr = next, addr != end);
335
Suzuki K Poulosee55cac52018-09-26 17:32:44 +0100336 if (stage2_pud_table_empty(kvm, start_pud))
Mike Rapoporte9f63762020-06-04 16:46:23 -0700337 clear_stage2_p4d_entry(kvm, p4d, start_addr);
338}
339
340static void unmap_stage2_p4ds(struct kvm *kvm, pgd_t *pgd,
341 phys_addr_t addr, phys_addr_t end)
342{
343 phys_addr_t next, start_addr = addr;
344 p4d_t *p4d, *start_p4d;
345
346 start_p4d = p4d = stage2_p4d_offset(kvm, pgd, addr);
347 do {
348 next = stage2_p4d_addr_end(kvm, addr, end);
349 if (!stage2_p4d_none(kvm, *p4d))
350 unmap_stage2_puds(kvm, p4d, addr, next);
351 } while (p4d++, addr = next, addr != end);
352
353 if (stage2_p4d_table_empty(kvm, start_p4d))
Suzuki K Poulose7a1c8312016-03-23 12:08:02 +0000354 clear_stage2_pgd_entry(kvm, pgd, start_addr);
Christoffer Dall4f853a72014-05-09 23:31:31 +0200355}
356
Suzuki K Poulose7a1c8312016-03-23 12:08:02 +0000357/**
358 * unmap_stage2_range -- Clear stage2 page table entries to unmap a range
359 * @kvm: The VM pointer
360 * @start: The intermediate physical base address of the range to unmap
361 * @size: The size of the area to unmap
362 *
363 * Clear a range of stage-2 mappings, lowering the various ref-counts. Must
364 * be called while holding mmu_lock (unless for freeing the stage2 pgd before
365 * destroying the VM), otherwise another faulting VCPU may come in and mess
366 * with things behind our backs.
367 */
368static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
Christoffer Dall4f853a72014-05-09 23:31:31 +0200369{
370 pgd_t *pgd;
371 phys_addr_t addr = start, end = start + size;
372 phys_addr_t next;
373
Suzuki K Poulose8b3405e2017-04-03 15:12:43 +0100374 assert_spin_locked(&kvm->mmu_lock);
Jia He47a91b72018-05-21 11:05:30 +0800375 WARN_ON(size & ~PAGE_MASK);
376
Suzuki K Poulosee55cac52018-09-26 17:32:44 +0100377 pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr);
Christoffer Dall4f853a72014-05-09 23:31:31 +0200378 do {
Suzuki K Poulose0c428a6a2017-05-16 10:34:55 +0100379 /*
380 * Make sure the page table is still active, as another thread
381 * could have possibly freed the page table, while we released
382 * the lock.
383 */
384 if (!READ_ONCE(kvm->arch.pgd))
385 break;
Suzuki K Poulosee55cac52018-09-26 17:32:44 +0100386 next = stage2_pgd_addr_end(kvm, addr, end);
387 if (!stage2_pgd_none(kvm, *pgd))
Mike Rapoporte9f63762020-06-04 16:46:23 -0700388 unmap_stage2_p4ds(kvm, pgd, addr, next);
Suzuki K Poulose8b3405e2017-04-03 15:12:43 +0100389 /*
390 * If the range is too large, release the kvm->mmu_lock
391 * to prevent starvation and lockup detector warnings.
392 */
393 if (next != end)
394 cond_resched_lock(&kvm->mmu_lock);
Christoffer Dall4f853a72014-05-09 23:31:31 +0200395 } while (pgd++, addr = next, addr != end);
Marc Zyngier000d3992013-03-05 02:43:17 +0000396}
397
Marc Zyngier9d218a12014-01-15 12:50:23 +0000398static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd,
399 phys_addr_t addr, phys_addr_t end)
400{
401 pte_t *pte;
402
403 pte = pte_offset_kernel(pmd, addr);
404 do {
Ard Biesheuvel0de58f82015-12-03 09:25:22 +0100405 if (!pte_none(*pte) && !kvm_is_device_pfn(pte_pfn(*pte)))
Marc Zyngier363ef892014-12-19 16:48:06 +0000406 kvm_flush_dcache_pte(*pte);
Marc Zyngier9d218a12014-01-15 12:50:23 +0000407 } while (pte++, addr += PAGE_SIZE, addr != end);
408}
409
410static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud,
411 phys_addr_t addr, phys_addr_t end)
412{
413 pmd_t *pmd;
414 phys_addr_t next;
415
Suzuki K Poulosee55cac52018-09-26 17:32:44 +0100416 pmd = stage2_pmd_offset(kvm, pud, addr);
Marc Zyngier9d218a12014-01-15 12:50:23 +0000417 do {
Suzuki K Poulosee55cac52018-09-26 17:32:44 +0100418 next = stage2_pmd_addr_end(kvm, addr, end);
Marc Zyngier9d218a12014-01-15 12:50:23 +0000419 if (!pmd_none(*pmd)) {
Suzuki K Poulosebbb3b6b2016-03-01 12:00:39 +0000420 if (pmd_thp_or_huge(*pmd))
Marc Zyngier363ef892014-12-19 16:48:06 +0000421 kvm_flush_dcache_pmd(*pmd);
422 else
Marc Zyngier9d218a12014-01-15 12:50:23 +0000423 stage2_flush_ptes(kvm, pmd, addr, next);
Marc Zyngier9d218a12014-01-15 12:50:23 +0000424 }
425 } while (pmd++, addr = next, addr != end);
426}
427
Mike Rapoporte9f63762020-06-04 16:46:23 -0700428static void stage2_flush_puds(struct kvm *kvm, p4d_t *p4d,
Marc Zyngier9d218a12014-01-15 12:50:23 +0000429 phys_addr_t addr, phys_addr_t end)
430{
431 pud_t *pud;
432 phys_addr_t next;
433
Mike Rapoporte9f63762020-06-04 16:46:23 -0700434 pud = stage2_pud_offset(kvm, p4d, addr);
Marc Zyngier9d218a12014-01-15 12:50:23 +0000435 do {
Suzuki K Poulosee55cac52018-09-26 17:32:44 +0100436 next = stage2_pud_addr_end(kvm, addr, end);
437 if (!stage2_pud_none(kvm, *pud)) {
438 if (stage2_pud_huge(kvm, *pud))
Marc Zyngier363ef892014-12-19 16:48:06 +0000439 kvm_flush_dcache_pud(*pud);
440 else
Marc Zyngier9d218a12014-01-15 12:50:23 +0000441 stage2_flush_pmds(kvm, pud, addr, next);
Marc Zyngier9d218a12014-01-15 12:50:23 +0000442 }
443 } while (pud++, addr = next, addr != end);
444}
445
Mike Rapoporte9f63762020-06-04 16:46:23 -0700446static void stage2_flush_p4ds(struct kvm *kvm, pgd_t *pgd,
447 phys_addr_t addr, phys_addr_t end)
448{
449 p4d_t *p4d;
450 phys_addr_t next;
451
452 p4d = stage2_p4d_offset(kvm, pgd, addr);
453 do {
454 next = stage2_p4d_addr_end(kvm, addr, end);
455 if (!stage2_p4d_none(kvm, *p4d))
456 stage2_flush_puds(kvm, p4d, addr, next);
457 } while (p4d++, addr = next, addr != end);
458}
459
Marc Zyngier9d218a12014-01-15 12:50:23 +0000460static void stage2_flush_memslot(struct kvm *kvm,
461 struct kvm_memory_slot *memslot)
462{
463 phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT;
464 phys_addr_t end = addr + PAGE_SIZE * memslot->npages;
465 phys_addr_t next;
466 pgd_t *pgd;
467
Suzuki K Poulosee55cac52018-09-26 17:32:44 +0100468 pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr);
Marc Zyngier9d218a12014-01-15 12:50:23 +0000469 do {
Suzuki K Poulosee55cac52018-09-26 17:32:44 +0100470 next = stage2_pgd_addr_end(kvm, addr, end);
471 if (!stage2_pgd_none(kvm, *pgd))
Mike Rapoporte9f63762020-06-04 16:46:23 -0700472 stage2_flush_p4ds(kvm, pgd, addr, next);
Jiang Yi48c963e2020-04-15 10:42:29 +0200473
474 if (next != end)
475 cond_resched_lock(&kvm->mmu_lock);
Marc Zyngier9d218a12014-01-15 12:50:23 +0000476 } while (pgd++, addr = next, addr != end);
477}
478
479/**
480 * stage2_flush_vm - Invalidate cache for pages mapped in stage 2
481 * @kvm: The struct kvm pointer
482 *
483 * Go through the stage 2 page tables and invalidate any cache lines
484 * backing memory already mapped to the VM.
485 */
Marc Zyngier3c1e7162014-12-19 16:05:31 +0000486static void stage2_flush_vm(struct kvm *kvm)
Marc Zyngier9d218a12014-01-15 12:50:23 +0000487{
488 struct kvm_memslots *slots;
489 struct kvm_memory_slot *memslot;
490 int idx;
491
492 idx = srcu_read_lock(&kvm->srcu);
493 spin_lock(&kvm->mmu_lock);
494
495 slots = kvm_memslots(kvm);
496 kvm_for_each_memslot(memslot, slots)
497 stage2_flush_memslot(kvm, memslot);
498
499 spin_unlock(&kvm->mmu_lock);
500 srcu_read_unlock(&kvm->srcu, idx);
501}
502
Suzuki K Poulose64f32492016-03-22 18:56:21 +0000503static void clear_hyp_pgd_entry(pgd_t *pgd)
504{
Mike Rapoporte9f63762020-06-04 16:46:23 -0700505 p4d_t *p4d_table __maybe_unused = p4d_offset(pgd, 0UL);
Suzuki K Poulose64f32492016-03-22 18:56:21 +0000506 pgd_clear(pgd);
Mike Rapoporte9f63762020-06-04 16:46:23 -0700507 p4d_free(NULL, p4d_table);
Suzuki K Poulose64f32492016-03-22 18:56:21 +0000508 put_page(virt_to_page(pgd));
509}
510
Mike Rapoporte9f63762020-06-04 16:46:23 -0700511static void clear_hyp_p4d_entry(p4d_t *p4d)
512{
513 pud_t *pud_table __maybe_unused = pud_offset(p4d, 0UL);
514 VM_BUG_ON(p4d_huge(*p4d));
515 p4d_clear(p4d);
516 pud_free(NULL, pud_table);
517 put_page(virt_to_page(p4d));
518}
519
Suzuki K Poulose64f32492016-03-22 18:56:21 +0000520static void clear_hyp_pud_entry(pud_t *pud)
521{
522 pmd_t *pmd_table __maybe_unused = pmd_offset(pud, 0);
523 VM_BUG_ON(pud_huge(*pud));
524 pud_clear(pud);
525 pmd_free(NULL, pmd_table);
526 put_page(virt_to_page(pud));
527}
528
529static void clear_hyp_pmd_entry(pmd_t *pmd)
530{
531 pte_t *pte_table = pte_offset_kernel(pmd, 0);
532 VM_BUG_ON(pmd_thp_or_huge(*pmd));
533 pmd_clear(pmd);
534 pte_free_kernel(NULL, pte_table);
535 put_page(virt_to_page(pmd));
536}
537
538static void unmap_hyp_ptes(pmd_t *pmd, phys_addr_t addr, phys_addr_t end)
539{
540 pte_t *pte, *start_pte;
541
542 start_pte = pte = pte_offset_kernel(pmd, addr);
543 do {
544 if (!pte_none(*pte)) {
545 kvm_set_pte(pte, __pte(0));
546 put_page(virt_to_page(pte));
547 }
548 } while (pte++, addr += PAGE_SIZE, addr != end);
549
550 if (hyp_pte_table_empty(start_pte))
551 clear_hyp_pmd_entry(pmd);
552}
553
554static void unmap_hyp_pmds(pud_t *pud, phys_addr_t addr, phys_addr_t end)
555{
556 phys_addr_t next;
557 pmd_t *pmd, *start_pmd;
558
559 start_pmd = pmd = pmd_offset(pud, addr);
560 do {
561 next = pmd_addr_end(addr, end);
562 /* Hyp doesn't use huge pmds */
563 if (!pmd_none(*pmd))
564 unmap_hyp_ptes(pmd, addr, next);
565 } while (pmd++, addr = next, addr != end);
566
567 if (hyp_pmd_table_empty(start_pmd))
568 clear_hyp_pud_entry(pud);
569}
570
Mike Rapoporte9f63762020-06-04 16:46:23 -0700571static void unmap_hyp_puds(p4d_t *p4d, phys_addr_t addr, phys_addr_t end)
Suzuki K Poulose64f32492016-03-22 18:56:21 +0000572{
573 phys_addr_t next;
574 pud_t *pud, *start_pud;
575
Mike Rapoporte9f63762020-06-04 16:46:23 -0700576 start_pud = pud = pud_offset(p4d, addr);
Suzuki K Poulose64f32492016-03-22 18:56:21 +0000577 do {
578 next = pud_addr_end(addr, end);
579 /* Hyp doesn't use huge puds */
580 if (!pud_none(*pud))
581 unmap_hyp_pmds(pud, addr, next);
582 } while (pud++, addr = next, addr != end);
583
584 if (hyp_pud_table_empty(start_pud))
Mike Rapoporte9f63762020-06-04 16:46:23 -0700585 clear_hyp_p4d_entry(p4d);
586}
587
588static void unmap_hyp_p4ds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end)
589{
590 phys_addr_t next;
591 p4d_t *p4d, *start_p4d;
592
593 start_p4d = p4d = p4d_offset(pgd, addr);
594 do {
595 next = p4d_addr_end(addr, end);
596 /* Hyp doesn't use huge p4ds */
597 if (!p4d_none(*p4d))
598 unmap_hyp_puds(p4d, addr, next);
599 } while (p4d++, addr = next, addr != end);
600
601 if (hyp_p4d_table_empty(start_p4d))
Suzuki K Poulose64f32492016-03-22 18:56:21 +0000602 clear_hyp_pgd_entry(pgd);
603}
604
Marc Zyngier3ddd4552018-03-14 15:17:33 +0000605static unsigned int kvm_pgd_index(unsigned long addr, unsigned int ptrs_per_pgd)
606{
607 return (addr >> PGDIR_SHIFT) & (ptrs_per_pgd - 1);
608}
609
610static void __unmap_hyp_range(pgd_t *pgdp, unsigned long ptrs_per_pgd,
611 phys_addr_t start, u64 size)
Suzuki K Poulose64f32492016-03-22 18:56:21 +0000612{
613 pgd_t *pgd;
614 phys_addr_t addr = start, end = start + size;
615 phys_addr_t next;
616
617 /*
618 * We don't unmap anything from HYP, except at the hyp tear down.
619 * Hence, we don't have to invalidate the TLBs here.
620 */
Marc Zyngier3ddd4552018-03-14 15:17:33 +0000621 pgd = pgdp + kvm_pgd_index(addr, ptrs_per_pgd);
Suzuki K Poulose64f32492016-03-22 18:56:21 +0000622 do {
623 next = pgd_addr_end(addr, end);
624 if (!pgd_none(*pgd))
Mike Rapoporte9f63762020-06-04 16:46:23 -0700625 unmap_hyp_p4ds(pgd, addr, next);
Suzuki K Poulose64f32492016-03-22 18:56:21 +0000626 } while (pgd++, addr = next, addr != end);
627}
628
Marc Zyngier3ddd4552018-03-14 15:17:33 +0000629static void unmap_hyp_range(pgd_t *pgdp, phys_addr_t start, u64 size)
630{
631 __unmap_hyp_range(pgdp, PTRS_PER_PGD, start, size);
632}
633
634static void unmap_hyp_idmap_range(pgd_t *pgdp, phys_addr_t start, u64 size)
635{
636 __unmap_hyp_range(pgdp, __kvm_idmap_ptrs_per_pgd(), start, size);
637}
638
Marc Zyngier000d3992013-03-05 02:43:17 +0000639/**
Marc Zyngier4f728272013-04-12 19:12:05 +0100640 * free_hyp_pgds - free Hyp-mode page tables
Marc Zyngier000d3992013-03-05 02:43:17 +0000641 *
Marc Zyngier5a677ce2013-04-12 19:12:06 +0100642 * Assumes hyp_pgd is a page table used strictly in Hyp-mode and
643 * therefore contains either mappings in the kernel memory area (above
Marc Zyngiere3f019b2017-12-04 17:04:38 +0000644 * PAGE_OFFSET), or device mappings in the idmap range.
Marc Zyngier5a677ce2013-04-12 19:12:06 +0100645 *
Marc Zyngiere3f019b2017-12-04 17:04:38 +0000646 * boot_hyp_pgd should only map the idmap range, and is only used in
647 * the extended idmap case.
Marc Zyngier000d3992013-03-05 02:43:17 +0000648 */
Marc Zyngier4f728272013-04-12 19:12:05 +0100649void free_hyp_pgds(void)
Marc Zyngier000d3992013-03-05 02:43:17 +0000650{
Marc Zyngiere3f019b2017-12-04 17:04:38 +0000651 pgd_t *id_pgd;
652
Marc Zyngierd157f4a2013-04-12 19:12:07 +0100653 mutex_lock(&kvm_hyp_pgd_mutex);
Marc Zyngier5a677ce2013-04-12 19:12:06 +0100654
Marc Zyngiere3f019b2017-12-04 17:04:38 +0000655 id_pgd = boot_hyp_pgd ? boot_hyp_pgd : hyp_pgd;
656
657 if (id_pgd) {
658 /* In case we never called hyp_mmu_init() */
659 if (!io_map_base)
660 io_map_base = hyp_idmap_start;
661 unmap_hyp_idmap_range(id_pgd, io_map_base,
662 hyp_idmap_start + PAGE_SIZE - io_map_base);
663 }
664
Marc Zyngier26781f9c2016-06-30 18:40:46 +0100665 if (boot_hyp_pgd) {
Marc Zyngier26781f9c2016-06-30 18:40:46 +0100666 free_pages((unsigned long)boot_hyp_pgd, hyp_pgd_order);
667 boot_hyp_pgd = NULL;
668 }
669
Marc Zyngier4f728272013-04-12 19:12:05 +0100670 if (hyp_pgd) {
Marc Zyngier7839c672017-12-07 11:45:45 +0000671 unmap_hyp_range(hyp_pgd, kern_hyp_va(PAGE_OFFSET),
672 (uintptr_t)high_memory - PAGE_OFFSET);
Marc Zyngierd4cb9df52013-05-14 12:11:34 +0100673
Christoffer Dall38f791a2014-10-10 12:14:28 +0200674 free_pages((unsigned long)hyp_pgd, hyp_pgd_order);
Marc Zyngierd157f4a2013-04-12 19:12:07 +0100675 hyp_pgd = NULL;
Marc Zyngier4f728272013-04-12 19:12:05 +0100676 }
Ard Biesheuvele4c5a682015-03-19 16:42:28 +0000677 if (merged_hyp_pgd) {
678 clear_page(merged_hyp_pgd);
679 free_page((unsigned long)merged_hyp_pgd);
680 merged_hyp_pgd = NULL;
681 }
Marc Zyngier4f728272013-04-12 19:12:05 +0100682
Christoffer Dall342cd0a2013-01-20 18:28:06 -0500683 mutex_unlock(&kvm_hyp_pgd_mutex);
684}
685
686static void create_hyp_pte_mappings(pmd_t *pmd, unsigned long start,
Marc Zyngier6060df82013-04-12 19:12:01 +0100687 unsigned long end, unsigned long pfn,
688 pgprot_t prot)
Christoffer Dall342cd0a2013-01-20 18:28:06 -0500689{
690 pte_t *pte;
691 unsigned long addr;
692
Marc Zyngier3562c762013-04-12 19:12:02 +0100693 addr = start;
694 do {
Marc Zyngier6060df82013-04-12 19:12:01 +0100695 pte = pte_offset_kernel(pmd, addr);
Punit Agrawalf8df7332018-12-11 17:10:36 +0000696 kvm_set_pte(pte, kvm_pfn_pte(pfn, prot));
Marc Zyngier4f728272013-04-12 19:12:05 +0100697 get_page(virt_to_page(pte));
Marc Zyngier6060df82013-04-12 19:12:01 +0100698 pfn++;
Marc Zyngier3562c762013-04-12 19:12:02 +0100699 } while (addr += PAGE_SIZE, addr != end);
Christoffer Dall342cd0a2013-01-20 18:28:06 -0500700}
701
702static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
Marc Zyngier6060df82013-04-12 19:12:01 +0100703 unsigned long end, unsigned long pfn,
704 pgprot_t prot)
Christoffer Dall342cd0a2013-01-20 18:28:06 -0500705{
706 pmd_t *pmd;
707 pte_t *pte;
708 unsigned long addr, next;
709
Marc Zyngier3562c762013-04-12 19:12:02 +0100710 addr = start;
711 do {
Marc Zyngier6060df82013-04-12 19:12:01 +0100712 pmd = pmd_offset(pud, addr);
Christoffer Dall342cd0a2013-01-20 18:28:06 -0500713
714 BUG_ON(pmd_sect(*pmd));
715
716 if (pmd_none(*pmd)) {
Joel Fernandes (Google)4cf58922019-01-03 15:28:34 -0800717 pte = pte_alloc_one_kernel(NULL);
Christoffer Dall342cd0a2013-01-20 18:28:06 -0500718 if (!pte) {
719 kvm_err("Cannot allocate Hyp pte\n");
720 return -ENOMEM;
721 }
Marc Zyngier0db9dd82018-06-27 15:51:05 +0100722 kvm_pmd_populate(pmd, pte);
Marc Zyngier4f728272013-04-12 19:12:05 +0100723 get_page(virt_to_page(pmd));
Christoffer Dall342cd0a2013-01-20 18:28:06 -0500724 }
725
726 next = pmd_addr_end(addr, end);
727
Marc Zyngier6060df82013-04-12 19:12:01 +0100728 create_hyp_pte_mappings(pmd, addr, next, pfn, prot);
729 pfn += (next - addr) >> PAGE_SHIFT;
Marc Zyngier3562c762013-04-12 19:12:02 +0100730 } while (addr = next, addr != end);
Christoffer Dall342cd0a2013-01-20 18:28:06 -0500731
732 return 0;
733}
734
Mike Rapoporte9f63762020-06-04 16:46:23 -0700735static int create_hyp_pud_mappings(p4d_t *p4d, unsigned long start,
Christoffer Dall38f791a2014-10-10 12:14:28 +0200736 unsigned long end, unsigned long pfn,
737 pgprot_t prot)
738{
739 pud_t *pud;
740 pmd_t *pmd;
741 unsigned long addr, next;
742 int ret;
743
744 addr = start;
745 do {
Mike Rapoporte9f63762020-06-04 16:46:23 -0700746 pud = pud_offset(p4d, addr);
Christoffer Dall38f791a2014-10-10 12:14:28 +0200747
748 if (pud_none_or_clear_bad(pud)) {
749 pmd = pmd_alloc_one(NULL, addr);
750 if (!pmd) {
751 kvm_err("Cannot allocate Hyp pmd\n");
752 return -ENOMEM;
753 }
Marc Zyngier0db9dd82018-06-27 15:51:05 +0100754 kvm_pud_populate(pud, pmd);
Christoffer Dall38f791a2014-10-10 12:14:28 +0200755 get_page(virt_to_page(pud));
Christoffer Dall38f791a2014-10-10 12:14:28 +0200756 }
757
758 next = pud_addr_end(addr, end);
759 ret = create_hyp_pmd_mappings(pud, addr, next, pfn, prot);
760 if (ret)
761 return ret;
762 pfn += (next - addr) >> PAGE_SHIFT;
763 } while (addr = next, addr != end);
764
765 return 0;
766}
767
Mike Rapoporte9f63762020-06-04 16:46:23 -0700768static int create_hyp_p4d_mappings(pgd_t *pgd, unsigned long start,
769 unsigned long end, unsigned long pfn,
770 pgprot_t prot)
771{
772 p4d_t *p4d;
773 pud_t *pud;
774 unsigned long addr, next;
775 int ret;
776
777 addr = start;
778 do {
779 p4d = p4d_offset(pgd, addr);
780
781 if (p4d_none(*p4d)) {
782 pud = pud_alloc_one(NULL, addr);
783 if (!pud) {
784 kvm_err("Cannot allocate Hyp pud\n");
785 return -ENOMEM;
786 }
787 kvm_p4d_populate(p4d, pud);
788 get_page(virt_to_page(p4d));
789 }
790
791 next = p4d_addr_end(addr, end);
792 ret = create_hyp_pud_mappings(p4d, addr, next, pfn, prot);
793 if (ret)
794 return ret;
795 pfn += (next - addr) >> PAGE_SHIFT;
796 } while (addr = next, addr != end);
797
798 return 0;
799}
800
Kristina Martsenko98732d12018-01-15 15:23:49 +0000801static int __create_hyp_mappings(pgd_t *pgdp, unsigned long ptrs_per_pgd,
Marc Zyngier6060df82013-04-12 19:12:01 +0100802 unsigned long start, unsigned long end,
803 unsigned long pfn, pgprot_t prot)
Christoffer Dall342cd0a2013-01-20 18:28:06 -0500804{
Christoffer Dall342cd0a2013-01-20 18:28:06 -0500805 pgd_t *pgd;
Mike Rapoporte9f63762020-06-04 16:46:23 -0700806 p4d_t *p4d;
Christoffer Dall342cd0a2013-01-20 18:28:06 -0500807 unsigned long addr, next;
808 int err = 0;
809
Christoffer Dall342cd0a2013-01-20 18:28:06 -0500810 mutex_lock(&kvm_hyp_pgd_mutex);
Marc Zyngier3562c762013-04-12 19:12:02 +0100811 addr = start & PAGE_MASK;
812 end = PAGE_ALIGN(end);
813 do {
Marc Zyngier3ddd4552018-03-14 15:17:33 +0000814 pgd = pgdp + kvm_pgd_index(addr, ptrs_per_pgd);
Christoffer Dall342cd0a2013-01-20 18:28:06 -0500815
Christoffer Dall38f791a2014-10-10 12:14:28 +0200816 if (pgd_none(*pgd)) {
Mike Rapoporte9f63762020-06-04 16:46:23 -0700817 p4d = p4d_alloc_one(NULL, addr);
818 if (!p4d) {
819 kvm_err("Cannot allocate Hyp p4d\n");
Christoffer Dall342cd0a2013-01-20 18:28:06 -0500820 err = -ENOMEM;
821 goto out;
822 }
Mike Rapoporte9f63762020-06-04 16:46:23 -0700823 kvm_pgd_populate(pgd, p4d);
Christoffer Dall38f791a2014-10-10 12:14:28 +0200824 get_page(virt_to_page(pgd));
Christoffer Dall342cd0a2013-01-20 18:28:06 -0500825 }
826
827 next = pgd_addr_end(addr, end);
Mike Rapoporte9f63762020-06-04 16:46:23 -0700828 err = create_hyp_p4d_mappings(pgd, addr, next, pfn, prot);
Christoffer Dall342cd0a2013-01-20 18:28:06 -0500829 if (err)
830 goto out;
Marc Zyngier6060df82013-04-12 19:12:01 +0100831 pfn += (next - addr) >> PAGE_SHIFT;
Marc Zyngier3562c762013-04-12 19:12:02 +0100832 } while (addr = next, addr != end);
Christoffer Dall342cd0a2013-01-20 18:28:06 -0500833out:
834 mutex_unlock(&kvm_hyp_pgd_mutex);
835 return err;
836}
837
Christoffer Dall40c27292013-11-15 13:14:12 -0800838static phys_addr_t kvm_kaddr_to_phys(void *kaddr)
839{
840 if (!is_vmalloc_addr(kaddr)) {
841 BUG_ON(!virt_addr_valid(kaddr));
842 return __pa(kaddr);
843 } else {
844 return page_to_phys(vmalloc_to_page(kaddr)) +
845 offset_in_page(kaddr);
846 }
847}
848
Christoffer Dall342cd0a2013-01-20 18:28:06 -0500849/**
Marc Zyngier06e8c3b2012-10-28 01:09:14 +0100850 * create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode
Christoffer Dall342cd0a2013-01-20 18:28:06 -0500851 * @from: The virtual kernel start address of the range
852 * @to: The virtual kernel end address of the range (exclusive)
Marc Zyngierc8dddec2016-06-13 15:00:45 +0100853 * @prot: The protection to be applied to this range
Christoffer Dall342cd0a2013-01-20 18:28:06 -0500854 *
Marc Zyngier06e8c3b2012-10-28 01:09:14 +0100855 * The same virtual address as the kernel virtual address is also used
856 * in Hyp-mode mapping (modulo HYP_PAGE_OFFSET) to the same underlying
857 * physical pages.
Christoffer Dall342cd0a2013-01-20 18:28:06 -0500858 */
Marc Zyngierc8dddec2016-06-13 15:00:45 +0100859int create_hyp_mappings(void *from, void *to, pgprot_t prot)
Christoffer Dall342cd0a2013-01-20 18:28:06 -0500860{
Christoffer Dall40c27292013-11-15 13:14:12 -0800861 phys_addr_t phys_addr;
862 unsigned long virt_addr;
Marc Zyngier6c41a412016-06-30 18:40:51 +0100863 unsigned long start = kern_hyp_va((unsigned long)from);
864 unsigned long end = kern_hyp_va((unsigned long)to);
Marc Zyngier6060df82013-04-12 19:12:01 +0100865
Marc Zyngier1e947ba2015-01-29 11:59:54 +0000866 if (is_kernel_in_hyp_mode())
867 return 0;
868
Christoffer Dall40c27292013-11-15 13:14:12 -0800869 start = start & PAGE_MASK;
870 end = PAGE_ALIGN(end);
Marc Zyngier6060df82013-04-12 19:12:01 +0100871
Christoffer Dall40c27292013-11-15 13:14:12 -0800872 for (virt_addr = start; virt_addr < end; virt_addr += PAGE_SIZE) {
873 int err;
874
875 phys_addr = kvm_kaddr_to_phys(from + virt_addr - start);
Kristina Martsenko98732d12018-01-15 15:23:49 +0000876 err = __create_hyp_mappings(hyp_pgd, PTRS_PER_PGD,
877 virt_addr, virt_addr + PAGE_SIZE,
Christoffer Dall40c27292013-11-15 13:14:12 -0800878 __phys_to_pfn(phys_addr),
Marc Zyngierc8dddec2016-06-13 15:00:45 +0100879 prot);
Christoffer Dall40c27292013-11-15 13:14:12 -0800880 if (err)
881 return err;
882 }
883
884 return 0;
Christoffer Dall342cd0a2013-01-20 18:28:06 -0500885}
886
Marc Zyngierdc2e4632018-02-13 11:00:29 +0000887static int __create_hyp_private_mapping(phys_addr_t phys_addr, size_t size,
888 unsigned long *haddr, pgprot_t prot)
Christoffer Dall342cd0a2013-01-20 18:28:06 -0500889{
Marc Zyngiere3f019b2017-12-04 17:04:38 +0000890 pgd_t *pgd = hyp_pgd;
891 unsigned long base;
892 int ret = 0;
Marc Zyngier6060df82013-04-12 19:12:01 +0100893
Marc Zyngiere3f019b2017-12-04 17:04:38 +0000894 mutex_lock(&kvm_hyp_pgd_mutex);
Marc Zyngier6060df82013-04-12 19:12:01 +0100895
Marc Zyngiere3f019b2017-12-04 17:04:38 +0000896 /*
Fuad Tabba656012c2020-04-01 15:03:10 +0100897 * This assumes that we have enough space below the idmap
Marc Zyngiere3f019b2017-12-04 17:04:38 +0000898 * page to allocate our VAs. If not, the check below will
899 * kick. A potential alternative would be to detect that
900 * overflow and switch to an allocation above the idmap.
901 *
902 * The allocated size is always a multiple of PAGE_SIZE.
903 */
904 size = PAGE_ALIGN(size + offset_in_page(phys_addr));
905 base = io_map_base - size;
Marc Zyngier1bb32a42017-12-04 16:43:23 +0000906
Marc Zyngiere3f019b2017-12-04 17:04:38 +0000907 /*
908 * Verify that BIT(VA_BITS - 1) hasn't been flipped by
909 * allocating the new area, as it would indicate we've
910 * overflowed the idmap/IO address range.
911 */
912 if ((base ^ io_map_base) & BIT(VA_BITS - 1))
913 ret = -ENOMEM;
914 else
915 io_map_base = base;
916
917 mutex_unlock(&kvm_hyp_pgd_mutex);
918
919 if (ret)
920 goto out;
921
922 if (__kvm_cpu_uses_extended_idmap())
923 pgd = boot_hyp_pgd;
924
925 ret = __create_hyp_mappings(pgd, __kvm_idmap_ptrs_per_pgd(),
926 base, base + size,
Marc Zyngierdc2e4632018-02-13 11:00:29 +0000927 __phys_to_pfn(phys_addr), prot);
Marc Zyngiere3f019b2017-12-04 17:04:38 +0000928 if (ret)
929 goto out;
930
Marc Zyngierdc2e4632018-02-13 11:00:29 +0000931 *haddr = base + offset_in_page(phys_addr);
Marc Zyngiere3f019b2017-12-04 17:04:38 +0000932
933out:
Marc Zyngierdc2e4632018-02-13 11:00:29 +0000934 return ret;
935}
936
937/**
938 * create_hyp_io_mappings - Map IO into both kernel and HYP
939 * @phys_addr: The physical start address which gets mapped
940 * @size: Size of the region being mapped
941 * @kaddr: Kernel VA for this mapping
942 * @haddr: HYP VA for this mapping
943 */
944int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size,
945 void __iomem **kaddr,
946 void __iomem **haddr)
947{
948 unsigned long addr;
949 int ret;
950
951 *kaddr = ioremap(phys_addr, size);
952 if (!*kaddr)
953 return -ENOMEM;
954
955 if (is_kernel_in_hyp_mode()) {
956 *haddr = *kaddr;
957 return 0;
958 }
959
960 ret = __create_hyp_private_mapping(phys_addr, size,
961 &addr, PAGE_HYP_DEVICE);
Marc Zyngier1bb32a42017-12-04 16:43:23 +0000962 if (ret) {
963 iounmap(*kaddr);
964 *kaddr = NULL;
Marc Zyngierdc2e4632018-02-13 11:00:29 +0000965 *haddr = NULL;
Marc Zyngier1bb32a42017-12-04 16:43:23 +0000966 return ret;
967 }
968
Marc Zyngierdc2e4632018-02-13 11:00:29 +0000969 *haddr = (void __iomem *)addr;
970 return 0;
971}
972
973/**
974 * create_hyp_exec_mappings - Map an executable range into HYP
975 * @phys_addr: The physical start address which gets mapped
976 * @size: Size of the region being mapped
977 * @haddr: HYP VA for this mapping
978 */
979int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
980 void **haddr)
981{
982 unsigned long addr;
983 int ret;
984
985 BUG_ON(is_kernel_in_hyp_mode());
986
987 ret = __create_hyp_private_mapping(phys_addr, size,
988 &addr, PAGE_HYP_EXEC);
989 if (ret) {
990 *haddr = NULL;
991 return ret;
992 }
993
994 *haddr = (void *)addr;
Marc Zyngier1bb32a42017-12-04 16:43:23 +0000995 return 0;
Christoffer Dall342cd0a2013-01-20 18:28:06 -0500996}
997
Christoffer Dalld5d81842013-01-20 18:28:07 -0500998/**
999 * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
1000 * @kvm: The KVM struct pointer for the VM.
1001 *
Zenghui Yu8324c3d2019-03-25 08:02:05 +00001002 * Allocates only the stage-2 HW PGD level table(s) of size defined by
1003 * stage2_pgd_size(kvm).
Christoffer Dalld5d81842013-01-20 18:28:07 -05001004 *
1005 * Note we don't need locking here as this is only called when the VM is
1006 * created, which can only be done once.
1007 */
1008int kvm_alloc_stage2_pgd(struct kvm *kvm)
1009{
Christoffer Dalle329fb72018-12-11 15:26:31 +01001010 phys_addr_t pgd_phys;
Christoffer Dalld5d81842013-01-20 18:28:07 -05001011 pgd_t *pgd;
1012
1013 if (kvm->arch.pgd != NULL) {
1014 kvm_err("kvm_arch already initialized?\n");
1015 return -EINVAL;
1016 }
1017
Suzuki K Poulose9163ee232016-03-22 17:01:21 +00001018 /* Allocate the HW PGD, making sure that each page gets its own refcount */
Suzuki K Poulosee55cac52018-09-26 17:32:44 +01001019 pgd = alloc_pages_exact(stage2_pgd_size(kvm), GFP_KERNEL | __GFP_ZERO);
Suzuki K Poulose9163ee232016-03-22 17:01:21 +00001020 if (!pgd)
Marc Zyngiera9873702015-03-10 19:06:59 +00001021 return -ENOMEM;
1022
Christoffer Dalle329fb72018-12-11 15:26:31 +01001023 pgd_phys = virt_to_phys(pgd);
1024 if (WARN_ON(pgd_phys & ~kvm_vttbr_baddr_mask(kvm)))
1025 return -EINVAL;
1026
Christoffer Dalld5d81842013-01-20 18:28:07 -05001027 kvm->arch.pgd = pgd;
Christoffer Dalle329fb72018-12-11 15:26:31 +01001028 kvm->arch.pgd_phys = pgd_phys;
Christoffer Dalld5d81842013-01-20 18:28:07 -05001029 return 0;
1030}
1031
Christoffer Dall957db102014-11-27 10:35:03 +01001032static void stage2_unmap_memslot(struct kvm *kvm,
1033 struct kvm_memory_slot *memslot)
1034{
1035 hva_t hva = memslot->userspace_addr;
1036 phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT;
1037 phys_addr_t size = PAGE_SIZE * memslot->npages;
1038 hva_t reg_end = hva + size;
1039
1040 /*
1041 * A memory region could potentially cover multiple VMAs, and any holes
1042 * between them, so iterate over all of them to find out if we should
1043 * unmap any of them.
1044 *
1045 * +--------------------------------------------+
1046 * +---------------+----------------+ +----------------+
1047 * | : VMA 1 | VMA 2 | | VMA 3 : |
1048 * +---------------+----------------+ +----------------+
1049 * | memory region |
1050 * +--------------------------------------------+
1051 */
1052 do {
1053 struct vm_area_struct *vma = find_vma(current->mm, hva);
1054 hva_t vm_start, vm_end;
1055
1056 if (!vma || vma->vm_start >= reg_end)
1057 break;
1058
1059 /*
1060 * Take the intersection of this VMA with the memory region
1061 */
1062 vm_start = max(hva, vma->vm_start);
1063 vm_end = min(reg_end, vma->vm_end);
1064
1065 if (!(vma->vm_flags & VM_PFNMAP)) {
1066 gpa_t gpa = addr + (vm_start - memslot->userspace_addr);
1067 unmap_stage2_range(kvm, gpa, vm_end - vm_start);
1068 }
1069 hva = vm_end;
1070 } while (hva < reg_end);
1071}
1072
1073/**
1074 * stage2_unmap_vm - Unmap Stage-2 RAM mappings
1075 * @kvm: The struct kvm pointer
1076 *
Fuad Tabba656012c2020-04-01 15:03:10 +01001077 * Go through the memregions and unmap any regular RAM
Christoffer Dall957db102014-11-27 10:35:03 +01001078 * backing memory already mapped to the VM.
1079 */
1080void stage2_unmap_vm(struct kvm *kvm)
1081{
1082 struct kvm_memslots *slots;
1083 struct kvm_memory_slot *memslot;
1084 int idx;
1085
1086 idx = srcu_read_lock(&kvm->srcu);
Michel Lespinasse89154dd2020-06-08 21:33:29 -07001087 mmap_read_lock(current->mm);
Christoffer Dall957db102014-11-27 10:35:03 +01001088 spin_lock(&kvm->mmu_lock);
1089
1090 slots = kvm_memslots(kvm);
1091 kvm_for_each_memslot(memslot, slots)
1092 stage2_unmap_memslot(kvm, memslot);
1093
1094 spin_unlock(&kvm->mmu_lock);
Michel Lespinasse89154dd2020-06-08 21:33:29 -07001095 mmap_read_unlock(current->mm);
Christoffer Dall957db102014-11-27 10:35:03 +01001096 srcu_read_unlock(&kvm->srcu, idx);
1097}
1098
Christoffer Dalld5d81842013-01-20 18:28:07 -05001099/**
1100 * kvm_free_stage2_pgd - free all stage-2 tables
1101 * @kvm: The KVM struct pointer for the VM.
1102 *
1103 * Walks the level-1 page table pointed to by kvm->arch.pgd and frees all
1104 * underlying level-2 and level-3 tables before freeing the actual level-1 table
1105 * and setting the struct pointer to NULL.
Christoffer Dalld5d81842013-01-20 18:28:07 -05001106 */
1107void kvm_free_stage2_pgd(struct kvm *kvm)
1108{
Suzuki K Poulose6c0d7062017-05-03 15:17:51 +01001109 void *pgd = NULL;
Christoffer Dalld5d81842013-01-20 18:28:07 -05001110
Suzuki K Poulose8b3405e2017-04-03 15:12:43 +01001111 spin_lock(&kvm->mmu_lock);
Suzuki K Poulose6c0d7062017-05-03 15:17:51 +01001112 if (kvm->arch.pgd) {
Suzuki K Poulosee55cac52018-09-26 17:32:44 +01001113 unmap_stage2_range(kvm, 0, kvm_phys_size(kvm));
Suzuki K Poulose2952a602017-05-16 10:34:54 +01001114 pgd = READ_ONCE(kvm->arch.pgd);
Suzuki K Poulose6c0d7062017-05-03 15:17:51 +01001115 kvm->arch.pgd = NULL;
Christoffer Dalle329fb72018-12-11 15:26:31 +01001116 kvm->arch.pgd_phys = 0;
Suzuki K Poulose6c0d7062017-05-03 15:17:51 +01001117 }
Suzuki K Poulose8b3405e2017-04-03 15:12:43 +01001118 spin_unlock(&kvm->mmu_lock);
1119
Suzuki K Poulose9163ee232016-03-22 17:01:21 +00001120 /* Free the HW pgd, one page at a time */
Suzuki K Poulose6c0d7062017-05-03 15:17:51 +01001121 if (pgd)
Suzuki K Poulosee55cac52018-09-26 17:32:44 +01001122 free_pages_exact(pgd, stage2_pgd_size(kvm));
Christoffer Dalld5d81842013-01-20 18:28:07 -05001123}
1124
Mike Rapoporte9f63762020-06-04 16:46:23 -07001125static p4d_t *stage2_get_p4d(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
Christoffer Dall38f791a2014-10-10 12:14:28 +02001126 phys_addr_t addr)
1127{
1128 pgd_t *pgd;
Mike Rapoporte9f63762020-06-04 16:46:23 -07001129 p4d_t *p4d;
Christoffer Dall38f791a2014-10-10 12:14:28 +02001130
Suzuki K Poulosee55cac52018-09-26 17:32:44 +01001131 pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr);
1132 if (stage2_pgd_none(kvm, *pgd)) {
Christoffer Dall38f791a2014-10-10 12:14:28 +02001133 if (!cache)
1134 return NULL;
Mike Rapoporte9f63762020-06-04 16:46:23 -07001135 p4d = mmu_memory_cache_alloc(cache);
1136 stage2_pgd_populate(kvm, pgd, p4d);
Christoffer Dall38f791a2014-10-10 12:14:28 +02001137 get_page(virt_to_page(pgd));
1138 }
1139
Mike Rapoporte9f63762020-06-04 16:46:23 -07001140 return stage2_p4d_offset(kvm, pgd, addr);
1141}
1142
1143static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
1144 phys_addr_t addr)
1145{
1146 p4d_t *p4d;
1147 pud_t *pud;
1148
1149 p4d = stage2_get_p4d(kvm, cache, addr);
1150 if (stage2_p4d_none(kvm, *p4d)) {
1151 if (!cache)
1152 return NULL;
1153 pud = mmu_memory_cache_alloc(cache);
1154 stage2_p4d_populate(kvm, p4d, pud);
1155 get_page(virt_to_page(p4d));
1156 }
1157
1158 return stage2_pud_offset(kvm, p4d, addr);
Christoffer Dall38f791a2014-10-10 12:14:28 +02001159}
1160
Christoffer Dallad361f02012-11-01 17:14:45 +01001161static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
1162 phys_addr_t addr)
Christoffer Dalld5d81842013-01-20 18:28:07 -05001163{
Christoffer Dalld5d81842013-01-20 18:28:07 -05001164 pud_t *pud;
1165 pmd_t *pmd;
Christoffer Dalld5d81842013-01-20 18:28:07 -05001166
Christoffer Dall38f791a2014-10-10 12:14:28 +02001167 pud = stage2_get_pud(kvm, cache, addr);
Punit Agrawalb8e0ba72018-12-11 17:10:41 +00001168 if (!pud || stage2_pud_huge(kvm, *pud))
Marc Zyngierd6dbdd32017-06-05 19:17:18 +01001169 return NULL;
1170
Suzuki K Poulosee55cac52018-09-26 17:32:44 +01001171 if (stage2_pud_none(kvm, *pud)) {
Christoffer Dalld5d81842013-01-20 18:28:07 -05001172 if (!cache)
Christoffer Dallad361f02012-11-01 17:14:45 +01001173 return NULL;
Christoffer Dalld5d81842013-01-20 18:28:07 -05001174 pmd = mmu_memory_cache_alloc(cache);
Suzuki K Poulosee55cac52018-09-26 17:32:44 +01001175 stage2_pud_populate(kvm, pud, pmd);
Christoffer Dalld5d81842013-01-20 18:28:07 -05001176 get_page(virt_to_page(pud));
Marc Zyngierc62ee2b2012-10-15 11:27:37 +01001177 }
1178
Suzuki K Poulosee55cac52018-09-26 17:32:44 +01001179 return stage2_pmd_offset(kvm, pud, addr);
Christoffer Dallad361f02012-11-01 17:14:45 +01001180}
Christoffer Dalld5d81842013-01-20 18:28:07 -05001181
Christoffer Dallad361f02012-11-01 17:14:45 +01001182static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
1183 *cache, phys_addr_t addr, const pmd_t *new_pmd)
1184{
1185 pmd_t *pmd, old_pmd;
1186
Suzuki K Poulose3c3736c2019-03-20 14:57:19 +00001187retry:
Christoffer Dallad361f02012-11-01 17:14:45 +01001188 pmd = stage2_get_pmd(kvm, cache, addr);
1189 VM_BUG_ON(!pmd);
1190
Christoffer Dallad361f02012-11-01 17:14:45 +01001191 old_pmd = *pmd;
Suzuki K Poulose3c3736c2019-03-20 14:57:19 +00001192 /*
1193 * Multiple vcpus faulting on the same PMD entry, can
1194 * lead to them sequentially updating the PMD with the
1195 * same value. Following the break-before-make
1196 * (pmd_clear() followed by tlb_flush()) process can
1197 * hinder forward progress due to refaults generated
1198 * on missing translations.
1199 *
1200 * Skip updating the page table if the entry is
1201 * unchanged.
1202 */
1203 if (pmd_val(old_pmd) == pmd_val(*new_pmd))
1204 return 0;
1205
Marc Zyngierd4b9e072016-04-28 16:16:31 +01001206 if (pmd_present(old_pmd)) {
Punit Agrawal86658b82018-08-13 11:43:50 +01001207 /*
Suzuki K Poulose3c3736c2019-03-20 14:57:19 +00001208 * If we already have PTE level mapping for this block,
1209 * we must unmap it to avoid inconsistent TLB state and
1210 * leaking the table page. We could end up in this situation
1211 * if the memory slot was marked for dirty logging and was
1212 * reverted, leaving PTE level mappings for the pages accessed
1213 * during the period. So, unmap the PTE level mapping for this
1214 * block and retry, as we could have released the upper level
1215 * table in the process.
Punit Agrawal86658b82018-08-13 11:43:50 +01001216 *
Suzuki K Poulose3c3736c2019-03-20 14:57:19 +00001217 * Normal THP split/merge follows mmu_notifier callbacks and do
1218 * get handled accordingly.
Punit Agrawal86658b82018-08-13 11:43:50 +01001219 */
Suzuki K Poulose3c3736c2019-03-20 14:57:19 +00001220 if (!pmd_thp_or_huge(old_pmd)) {
1221 unmap_stage2_range(kvm, addr & S2_PMD_MASK, S2_PMD_SIZE);
1222 goto retry;
1223 }
Punit Agrawal86658b82018-08-13 11:43:50 +01001224 /*
1225 * Mapping in huge pages should only happen through a
1226 * fault. If a page is merged into a transparent huge
1227 * page, the individual subpages of that huge page
1228 * should be unmapped through MMU notifiers before we
1229 * get here.
1230 *
1231 * Merging of CompoundPages is not supported; they
1232 * should become splitting first, unmapped, merged,
1233 * and mapped back in on-demand.
1234 */
Suzuki K Poulose3c3736c2019-03-20 14:57:19 +00001235 WARN_ON_ONCE(pmd_pfn(old_pmd) != pmd_pfn(*new_pmd));
Marc Zyngierd4b9e072016-04-28 16:16:31 +01001236 pmd_clear(pmd);
Christoffer Dallad361f02012-11-01 17:14:45 +01001237 kvm_tlb_flush_vmid_ipa(kvm, addr);
Marc Zyngierd4b9e072016-04-28 16:16:31 +01001238 } else {
Christoffer Dallad361f02012-11-01 17:14:45 +01001239 get_page(virt_to_page(pmd));
Marc Zyngierd4b9e072016-04-28 16:16:31 +01001240 }
1241
1242 kvm_set_pmd(pmd, *new_pmd);
Christoffer Dallad361f02012-11-01 17:14:45 +01001243 return 0;
1244}
1245
Punit Agrawalb8e0ba72018-12-11 17:10:41 +00001246static int stage2_set_pud_huge(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
1247 phys_addr_t addr, const pud_t *new_pudp)
1248{
1249 pud_t *pudp, old_pud;
1250
Suzuki K Poulose3c3736c2019-03-20 14:57:19 +00001251retry:
Punit Agrawalb8e0ba72018-12-11 17:10:41 +00001252 pudp = stage2_get_pud(kvm, cache, addr);
1253 VM_BUG_ON(!pudp);
1254
1255 old_pud = *pudp;
1256
1257 /*
1258 * A large number of vcpus faulting on the same stage 2 entry,
Suzuki K Poulose3c3736c2019-03-20 14:57:19 +00001259 * can lead to a refault due to the stage2_pud_clear()/tlb_flush().
1260 * Skip updating the page tables if there is no change.
Punit Agrawalb8e0ba72018-12-11 17:10:41 +00001261 */
1262 if (pud_val(old_pud) == pud_val(*new_pudp))
1263 return 0;
1264
1265 if (stage2_pud_present(kvm, old_pud)) {
Suzuki K Poulose3c3736c2019-03-20 14:57:19 +00001266 /*
1267 * If we already have table level mapping for this block, unmap
1268 * the range for this block and retry.
1269 */
1270 if (!stage2_pud_huge(kvm, old_pud)) {
1271 unmap_stage2_range(kvm, addr & S2_PUD_MASK, S2_PUD_SIZE);
1272 goto retry;
1273 }
1274
1275 WARN_ON_ONCE(kvm_pud_pfn(old_pud) != kvm_pud_pfn(*new_pudp));
Punit Agrawalb8e0ba72018-12-11 17:10:41 +00001276 stage2_pud_clear(kvm, pudp);
1277 kvm_tlb_flush_vmid_ipa(kvm, addr);
1278 } else {
1279 get_page(virt_to_page(pudp));
1280 }
1281
1282 kvm_set_pud(pudp, *new_pudp);
1283 return 0;
1284}
1285
Punit Agrawal86d1c552018-12-11 17:10:38 +00001286/*
1287 * stage2_get_leaf_entry - walk the stage2 VM page tables and return
1288 * true if a valid and present leaf-entry is found. A pointer to the
1289 * leaf-entry is returned in the appropriate level variable - pudpp,
1290 * pmdpp, ptepp.
1291 */
1292static bool stage2_get_leaf_entry(struct kvm *kvm, phys_addr_t addr,
1293 pud_t **pudpp, pmd_t **pmdpp, pte_t **ptepp)
Marc Zyngier7a3796d2017-10-23 17:11:21 +01001294{
Punit Agrawal86d1c552018-12-11 17:10:38 +00001295 pud_t *pudp;
Marc Zyngier7a3796d2017-10-23 17:11:21 +01001296 pmd_t *pmdp;
1297 pte_t *ptep;
1298
Punit Agrawal86d1c552018-12-11 17:10:38 +00001299 *pudpp = NULL;
1300 *pmdpp = NULL;
1301 *ptepp = NULL;
1302
1303 pudp = stage2_get_pud(kvm, NULL, addr);
1304 if (!pudp || stage2_pud_none(kvm, *pudp) || !stage2_pud_present(kvm, *pudp))
1305 return false;
1306
1307 if (stage2_pud_huge(kvm, *pudp)) {
1308 *pudpp = pudp;
1309 return true;
1310 }
1311
1312 pmdp = stage2_pmd_offset(kvm, pudp, addr);
Marc Zyngier7a3796d2017-10-23 17:11:21 +01001313 if (!pmdp || pmd_none(*pmdp) || !pmd_present(*pmdp))
1314 return false;
1315
Punit Agrawal86d1c552018-12-11 17:10:38 +00001316 if (pmd_thp_or_huge(*pmdp)) {
1317 *pmdpp = pmdp;
1318 return true;
1319 }
Marc Zyngier7a3796d2017-10-23 17:11:21 +01001320
1321 ptep = pte_offset_kernel(pmdp, addr);
1322 if (!ptep || pte_none(*ptep) || !pte_present(*ptep))
1323 return false;
1324
Punit Agrawal86d1c552018-12-11 17:10:38 +00001325 *ptepp = ptep;
1326 return true;
1327}
1328
1329static bool stage2_is_exec(struct kvm *kvm, phys_addr_t addr)
1330{
1331 pud_t *pudp;
1332 pmd_t *pmdp;
1333 pte_t *ptep;
1334 bool found;
1335
1336 found = stage2_get_leaf_entry(kvm, addr, &pudp, &pmdp, &ptep);
1337 if (!found)
1338 return false;
1339
1340 if (pudp)
1341 return kvm_s2pud_exec(pudp);
1342 else if (pmdp)
1343 return kvm_s2pmd_exec(pmdp);
1344 else
1345 return kvm_s2pte_exec(ptep);
Marc Zyngier7a3796d2017-10-23 17:11:21 +01001346}
1347
Christoffer Dallad361f02012-11-01 17:14:45 +01001348static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
Mario Smarduch15a49a42015-01-15 15:58:58 -08001349 phys_addr_t addr, const pte_t *new_pte,
1350 unsigned long flags)
Christoffer Dallad361f02012-11-01 17:14:45 +01001351{
Punit Agrawalb8e0ba72018-12-11 17:10:41 +00001352 pud_t *pud;
Christoffer Dallad361f02012-11-01 17:14:45 +01001353 pmd_t *pmd;
1354 pte_t *pte, old_pte;
Mario Smarduch15a49a42015-01-15 15:58:58 -08001355 bool iomap = flags & KVM_S2PTE_FLAG_IS_IOMAP;
1356 bool logging_active = flags & KVM_S2_FLAG_LOGGING_ACTIVE;
1357
1358 VM_BUG_ON(logging_active && !cache);
Christoffer Dallad361f02012-11-01 17:14:45 +01001359
Christoffer Dall38f791a2014-10-10 12:14:28 +02001360 /* Create stage-2 page table mapping - Levels 0 and 1 */
Punit Agrawalb8e0ba72018-12-11 17:10:41 +00001361 pud = stage2_get_pud(kvm, cache, addr);
1362 if (!pud) {
1363 /*
1364 * Ignore calls from kvm_set_spte_hva for unallocated
1365 * address ranges.
1366 */
1367 return 0;
1368 }
1369
1370 /*
1371 * While dirty page logging - dissolve huge PUD, then continue
1372 * on to allocate page.
1373 */
1374 if (logging_active)
1375 stage2_dissolve_pud(kvm, addr, pud);
1376
1377 if (stage2_pud_none(kvm, *pud)) {
1378 if (!cache)
1379 return 0; /* ignore calls from kvm_set_spte_hva */
1380 pmd = mmu_memory_cache_alloc(cache);
1381 stage2_pud_populate(kvm, pud, pmd);
1382 get_page(virt_to_page(pud));
1383 }
1384
1385 pmd = stage2_pmd_offset(kvm, pud, addr);
Christoffer Dallad361f02012-11-01 17:14:45 +01001386 if (!pmd) {
1387 /*
1388 * Ignore calls from kvm_set_spte_hva for unallocated
1389 * address ranges.
1390 */
1391 return 0;
1392 }
1393
Mario Smarduch15a49a42015-01-15 15:58:58 -08001394 /*
1395 * While dirty page logging - dissolve huge PMD, then continue on to
1396 * allocate page.
1397 */
1398 if (logging_active)
1399 stage2_dissolve_pmd(kvm, addr, pmd);
1400
Christoffer Dallad361f02012-11-01 17:14:45 +01001401 /* Create stage-2 page mappings - Level 2 */
Christoffer Dalld5d81842013-01-20 18:28:07 -05001402 if (pmd_none(*pmd)) {
1403 if (!cache)
1404 return 0; /* ignore calls from kvm_set_spte_hva */
1405 pte = mmu_memory_cache_alloc(cache);
Marc Zyngier0db9dd82018-06-27 15:51:05 +01001406 kvm_pmd_populate(pmd, pte);
Christoffer Dalld5d81842013-01-20 18:28:07 -05001407 get_page(virt_to_page(pmd));
Marc Zyngierc62ee2b2012-10-15 11:27:37 +01001408 }
1409
1410 pte = pte_offset_kernel(pmd, addr);
Christoffer Dalld5d81842013-01-20 18:28:07 -05001411
1412 if (iomap && pte_present(*pte))
1413 return -EFAULT;
1414
1415 /* Create 2nd stage page table mapping - Level 3 */
1416 old_pte = *pte;
Marc Zyngierd4b9e072016-04-28 16:16:31 +01001417 if (pte_present(old_pte)) {
Punit Agrawal976d34e2018-08-13 11:43:51 +01001418 /* Skip page table update if there is no change */
1419 if (pte_val(old_pte) == pte_val(*new_pte))
1420 return 0;
1421
Marc Zyngierd4b9e072016-04-28 16:16:31 +01001422 kvm_set_pte(pte, __pte(0));
Marc Zyngier48762762013-01-28 15:27:00 +00001423 kvm_tlb_flush_vmid_ipa(kvm, addr);
Marc Zyngierd4b9e072016-04-28 16:16:31 +01001424 } else {
Christoffer Dalld5d81842013-01-20 18:28:07 -05001425 get_page(virt_to_page(pte));
Marc Zyngierd4b9e072016-04-28 16:16:31 +01001426 }
Christoffer Dalld5d81842013-01-20 18:28:07 -05001427
Marc Zyngierd4b9e072016-04-28 16:16:31 +01001428 kvm_set_pte(pte, *new_pte);
Christoffer Dalld5d81842013-01-20 18:28:07 -05001429 return 0;
1430}
1431
Catalin Marinas06485052016-04-13 17:57:37 +01001432#ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
1433static int stage2_ptep_test_and_clear_young(pte_t *pte)
1434{
1435 if (pte_young(*pte)) {
1436 *pte = pte_mkold(*pte);
1437 return 1;
1438 }
1439 return 0;
1440}
1441#else
1442static int stage2_ptep_test_and_clear_young(pte_t *pte)
1443{
1444 return __ptep_test_and_clear_young(pte);
1445}
1446#endif
1447
1448static int stage2_pmdp_test_and_clear_young(pmd_t *pmd)
1449{
1450 return stage2_ptep_test_and_clear_young((pte_t *)pmd);
1451}
1452
Punit Agrawal35a63962018-12-11 17:10:40 +00001453static int stage2_pudp_test_and_clear_young(pud_t *pud)
1454{
1455 return stage2_ptep_test_and_clear_young((pte_t *)pud);
1456}
1457
Christoffer Dalld5d81842013-01-20 18:28:07 -05001458/**
1459 * kvm_phys_addr_ioremap - map a device range to guest IPA
1460 *
1461 * @kvm: The KVM pointer
1462 * @guest_ipa: The IPA at which to insert the mapping
1463 * @pa: The physical address of the device
1464 * @size: The size of the mapping
1465 */
1466int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
Ard Biesheuvelc40f2f82014-09-17 14:56:18 -07001467 phys_addr_t pa, unsigned long size, bool writable)
Christoffer Dalld5d81842013-01-20 18:28:07 -05001468{
1469 phys_addr_t addr, end;
1470 int ret = 0;
1471 unsigned long pfn;
1472 struct kvm_mmu_memory_cache cache = { 0, };
1473
1474 end = (guest_ipa + size + PAGE_SIZE - 1) & PAGE_MASK;
1475 pfn = __phys_to_pfn(pa);
1476
1477 for (addr = guest_ipa; addr < end; addr += PAGE_SIZE) {
Punit Agrawalf8df7332018-12-11 17:10:36 +00001478 pte_t pte = kvm_pfn_pte(pfn, PAGE_S2_DEVICE);
Christoffer Dalld5d81842013-01-20 18:28:07 -05001479
Ard Biesheuvelc40f2f82014-09-17 14:56:18 -07001480 if (writable)
Catalin Marinas06485052016-04-13 17:57:37 +01001481 pte = kvm_s2pte_mkwrite(pte);
Ard Biesheuvelc40f2f82014-09-17 14:56:18 -07001482
Suzuki K Poulosee55cac52018-09-26 17:32:44 +01001483 ret = mmu_topup_memory_cache(&cache,
1484 kvm_mmu_cache_min_pages(kvm),
1485 KVM_NR_MEM_OBJS);
Christoffer Dalld5d81842013-01-20 18:28:07 -05001486 if (ret)
1487 goto out;
1488 spin_lock(&kvm->mmu_lock);
Mario Smarduch15a49a42015-01-15 15:58:58 -08001489 ret = stage2_set_pte(kvm, &cache, addr, &pte,
1490 KVM_S2PTE_FLAG_IS_IOMAP);
Christoffer Dalld5d81842013-01-20 18:28:07 -05001491 spin_unlock(&kvm->mmu_lock);
1492 if (ret)
1493 goto out;
1494
1495 pfn++;
1496 }
1497
1498out:
1499 mmu_free_memory_cache(&cache);
1500 return ret;
1501}
1502
Mario Smarduchc6473552015-01-15 15:58:56 -08001503/**
1504 * stage2_wp_ptes - write protect PMD range
1505 * @pmd: pointer to pmd entry
1506 * @addr: range start address
1507 * @end: range end address
1508 */
1509static void stage2_wp_ptes(pmd_t *pmd, phys_addr_t addr, phys_addr_t end)
1510{
1511 pte_t *pte;
1512
1513 pte = pte_offset_kernel(pmd, addr);
1514 do {
1515 if (!pte_none(*pte)) {
1516 if (!kvm_s2pte_readonly(pte))
1517 kvm_set_s2pte_readonly(pte);
1518 }
1519 } while (pte++, addr += PAGE_SIZE, addr != end);
1520}
1521
1522/**
1523 * stage2_wp_pmds - write protect PUD range
Suzuki K Poulosee55cac52018-09-26 17:32:44 +01001524 * kvm: kvm instance for the VM
Mario Smarduchc6473552015-01-15 15:58:56 -08001525 * @pud: pointer to pud entry
1526 * @addr: range start address
1527 * @end: range end address
1528 */
Suzuki K Poulosee55cac52018-09-26 17:32:44 +01001529static void stage2_wp_pmds(struct kvm *kvm, pud_t *pud,
1530 phys_addr_t addr, phys_addr_t end)
Mario Smarduchc6473552015-01-15 15:58:56 -08001531{
1532 pmd_t *pmd;
1533 phys_addr_t next;
1534
Suzuki K Poulosee55cac52018-09-26 17:32:44 +01001535 pmd = stage2_pmd_offset(kvm, pud, addr);
Mario Smarduchc6473552015-01-15 15:58:56 -08001536
1537 do {
Suzuki K Poulosee55cac52018-09-26 17:32:44 +01001538 next = stage2_pmd_addr_end(kvm, addr, end);
Mario Smarduchc6473552015-01-15 15:58:56 -08001539 if (!pmd_none(*pmd)) {
Suzuki K Poulosebbb3b6b2016-03-01 12:00:39 +00001540 if (pmd_thp_or_huge(*pmd)) {
Mario Smarduchc6473552015-01-15 15:58:56 -08001541 if (!kvm_s2pmd_readonly(pmd))
1542 kvm_set_s2pmd_readonly(pmd);
1543 } else {
1544 stage2_wp_ptes(pmd, addr, next);
1545 }
1546 }
1547 } while (pmd++, addr = next, addr != end);
1548}
1549
1550/**
Mike Rapoporte9f63762020-06-04 16:46:23 -07001551 * stage2_wp_puds - write protect P4D range
Zenghui Yu8324c3d2019-03-25 08:02:05 +00001552 * @pgd: pointer to pgd entry
1553 * @addr: range start address
1554 * @end: range end address
1555 */
Mike Rapoporte9f63762020-06-04 16:46:23 -07001556static void stage2_wp_puds(struct kvm *kvm, p4d_t *p4d,
Suzuki K Poulosee55cac52018-09-26 17:32:44 +01001557 phys_addr_t addr, phys_addr_t end)
Mario Smarduchc6473552015-01-15 15:58:56 -08001558{
1559 pud_t *pud;
1560 phys_addr_t next;
1561
Mike Rapoporte9f63762020-06-04 16:46:23 -07001562 pud = stage2_pud_offset(kvm, p4d, addr);
Mario Smarduchc6473552015-01-15 15:58:56 -08001563 do {
Suzuki K Poulosee55cac52018-09-26 17:32:44 +01001564 next = stage2_pud_addr_end(kvm, addr, end);
1565 if (!stage2_pud_none(kvm, *pud)) {
Punit Agrawal4ea5af52018-12-11 17:10:37 +00001566 if (stage2_pud_huge(kvm, *pud)) {
1567 if (!kvm_s2pud_readonly(pud))
1568 kvm_set_s2pud_readonly(pud);
1569 } else {
1570 stage2_wp_pmds(kvm, pud, addr, next);
1571 }
Mario Smarduchc6473552015-01-15 15:58:56 -08001572 }
1573 } while (pud++, addr = next, addr != end);
1574}
1575
1576/**
Mike Rapoporte9f63762020-06-04 16:46:23 -07001577 * stage2_wp_p4ds - write protect PGD range
1578 * @pgd: pointer to pgd entry
1579 * @addr: range start address
1580 * @end: range end address
1581 */
1582static void stage2_wp_p4ds(struct kvm *kvm, pgd_t *pgd,
1583 phys_addr_t addr, phys_addr_t end)
1584{
1585 p4d_t *p4d;
1586 phys_addr_t next;
1587
1588 p4d = stage2_p4d_offset(kvm, pgd, addr);
1589 do {
1590 next = stage2_p4d_addr_end(kvm, addr, end);
1591 if (!stage2_p4d_none(kvm, *p4d))
1592 stage2_wp_puds(kvm, p4d, addr, next);
1593 } while (p4d++, addr = next, addr != end);
1594}
1595
1596/**
Mario Smarduchc6473552015-01-15 15:58:56 -08001597 * stage2_wp_range() - write protect stage2 memory region range
1598 * @kvm: The KVM pointer
1599 * @addr: Start address of range
1600 * @end: End address of range
1601 */
1602static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
1603{
1604 pgd_t *pgd;
1605 phys_addr_t next;
1606
Suzuki K Poulosee55cac52018-09-26 17:32:44 +01001607 pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr);
Mario Smarduchc6473552015-01-15 15:58:56 -08001608 do {
1609 /*
1610 * Release kvm_mmu_lock periodically if the memory region is
1611 * large. Otherwise, we may see kernel panics with
Christoffer Dall227ea812015-01-23 10:49:31 +01001612 * CONFIG_DETECT_HUNG_TASK, CONFIG_LOCKUP_DETECTOR,
1613 * CONFIG_LOCKDEP. Additionally, holding the lock too long
Suzuki K Poulose0c428a6a2017-05-16 10:34:55 +01001614 * will also starve other vCPUs. We have to also make sure
1615 * that the page tables are not freed while we released
1616 * the lock.
Mario Smarduchc6473552015-01-15 15:58:56 -08001617 */
Suzuki K Poulose0c428a6a2017-05-16 10:34:55 +01001618 cond_resched_lock(&kvm->mmu_lock);
1619 if (!READ_ONCE(kvm->arch.pgd))
1620 break;
Suzuki K Poulosee55cac52018-09-26 17:32:44 +01001621 next = stage2_pgd_addr_end(kvm, addr, end);
1622 if (stage2_pgd_present(kvm, *pgd))
Mike Rapoporte9f63762020-06-04 16:46:23 -07001623 stage2_wp_p4ds(kvm, pgd, addr, next);
Mario Smarduchc6473552015-01-15 15:58:56 -08001624 } while (pgd++, addr = next, addr != end);
1625}
1626
1627/**
1628 * kvm_mmu_wp_memory_region() - write protect stage 2 entries for memory slot
1629 * @kvm: The KVM pointer
1630 * @slot: The memory slot to write protect
1631 *
1632 * Called to start logging dirty pages after memory region
1633 * KVM_MEM_LOG_DIRTY_PAGES operation is called. After this function returns
Punit Agrawal4ea5af52018-12-11 17:10:37 +00001634 * all present PUD, PMD and PTEs are write protected in the memory region.
Mario Smarduchc6473552015-01-15 15:58:56 -08001635 * Afterwards read of dirty page log can be called.
1636 *
1637 * Acquires kvm_mmu_lock. Called with kvm->slots_lock mutex acquired,
1638 * serializing operations for VM memory regions.
1639 */
1640void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
1641{
Paolo Bonzini9f6b8022015-05-17 16:20:07 +02001642 struct kvm_memslots *slots = kvm_memslots(kvm);
1643 struct kvm_memory_slot *memslot = id_to_memslot(slots, slot);
Sean Christopherson0577d1a2020-02-18 13:07:31 -08001644 phys_addr_t start, end;
1645
1646 if (WARN_ON_ONCE(!memslot))
1647 return;
1648
1649 start = memslot->base_gfn << PAGE_SHIFT;
1650 end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT;
Mario Smarduchc6473552015-01-15 15:58:56 -08001651
1652 spin_lock(&kvm->mmu_lock);
1653 stage2_wp_range(kvm, start, end);
1654 spin_unlock(&kvm->mmu_lock);
1655 kvm_flush_remote_tlbs(kvm);
1656}
Mario Smarduch53c810c2015-01-15 15:58:57 -08001657
1658/**
Kai Huang3b0f1d02015-01-28 10:54:23 +08001659 * kvm_mmu_write_protect_pt_masked() - write protect dirty pages
Mario Smarduch53c810c2015-01-15 15:58:57 -08001660 * @kvm: The KVM pointer
1661 * @slot: The memory slot associated with mask
1662 * @gfn_offset: The gfn offset in memory slot
1663 * @mask: The mask of dirty pages at offset 'gfn_offset' in this memory
1664 * slot to be write protected
1665 *
1666 * Walks bits set in mask write protects the associated pte's. Caller must
1667 * acquire kvm_mmu_lock.
1668 */
Kai Huang3b0f1d02015-01-28 10:54:23 +08001669static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
Mario Smarduch53c810c2015-01-15 15:58:57 -08001670 struct kvm_memory_slot *slot,
1671 gfn_t gfn_offset, unsigned long mask)
1672{
1673 phys_addr_t base_gfn = slot->base_gfn + gfn_offset;
1674 phys_addr_t start = (base_gfn + __ffs(mask)) << PAGE_SHIFT;
1675 phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT;
1676
1677 stage2_wp_range(kvm, start, end);
1678}
Mario Smarduchc6473552015-01-15 15:58:56 -08001679
Kai Huang3b0f1d02015-01-28 10:54:23 +08001680/*
1681 * kvm_arch_mmu_enable_log_dirty_pt_masked - enable dirty logging for selected
1682 * dirty pages.
1683 *
1684 * It calls kvm_mmu_write_protect_pt_masked to write protect selected pages to
1685 * enable dirty logging for them.
1686 */
1687void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
1688 struct kvm_memory_slot *slot,
1689 gfn_t gfn_offset, unsigned long mask)
1690{
1691 kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
1692}
1693
Marc Zyngier17ab9d52017-10-23 17:11:22 +01001694static void clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size)
Marc Zyngier0d3e4d42015-01-05 21:13:24 +00001695{
Marc Zyngier17ab9d52017-10-23 17:11:22 +01001696 __clean_dcache_guest_page(pfn, size);
Marc Zyngiera15f6932017-10-23 17:11:15 +01001697}
1698
Marc Zyngier17ab9d52017-10-23 17:11:22 +01001699static void invalidate_icache_guest_page(kvm_pfn_t pfn, unsigned long size)
Marc Zyngiera15f6932017-10-23 17:11:15 +01001700{
Marc Zyngier17ab9d52017-10-23 17:11:22 +01001701 __invalidate_icache_guest_page(pfn, size);
Marc Zyngier0d3e4d42015-01-05 21:13:24 +00001702}
1703
James Morse1559b752019-12-17 12:38:09 +00001704static void kvm_send_hwpoison_signal(unsigned long address, short lsb)
James Morse196f8782017-06-20 17:11:48 +01001705{
Eric W. Biederman795a8372018-04-16 13:39:10 -05001706 send_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb, current);
James Morse196f8782017-06-20 17:11:48 +01001707}
1708
Suzuki K Poulosea80868f2019-03-12 09:52:51 +00001709static bool fault_supports_stage2_huge_mapping(struct kvm_memory_slot *memslot,
1710 unsigned long hva,
1711 unsigned long map_size)
Christoffer Dall6794ad52018-11-02 08:53:22 +01001712{
Shaokun Zhangc2be79a2019-02-19 17:22:21 +08001713 gpa_t gpa_start;
Christoffer Dall6794ad52018-11-02 08:53:22 +01001714 hva_t uaddr_start, uaddr_end;
1715 size_t size;
1716
Suzuki K Poulose9f283612020-05-07 20:35:45 +08001717 /* The memslot and the VMA are guaranteed to be aligned to PAGE_SIZE */
1718 if (map_size == PAGE_SIZE)
1719 return true;
1720
Christoffer Dall6794ad52018-11-02 08:53:22 +01001721 size = memslot->npages * PAGE_SIZE;
1722
1723 gpa_start = memslot->base_gfn << PAGE_SHIFT;
Christoffer Dall6794ad52018-11-02 08:53:22 +01001724
1725 uaddr_start = memslot->userspace_addr;
1726 uaddr_end = uaddr_start + size;
1727
1728 /*
1729 * Pages belonging to memslots that don't have the same alignment
Suzuki K Poulosea80868f2019-03-12 09:52:51 +00001730 * within a PMD/PUD for userspace and IPA cannot be mapped with stage-2
1731 * PMD/PUD entries, because we'll end up mapping the wrong pages.
Christoffer Dall6794ad52018-11-02 08:53:22 +01001732 *
1733 * Consider a layout like the following:
1734 *
1735 * memslot->userspace_addr:
1736 * +-----+--------------------+--------------------+---+
Suzuki K Poulosea80868f2019-03-12 09:52:51 +00001737 * |abcde|fgh Stage-1 block | Stage-1 block tv|xyz|
Christoffer Dall6794ad52018-11-02 08:53:22 +01001738 * +-----+--------------------+--------------------+---+
1739 *
Suzuki K Poulose9f283612020-05-07 20:35:45 +08001740 * memslot->base_gfn << PAGE_SHIFT:
Christoffer Dall6794ad52018-11-02 08:53:22 +01001741 * +---+--------------------+--------------------+-----+
Suzuki K Poulosea80868f2019-03-12 09:52:51 +00001742 * |abc|def Stage-2 block | Stage-2 block |tvxyz|
Christoffer Dall6794ad52018-11-02 08:53:22 +01001743 * +---+--------------------+--------------------+-----+
1744 *
Suzuki K Poulosea80868f2019-03-12 09:52:51 +00001745 * If we create those stage-2 blocks, we'll end up with this incorrect
Christoffer Dall6794ad52018-11-02 08:53:22 +01001746 * mapping:
1747 * d -> f
1748 * e -> g
1749 * f -> h
1750 */
Suzuki K Poulosea80868f2019-03-12 09:52:51 +00001751 if ((gpa_start & (map_size - 1)) != (uaddr_start & (map_size - 1)))
Christoffer Dall6794ad52018-11-02 08:53:22 +01001752 return false;
1753
1754 /*
1755 * Next, let's make sure we're not trying to map anything not covered
Suzuki K Poulosea80868f2019-03-12 09:52:51 +00001756 * by the memslot. This means we have to prohibit block size mappings
1757 * for the beginning and end of a non-block aligned and non-block sized
Christoffer Dall6794ad52018-11-02 08:53:22 +01001758 * memory slot (illustrated by the head and tail parts of the
1759 * userspace view above containing pages 'abcde' and 'xyz',
1760 * respectively).
1761 *
1762 * Note that it doesn't matter if we do the check using the
1763 * userspace_addr or the base_gfn, as both are equally aligned (per
1764 * the check above) and equally sized.
1765 */
Suzuki K Poulosea80868f2019-03-12 09:52:51 +00001766 return (hva & ~(map_size - 1)) >= uaddr_start &&
1767 (hva & ~(map_size - 1)) + map_size <= uaddr_end;
Christoffer Dall6794ad52018-11-02 08:53:22 +01001768}
1769
Suzuki K Poulose0529c902020-05-07 20:35:46 +08001770/*
1771 * Check if the given hva is backed by a transparent huge page (THP) and
1772 * whether it can be mapped using block mapping in stage2. If so, adjust
1773 * the stage2 PFN and IPA accordingly. Only PMD_SIZE THPs are currently
1774 * supported. This will need to be updated to support other THP sizes.
1775 *
1776 * Returns the size of the mapping.
1777 */
1778static unsigned long
1779transparent_hugepage_adjust(struct kvm_memory_slot *memslot,
1780 unsigned long hva, kvm_pfn_t *pfnp,
1781 phys_addr_t *ipap)
1782{
1783 kvm_pfn_t pfn = *pfnp;
1784
1785 /*
1786 * Make sure the adjustment is done only for THP pages. Also make
1787 * sure that the HVA and IPA are sufficiently aligned and that the
1788 * block map is contained within the memslot.
1789 */
1790 if (kvm_is_transparent_hugepage(pfn) &&
1791 fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE)) {
1792 /*
1793 * The address we faulted on is backed by a transparent huge
1794 * page. However, because we map the compound huge page and
1795 * not the individual tail page, we need to transfer the
1796 * refcount to the head page. We have to be careful that the
1797 * THP doesn't start to split while we are adjusting the
1798 * refcounts.
1799 *
1800 * We are sure this doesn't happen, because mmu_notifier_retry
1801 * was successful and we are holding the mmu_lock, so if this
1802 * THP is trying to split, it will be blocked in the mmu
1803 * notifier before touching any of the pages, specifically
1804 * before being able to call __split_huge_page_refcount().
1805 *
1806 * We can therefore safely transfer the refcount from PG_tail
1807 * to PG_head and switch the pfn from a tail page to the head
1808 * page accordingly.
1809 */
1810 *ipap &= PMD_MASK;
1811 kvm_release_pfn_clean(pfn);
1812 pfn &= ~(PTRS_PER_PMD - 1);
1813 kvm_get_pfn(pfn);
1814 *pfnp = pfn;
1815
1816 return PMD_SIZE;
1817 }
1818
1819 /* Use page mapping if we cannot use block mapping. */
1820 return PAGE_SIZE;
1821}
1822
Christoffer Dall94f8e642013-01-20 18:28:12 -05001823static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
Christoffer Dall98047882014-08-19 12:18:04 +02001824 struct kvm_memory_slot *memslot, unsigned long hva,
Christoffer Dall94f8e642013-01-20 18:28:12 -05001825 unsigned long fault_status)
1826{
Christoffer Dall94f8e642013-01-20 18:28:12 -05001827 int ret;
Punit Agrawal6396b852018-12-11 17:10:35 +00001828 bool write_fault, writable, force_pte = false;
1829 bool exec_fault, needs_exec;
Christoffer Dall94f8e642013-01-20 18:28:12 -05001830 unsigned long mmu_seq;
Christoffer Dallad361f02012-11-01 17:14:45 +01001831 gfn_t gfn = fault_ipa >> PAGE_SHIFT;
Christoffer Dallad361f02012-11-01 17:14:45 +01001832 struct kvm *kvm = vcpu->kvm;
Christoffer Dall94f8e642013-01-20 18:28:12 -05001833 struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
Christoffer Dallad361f02012-11-01 17:14:45 +01001834 struct vm_area_struct *vma;
James Morse1559b752019-12-17 12:38:09 +00001835 short vma_shift;
Dan Williamsba049e92016-01-15 16:56:11 -08001836 kvm_pfn_t pfn;
Kim Phillipsb8865762014-06-26 01:45:51 +01001837 pgprot_t mem_type = PAGE_S2;
Mario Smarduch15a49a42015-01-15 15:58:58 -08001838 bool logging_active = memslot_is_logging(memslot);
Punit Agrawal3f58bf62018-12-11 17:10:34 +00001839 unsigned long vma_pagesize, flags = 0;
Christoffer Dall94f8e642013-01-20 18:28:12 -05001840
Ard Biesheuvela7d079c2014-09-09 11:27:09 +01001841 write_fault = kvm_is_write_fault(vcpu);
Marc Zyngierd0e22b42017-10-23 17:11:19 +01001842 exec_fault = kvm_vcpu_trap_is_iabt(vcpu);
1843 VM_BUG_ON(write_fault && exec_fault);
1844
1845 if (fault_status == FSC_PERM && !write_fault && !exec_fault) {
Christoffer Dall94f8e642013-01-20 18:28:12 -05001846 kvm_err("Unexpected L2 read permission error\n");
1847 return -EFAULT;
1848 }
1849
Christoffer Dallad361f02012-11-01 17:14:45 +01001850 /* Let's check if we will get back a huge page backed by hugetlbfs */
Michel Lespinasse89154dd2020-06-08 21:33:29 -07001851 mmap_read_lock(current->mm);
Christoffer Dallad361f02012-11-01 17:14:45 +01001852 vma = find_vma_intersection(current->mm, hva, hva + 1);
Ard Biesheuvel37b54402014-09-17 14:56:17 -07001853 if (unlikely(!vma)) {
1854 kvm_err("Failed to find VMA for hva 0x%lx\n", hva);
Michel Lespinasse89154dd2020-06-08 21:33:29 -07001855 mmap_read_unlock(current->mm);
Ard Biesheuvel37b54402014-09-17 14:56:17 -07001856 return -EFAULT;
1857 }
1858
James Morse1559b752019-12-17 12:38:09 +00001859 if (is_vm_hugetlb_page(vma))
1860 vma_shift = huge_page_shift(hstate_vma(vma));
1861 else
1862 vma_shift = PAGE_SHIFT;
1863
1864 vma_pagesize = 1ULL << vma_shift;
Suzuki K Poulosea80868f2019-03-12 09:52:51 +00001865 if (logging_active ||
Marc Zyngier6d674e22019-12-11 16:56:48 +00001866 (vma->vm_flags & VM_PFNMAP) ||
Suzuki K Poulosea80868f2019-03-12 09:52:51 +00001867 !fault_supports_stage2_huge_mapping(memslot, hva, vma_pagesize)) {
1868 force_pte = true;
1869 vma_pagesize = PAGE_SIZE;
1870 }
1871
Punit Agrawalb8e0ba72018-12-11 17:10:41 +00001872 /*
Suzuki K Poulose280cebf2019-01-29 19:12:17 +00001873 * The stage2 has a minimum of 2 level table (For arm64 see
1874 * kvm_arm_setup_stage2()). Hence, we are guaranteed that we can
1875 * use PMD_SIZE huge mappings (even when the PMD is folded into PGD).
1876 * As for PUD huge maps, we must make sure that we have at least
1877 * 3 levels, i.e, PMD is not folded.
Punit Agrawalb8e0ba72018-12-11 17:10:41 +00001878 */
Suzuki K Poulosea80868f2019-03-12 09:52:51 +00001879 if (vma_pagesize == PMD_SIZE ||
1880 (vma_pagesize == PUD_SIZE && kvm_stage2_has_pmd(kvm)))
Punit Agrawalb8e0ba72018-12-11 17:10:41 +00001881 gfn = (fault_ipa & huge_page_mask(hstate_vma(vma))) >> PAGE_SHIFT;
Michel Lespinasse89154dd2020-06-08 21:33:29 -07001882 mmap_read_unlock(current->mm);
Christoffer Dallad361f02012-11-01 17:14:45 +01001883
Christoffer Dall94f8e642013-01-20 18:28:12 -05001884 /* We need minimum second+third level pages */
Suzuki K Poulosee55cac52018-09-26 17:32:44 +01001885 ret = mmu_topup_memory_cache(memcache, kvm_mmu_cache_min_pages(kvm),
Christoffer Dall38f791a2014-10-10 12:14:28 +02001886 KVM_NR_MEM_OBJS);
Christoffer Dall94f8e642013-01-20 18:28:12 -05001887 if (ret)
1888 return ret;
1889
1890 mmu_seq = vcpu->kvm->mmu_notifier_seq;
1891 /*
1892 * Ensure the read of mmu_notifier_seq happens before we call
1893 * gfn_to_pfn_prot (which calls get_user_pages), so that we don't risk
1894 * the page we just got a reference to gets unmapped before we have a
1895 * chance to grab the mmu_lock, which ensure that if the page gets
1896 * unmapped afterwards, the call to kvm_unmap_hva will take it away
1897 * from us again properly. This smp_rmb() interacts with the smp_wmb()
1898 * in kvm_mmu_notifier_invalidate_<page|range_end>.
1899 */
1900 smp_rmb();
1901
Christoffer Dallad361f02012-11-01 17:14:45 +01001902 pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writable);
James Morse196f8782017-06-20 17:11:48 +01001903 if (pfn == KVM_PFN_ERR_HWPOISON) {
James Morse1559b752019-12-17 12:38:09 +00001904 kvm_send_hwpoison_signal(hva, vma_shift);
James Morse196f8782017-06-20 17:11:48 +01001905 return 0;
1906 }
Christoffer Dall9ac71592016-08-17 10:46:10 +02001907 if (is_error_noslot_pfn(pfn))
Christoffer Dall94f8e642013-01-20 18:28:12 -05001908 return -EFAULT;
1909
Mario Smarduch15a49a42015-01-15 15:58:58 -08001910 if (kvm_is_device_pfn(pfn)) {
Kim Phillipsb8865762014-06-26 01:45:51 +01001911 mem_type = PAGE_S2_DEVICE;
Mario Smarduch15a49a42015-01-15 15:58:58 -08001912 flags |= KVM_S2PTE_FLAG_IS_IOMAP;
1913 } else if (logging_active) {
1914 /*
1915 * Faults on pages in a memslot with logging enabled
1916 * should not be mapped with huge pages (it introduces churn
1917 * and performance degradation), so force a pte mapping.
1918 */
Mario Smarduch15a49a42015-01-15 15:58:58 -08001919 flags |= KVM_S2_FLAG_LOGGING_ACTIVE;
1920
1921 /*
1922 * Only actually map the page as writable if this was a write
1923 * fault.
1924 */
1925 if (!write_fault)
1926 writable = false;
1927 }
Kim Phillipsb8865762014-06-26 01:45:51 +01001928
Marc Zyngier6d674e22019-12-11 16:56:48 +00001929 if (exec_fault && is_iomap(flags))
1930 return -ENOEXEC;
1931
Christoffer Dallad361f02012-11-01 17:14:45 +01001932 spin_lock(&kvm->mmu_lock);
1933 if (mmu_notifier_retry(kvm, mmu_seq))
Christoffer Dall94f8e642013-01-20 18:28:12 -05001934 goto out_unlock;
Mario Smarduch15a49a42015-01-15 15:58:58 -08001935
Suzuki K Poulose0529c902020-05-07 20:35:46 +08001936 /*
1937 * If we are not forced to use page mapping, check if we are
1938 * backed by a THP and thus use block mapping if possible.
1939 */
1940 if (vma_pagesize == PAGE_SIZE && !force_pte)
1941 vma_pagesize = transparent_hugepage_adjust(memslot, hva,
1942 &pfn, &fault_ipa);
Punit Agrawal3f58bf62018-12-11 17:10:34 +00001943 if (writable)
1944 kvm_set_pfn_dirty(pfn);
1945
Marc Zyngier6d674e22019-12-11 16:56:48 +00001946 if (fault_status != FSC_PERM && !is_iomap(flags))
Punit Agrawal3f58bf62018-12-11 17:10:34 +00001947 clean_dcache_guest_page(pfn, vma_pagesize);
1948
1949 if (exec_fault)
1950 invalidate_icache_guest_page(pfn, vma_pagesize);
1951
Punit Agrawal6396b852018-12-11 17:10:35 +00001952 /*
1953 * If we took an execution fault we have made the
1954 * icache/dcache coherent above and should now let the s2
1955 * mapping be executable.
1956 *
1957 * Write faults (!exec_fault && FSC_PERM) are orthogonal to
1958 * execute permissions, and we preserve whatever we have.
1959 */
1960 needs_exec = exec_fault ||
1961 (fault_status == FSC_PERM && stage2_is_exec(kvm, fault_ipa));
1962
Punit Agrawalb8e0ba72018-12-11 17:10:41 +00001963 if (vma_pagesize == PUD_SIZE) {
1964 pud_t new_pud = kvm_pfn_pud(pfn, mem_type);
1965
1966 new_pud = kvm_pud_mkhuge(new_pud);
1967 if (writable)
1968 new_pud = kvm_s2pud_mkwrite(new_pud);
1969
1970 if (needs_exec)
1971 new_pud = kvm_s2pud_mkexec(new_pud);
1972
1973 ret = stage2_set_pud_huge(kvm, memcache, fault_ipa, &new_pud);
1974 } else if (vma_pagesize == PMD_SIZE) {
Punit Agrawalf8df7332018-12-11 17:10:36 +00001975 pmd_t new_pmd = kvm_pfn_pmd(pfn, mem_type);
1976
1977 new_pmd = kvm_pmd_mkhuge(new_pmd);
1978
Punit Agrawal3f58bf62018-12-11 17:10:34 +00001979 if (writable)
Catalin Marinas06485052016-04-13 17:57:37 +01001980 new_pmd = kvm_s2pmd_mkwrite(new_pmd);
Marc Zyngierd0e22b42017-10-23 17:11:19 +01001981
Punit Agrawal6396b852018-12-11 17:10:35 +00001982 if (needs_exec)
Marc Zyngierd0e22b42017-10-23 17:11:19 +01001983 new_pmd = kvm_s2pmd_mkexec(new_pmd);
Marc Zyngiera15f6932017-10-23 17:11:15 +01001984
Christoffer Dallad361f02012-11-01 17:14:45 +01001985 ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
1986 } else {
Punit Agrawalf8df7332018-12-11 17:10:36 +00001987 pte_t new_pte = kvm_pfn_pte(pfn, mem_type);
Mario Smarduch15a49a42015-01-15 15:58:58 -08001988
Christoffer Dallad361f02012-11-01 17:14:45 +01001989 if (writable) {
Catalin Marinas06485052016-04-13 17:57:37 +01001990 new_pte = kvm_s2pte_mkwrite(new_pte);
Mario Smarduch15a49a42015-01-15 15:58:58 -08001991 mark_page_dirty(kvm, gfn);
Christoffer Dallad361f02012-11-01 17:14:45 +01001992 }
Marc Zyngiera9c0e122017-10-23 17:11:20 +01001993
Punit Agrawal6396b852018-12-11 17:10:35 +00001994 if (needs_exec)
Marc Zyngierd0e22b42017-10-23 17:11:19 +01001995 new_pte = kvm_s2pte_mkexec(new_pte);
Marc Zyngiera15f6932017-10-23 17:11:15 +01001996
Mario Smarduch15a49a42015-01-15 15:58:58 -08001997 ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, flags);
Christoffer Dall94f8e642013-01-20 18:28:12 -05001998 }
Christoffer Dallad361f02012-11-01 17:14:45 +01001999
Christoffer Dall94f8e642013-01-20 18:28:12 -05002000out_unlock:
Christoffer Dallad361f02012-11-01 17:14:45 +01002001 spin_unlock(&kvm->mmu_lock);
Marc Zyngier35307b92015-03-12 18:16:51 +00002002 kvm_set_pfn_accessed(pfn);
Christoffer Dall94f8e642013-01-20 18:28:12 -05002003 kvm_release_pfn_clean(pfn);
Christoffer Dallad361f02012-11-01 17:14:45 +01002004 return ret;
Christoffer Dall94f8e642013-01-20 18:28:12 -05002005}
2006
Marc Zyngieraeda9132015-03-12 18:16:52 +00002007/*
2008 * Resolve the access fault by making the page young again.
2009 * Note that because the faulting entry is guaranteed not to be
2010 * cached in the TLB, we don't need to invalidate anything.
Catalin Marinas06485052016-04-13 17:57:37 +01002011 * Only the HW Access Flag updates are supported for Stage 2 (no DBM),
2012 * so there is no need for atomic (pte|pmd)_mkyoung operations.
Marc Zyngieraeda9132015-03-12 18:16:52 +00002013 */
2014static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
2015{
Punit Agrawaleb3f06242018-12-11 17:10:39 +00002016 pud_t *pud;
Marc Zyngieraeda9132015-03-12 18:16:52 +00002017 pmd_t *pmd;
2018 pte_t *pte;
Dan Williamsba049e92016-01-15 16:56:11 -08002019 kvm_pfn_t pfn;
Marc Zyngieraeda9132015-03-12 18:16:52 +00002020 bool pfn_valid = false;
2021
2022 trace_kvm_access_fault(fault_ipa);
2023
2024 spin_lock(&vcpu->kvm->mmu_lock);
2025
Punit Agrawaleb3f06242018-12-11 17:10:39 +00002026 if (!stage2_get_leaf_entry(vcpu->kvm, fault_ipa, &pud, &pmd, &pte))
Marc Zyngieraeda9132015-03-12 18:16:52 +00002027 goto out;
2028
Punit Agrawaleb3f06242018-12-11 17:10:39 +00002029 if (pud) { /* HugeTLB */
2030 *pud = kvm_s2pud_mkyoung(*pud);
2031 pfn = kvm_pud_pfn(*pud);
2032 pfn_valid = true;
2033 } else if (pmd) { /* THP, HugeTLB */
Marc Zyngieraeda9132015-03-12 18:16:52 +00002034 *pmd = pmd_mkyoung(*pmd);
2035 pfn = pmd_pfn(*pmd);
2036 pfn_valid = true;
Punit Agrawaleb3f06242018-12-11 17:10:39 +00002037 } else {
2038 *pte = pte_mkyoung(*pte); /* Just a page... */
2039 pfn = pte_pfn(*pte);
2040 pfn_valid = true;
Marc Zyngieraeda9132015-03-12 18:16:52 +00002041 }
2042
Marc Zyngieraeda9132015-03-12 18:16:52 +00002043out:
2044 spin_unlock(&vcpu->kvm->mmu_lock);
2045 if (pfn_valid)
2046 kvm_set_pfn_accessed(pfn);
2047}
2048
Christoffer Dall94f8e642013-01-20 18:28:12 -05002049/**
2050 * kvm_handle_guest_abort - handles all 2nd stage aborts
2051 * @vcpu: the VCPU pointer
2052 * @run: the kvm_run structure
2053 *
2054 * Any abort that gets to the host is almost guaranteed to be caused by a
2055 * missing second stage translation table entry, which can mean that either the
2056 * guest simply needs more memory and we must allocate an appropriate page or it
2057 * can mean that the guest tried to access I/O memory, which is emulated by user
2058 * space. The distinction is based on the IPA causing the fault and whether this
2059 * memory region has been registered as standard RAM by user space.
2060 */
Christoffer Dall342cd0a2013-01-20 18:28:06 -05002061int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
2062{
Christoffer Dall94f8e642013-01-20 18:28:12 -05002063 unsigned long fault_status;
2064 phys_addr_t fault_ipa;
2065 struct kvm_memory_slot *memslot;
Christoffer Dall98047882014-08-19 12:18:04 +02002066 unsigned long hva;
2067 bool is_iabt, write_fault, writable;
Christoffer Dall94f8e642013-01-20 18:28:12 -05002068 gfn_t gfn;
2069 int ret, idx;
2070
Tyler Baicar621f48e2017-06-21 12:17:14 -06002071 fault_status = kvm_vcpu_trap_get_fault_type(vcpu);
2072
2073 fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
James Morsebb428922017-07-18 13:37:41 +01002074 is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
Tyler Baicar621f48e2017-06-21 12:17:14 -06002075
James Morsebb428922017-07-18 13:37:41 +01002076 /* Synchronous External Abort? */
2077 if (kvm_vcpu_dabt_isextabt(vcpu)) {
2078 /*
2079 * For RAS the host kernel may handle this abort.
2080 * There is no need to pass the error into the guest.
2081 */
James Morse0db5e022019-01-29 18:48:49 +00002082 if (!kvm_handle_guest_sea(fault_ipa, kvm_vcpu_get_hsr(vcpu)))
Tyler Baicar621f48e2017-06-21 12:17:14 -06002083 return 1;
Tyler Baicar621f48e2017-06-21 12:17:14 -06002084
James Morsebb428922017-07-18 13:37:41 +01002085 if (unlikely(!is_iabt)) {
2086 kvm_inject_vabt(vcpu);
2087 return 1;
2088 }
Marc Zyngier40557102016-09-06 14:02:15 +01002089 }
2090
Marc Zyngier7393b592012-09-17 19:27:09 +01002091 trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_hsr(vcpu),
2092 kvm_vcpu_get_hfar(vcpu), fault_ipa);
Christoffer Dall94f8e642013-01-20 18:28:12 -05002093
2094 /* Check the stage-2 fault is trans. fault or write fault */
Marc Zyngier35307b92015-03-12 18:16:51 +00002095 if (fault_status != FSC_FAULT && fault_status != FSC_PERM &&
2096 fault_status != FSC_ACCESS) {
Christoffer Dall0496daa52014-09-26 12:29:34 +02002097 kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n",
2098 kvm_vcpu_trap_get_class(vcpu),
2099 (unsigned long)kvm_vcpu_trap_get_fault(vcpu),
2100 (unsigned long)kvm_vcpu_get_hsr(vcpu));
Christoffer Dall94f8e642013-01-20 18:28:12 -05002101 return -EFAULT;
2102 }
2103
2104 idx = srcu_read_lock(&vcpu->kvm->srcu);
2105
2106 gfn = fault_ipa >> PAGE_SHIFT;
Christoffer Dall98047882014-08-19 12:18:04 +02002107 memslot = gfn_to_memslot(vcpu->kvm, gfn);
2108 hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable);
Ard Biesheuvela7d079c2014-09-09 11:27:09 +01002109 write_fault = kvm_is_write_fault(vcpu);
Christoffer Dall98047882014-08-19 12:18:04 +02002110 if (kvm_is_error_hva(hva) || (write_fault && !writable)) {
Christoffer Dall94f8e642013-01-20 18:28:12 -05002111 if (is_iabt) {
2112 /* Prefetch Abort on I/O address */
Marc Zyngier6d674e22019-12-11 16:56:48 +00002113 ret = -ENOEXEC;
2114 goto out;
Christoffer Dall94f8e642013-01-20 18:28:12 -05002115 }
2116
Marc Zyngiercfe39502012-12-12 14:42:09 +00002117 /*
Marc Zyngier57c841f2016-01-29 15:01:28 +00002118 * Check for a cache maintenance operation. Since we
2119 * ended-up here, we know it is outside of any memory
2120 * slot. But we can't find out if that is for a device,
2121 * or if the guest is just being stupid. The only thing
2122 * we know for sure is that this range cannot be cached.
2123 *
2124 * So let's assume that the guest is just being
2125 * cautious, and skip the instruction.
2126 */
2127 if (kvm_vcpu_dabt_is_cm(vcpu)) {
2128 kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
2129 ret = 1;
2130 goto out_unlock;
2131 }
2132
2133 /*
Marc Zyngiercfe39502012-12-12 14:42:09 +00002134 * The IPA is reported as [MAX:12], so we need to
2135 * complement it with the bottom 12 bits from the
2136 * faulting VA. This is always 12 bits, irrespective
2137 * of the page size.
2138 */
2139 fault_ipa |= kvm_vcpu_get_hfar(vcpu) & ((1 << 12) - 1);
Christoffer Dall45e96ea2013-01-20 18:43:58 -05002140 ret = io_mem_abort(vcpu, run, fault_ipa);
Christoffer Dall94f8e642013-01-20 18:28:12 -05002141 goto out_unlock;
2142 }
2143
Christoffer Dallc3058d52014-10-10 12:14:29 +02002144 /* Userspace should not be able to register out-of-bounds IPAs */
Suzuki K Poulosee55cac52018-09-26 17:32:44 +01002145 VM_BUG_ON(fault_ipa >= kvm_phys_size(vcpu->kvm));
Christoffer Dallc3058d52014-10-10 12:14:29 +02002146
Marc Zyngieraeda9132015-03-12 18:16:52 +00002147 if (fault_status == FSC_ACCESS) {
2148 handle_access_fault(vcpu, fault_ipa);
2149 ret = 1;
2150 goto out_unlock;
2151 }
2152
Christoffer Dall98047882014-08-19 12:18:04 +02002153 ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status);
Christoffer Dall94f8e642013-01-20 18:28:12 -05002154 if (ret == 0)
2155 ret = 1;
Marc Zyngier6d674e22019-12-11 16:56:48 +00002156out:
2157 if (ret == -ENOEXEC) {
2158 kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu));
2159 ret = 1;
2160 }
Christoffer Dall94f8e642013-01-20 18:28:12 -05002161out_unlock:
2162 srcu_read_unlock(&vcpu->kvm->srcu, idx);
2163 return ret;
Christoffer Dall342cd0a2013-01-20 18:28:06 -05002164}
2165
Marc Zyngier1d2ebac2015-03-12 18:16:50 +00002166static int handle_hva_to_gpa(struct kvm *kvm,
2167 unsigned long start,
2168 unsigned long end,
2169 int (*handler)(struct kvm *kvm,
Suzuki K Poulose056aad62017-03-20 18:26:42 +00002170 gpa_t gpa, u64 size,
2171 void *data),
Marc Zyngier1d2ebac2015-03-12 18:16:50 +00002172 void *data)
Christoffer Dalld5d81842013-01-20 18:28:07 -05002173{
2174 struct kvm_memslots *slots;
2175 struct kvm_memory_slot *memslot;
Marc Zyngier1d2ebac2015-03-12 18:16:50 +00002176 int ret = 0;
Christoffer Dalld5d81842013-01-20 18:28:07 -05002177
2178 slots = kvm_memslots(kvm);
2179
2180 /* we only care about the pages that the guest sees */
2181 kvm_for_each_memslot(memslot, slots) {
2182 unsigned long hva_start, hva_end;
Suzuki K Poulose056aad62017-03-20 18:26:42 +00002183 gfn_t gpa;
Christoffer Dalld5d81842013-01-20 18:28:07 -05002184
2185 hva_start = max(start, memslot->userspace_addr);
2186 hva_end = min(end, memslot->userspace_addr +
2187 (memslot->npages << PAGE_SHIFT));
2188 if (hva_start >= hva_end)
2189 continue;
2190
Suzuki K Poulose056aad62017-03-20 18:26:42 +00002191 gpa = hva_to_gfn_memslot(hva_start, memslot) << PAGE_SHIFT;
2192 ret |= handler(kvm, gpa, (u64)(hva_end - hva_start), data);
Christoffer Dalld5d81842013-01-20 18:28:07 -05002193 }
Marc Zyngier1d2ebac2015-03-12 18:16:50 +00002194
2195 return ret;
Christoffer Dalld5d81842013-01-20 18:28:07 -05002196}
2197
Suzuki K Poulose056aad62017-03-20 18:26:42 +00002198static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
Christoffer Dalld5d81842013-01-20 18:28:07 -05002199{
Suzuki K Poulose056aad62017-03-20 18:26:42 +00002200 unmap_stage2_range(kvm, gpa, size);
Marc Zyngier1d2ebac2015-03-12 18:16:50 +00002201 return 0;
Christoffer Dalld5d81842013-01-20 18:28:07 -05002202}
2203
Christoffer Dalld5d81842013-01-20 18:28:07 -05002204int kvm_unmap_hva_range(struct kvm *kvm,
2205 unsigned long start, unsigned long end)
2206{
2207 if (!kvm->arch.pgd)
2208 return 0;
2209
2210 trace_kvm_unmap_hva_range(start, end);
2211 handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, NULL);
2212 return 0;
2213}
2214
Suzuki K Poulose056aad62017-03-20 18:26:42 +00002215static int kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
Christoffer Dalld5d81842013-01-20 18:28:07 -05002216{
2217 pte_t *pte = (pte_t *)data;
2218
Suzuki K Poulose056aad62017-03-20 18:26:42 +00002219 WARN_ON(size != PAGE_SIZE);
Mario Smarduch15a49a42015-01-15 15:58:58 -08002220 /*
2221 * We can always call stage2_set_pte with KVM_S2PTE_FLAG_LOGGING_ACTIVE
2222 * flag clear because MMU notifiers will have unmapped a huge PMD before
2223 * calling ->change_pte() (which in turn calls kvm_set_spte_hva()) and
2224 * therefore stage2_set_pte() never needs to clear out a huge PMD
2225 * through this calling path.
2226 */
2227 stage2_set_pte(kvm, NULL, gpa, pte, 0);
Marc Zyngier1d2ebac2015-03-12 18:16:50 +00002228 return 0;
Christoffer Dalld5d81842013-01-20 18:28:07 -05002229}
2230
2231
Lan Tianyu748c0e32018-12-06 21:21:10 +08002232int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
Christoffer Dalld5d81842013-01-20 18:28:07 -05002233{
2234 unsigned long end = hva + PAGE_SIZE;
Marc Zyngier694556d2018-08-23 09:58:27 +01002235 kvm_pfn_t pfn = pte_pfn(pte);
Christoffer Dalld5d81842013-01-20 18:28:07 -05002236 pte_t stage2_pte;
2237
2238 if (!kvm->arch.pgd)
Lan Tianyu748c0e32018-12-06 21:21:10 +08002239 return 0;
Christoffer Dalld5d81842013-01-20 18:28:07 -05002240
2241 trace_kvm_set_spte_hva(hva);
Marc Zyngier694556d2018-08-23 09:58:27 +01002242
2243 /*
2244 * We've moved a page around, probably through CoW, so let's treat it
2245 * just like a translation fault and clean the cache to the PoC.
2246 */
2247 clean_dcache_guest_page(pfn, PAGE_SIZE);
Punit Agrawalf8df7332018-12-11 17:10:36 +00002248 stage2_pte = kvm_pfn_pte(pfn, PAGE_S2);
Christoffer Dalld5d81842013-01-20 18:28:07 -05002249 handle_hva_to_gpa(kvm, hva, end, &kvm_set_spte_handler, &stage2_pte);
Lan Tianyu748c0e32018-12-06 21:21:10 +08002250
2251 return 0;
Christoffer Dalld5d81842013-01-20 18:28:07 -05002252}
2253
Suzuki K Poulose056aad62017-03-20 18:26:42 +00002254static int kvm_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
Marc Zyngier35307b92015-03-12 18:16:51 +00002255{
Punit Agrawal35a63962018-12-11 17:10:40 +00002256 pud_t *pud;
Marc Zyngier35307b92015-03-12 18:16:51 +00002257 pmd_t *pmd;
2258 pte_t *pte;
2259
Punit Agrawal35a63962018-12-11 17:10:40 +00002260 WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PUD_SIZE);
2261 if (!stage2_get_leaf_entry(kvm, gpa, &pud, &pmd, &pte))
Marc Zyngier35307b92015-03-12 18:16:51 +00002262 return 0;
2263
Punit Agrawal35a63962018-12-11 17:10:40 +00002264 if (pud)
2265 return stage2_pudp_test_and_clear_young(pud);
2266 else if (pmd)
Catalin Marinas06485052016-04-13 17:57:37 +01002267 return stage2_pmdp_test_and_clear_young(pmd);
Punit Agrawal35a63962018-12-11 17:10:40 +00002268 else
2269 return stage2_ptep_test_and_clear_young(pte);
Marc Zyngier35307b92015-03-12 18:16:51 +00002270}
2271
Suzuki K Poulose056aad62017-03-20 18:26:42 +00002272static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
Marc Zyngier35307b92015-03-12 18:16:51 +00002273{
Punit Agrawal35a63962018-12-11 17:10:40 +00002274 pud_t *pud;
Marc Zyngier35307b92015-03-12 18:16:51 +00002275 pmd_t *pmd;
2276 pte_t *pte;
2277
Punit Agrawal35a63962018-12-11 17:10:40 +00002278 WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PUD_SIZE);
2279 if (!stage2_get_leaf_entry(kvm, gpa, &pud, &pmd, &pte))
Marc Zyngier35307b92015-03-12 18:16:51 +00002280 return 0;
2281
Punit Agrawal35a63962018-12-11 17:10:40 +00002282 if (pud)
2283 return kvm_s2pud_young(*pud);
2284 else if (pmd)
Marc Zyngier35307b92015-03-12 18:16:51 +00002285 return pmd_young(*pmd);
Punit Agrawal35a63962018-12-11 17:10:40 +00002286 else
Marc Zyngier35307b92015-03-12 18:16:51 +00002287 return pte_young(*pte);
Marc Zyngier35307b92015-03-12 18:16:51 +00002288}
2289
2290int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
2291{
Suzuki K Poulose7e5a6722017-07-05 09:57:00 +01002292 if (!kvm->arch.pgd)
2293 return 0;
Marc Zyngier35307b92015-03-12 18:16:51 +00002294 trace_kvm_age_hva(start, end);
2295 return handle_hva_to_gpa(kvm, start, end, kvm_age_hva_handler, NULL);
2296}
2297
2298int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
2299{
Suzuki K Poulose7e5a6722017-07-05 09:57:00 +01002300 if (!kvm->arch.pgd)
2301 return 0;
Marc Zyngier35307b92015-03-12 18:16:51 +00002302 trace_kvm_test_age_hva(hva);
Gavin Shancf2d23e2020-01-21 16:56:59 +11002303 return handle_hva_to_gpa(kvm, hva, hva + PAGE_SIZE,
2304 kvm_test_age_hva_handler, NULL);
Marc Zyngier35307b92015-03-12 18:16:51 +00002305}
2306
Christoffer Dalld5d81842013-01-20 18:28:07 -05002307void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu)
2308{
2309 mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
2310}
2311
Christoffer Dall342cd0a2013-01-20 18:28:06 -05002312phys_addr_t kvm_mmu_get_httbr(void)
2313{
Ard Biesheuvele4c5a682015-03-19 16:42:28 +00002314 if (__kvm_cpu_uses_extended_idmap())
2315 return virt_to_phys(merged_hyp_pgd);
2316 else
2317 return virt_to_phys(hyp_pgd);
Christoffer Dall342cd0a2013-01-20 18:28:06 -05002318}
2319
Marc Zyngier5a677ce2013-04-12 19:12:06 +01002320phys_addr_t kvm_get_idmap_vector(void)
2321{
2322 return hyp_idmap_vector;
2323}
2324
Marc Zyngier0535a3e2016-06-30 18:40:43 +01002325static int kvm_map_idmap_text(pgd_t *pgd)
2326{
2327 int err;
2328
2329 /* Create the idmap in the boot page tables */
Kristina Martsenko98732d12018-01-15 15:23:49 +00002330 err = __create_hyp_mappings(pgd, __kvm_idmap_ptrs_per_pgd(),
Marc Zyngier0535a3e2016-06-30 18:40:43 +01002331 hyp_idmap_start, hyp_idmap_end,
2332 __phys_to_pfn(hyp_idmap_start),
2333 PAGE_HYP_EXEC);
2334 if (err)
2335 kvm_err("Failed to idmap %lx-%lx\n",
2336 hyp_idmap_start, hyp_idmap_end);
2337
2338 return err;
2339}
2340
Christoffer Dall342cd0a2013-01-20 18:28:06 -05002341int kvm_mmu_init(void)
2342{
Marc Zyngier2fb41052013-04-12 19:12:03 +01002343 int err;
2344
Andrew Scull0a787912020-05-19 11:40:36 +01002345 hyp_idmap_start = __pa_symbol(__hyp_idmap_text_start);
Marc Zyngier46fef152018-03-12 14:25:10 +00002346 hyp_idmap_start = ALIGN_DOWN(hyp_idmap_start, PAGE_SIZE);
Andrew Scull0a787912020-05-19 11:40:36 +01002347 hyp_idmap_end = __pa_symbol(__hyp_idmap_text_end);
Marc Zyngier46fef152018-03-12 14:25:10 +00002348 hyp_idmap_end = ALIGN(hyp_idmap_end, PAGE_SIZE);
Andrew Scull0a787912020-05-19 11:40:36 +01002349 hyp_idmap_vector = __pa_symbol(__kvm_hyp_init);
Marc Zyngier5a677ce2013-04-12 19:12:06 +01002350
Ard Biesheuvel06f75a12015-03-19 16:42:26 +00002351 /*
2352 * We rely on the linker script to ensure at build time that the HYP
2353 * init code does not cross a page boundary.
2354 */
2355 BUG_ON((hyp_idmap_start ^ (hyp_idmap_end - 1)) & PAGE_MASK);
Marc Zyngier5a677ce2013-04-12 19:12:06 +01002356
Marc Zyngierb4ef0492017-12-03 20:04:51 +00002357 kvm_debug("IDMAP page: %lx\n", hyp_idmap_start);
2358 kvm_debug("HYP VA range: %lx:%lx\n",
2359 kern_hyp_va(PAGE_OFFSET),
2360 kern_hyp_va((unsigned long)high_memory - 1));
Marc Zyngiereac378a2016-06-30 18:40:50 +01002361
Marc Zyngier6c41a412016-06-30 18:40:51 +01002362 if (hyp_idmap_start >= kern_hyp_va(PAGE_OFFSET) &&
Marc Zyngiered57cac2017-12-03 18:22:49 +00002363 hyp_idmap_start < kern_hyp_va((unsigned long)high_memory - 1) &&
Marc Zyngierd2896d42016-08-22 09:01:17 +01002364 hyp_idmap_start != (unsigned long)__hyp_idmap_text_start) {
Marc Zyngiereac378a2016-06-30 18:40:50 +01002365 /*
2366 * The idmap page is intersecting with the VA space,
2367 * it is not safe to continue further.
2368 */
2369 kvm_err("IDMAP intersecting with HYP VA, unable to continue\n");
2370 err = -EINVAL;
2371 goto out;
2372 }
2373
Christoffer Dall38f791a2014-10-10 12:14:28 +02002374 hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, hyp_pgd_order);
Marc Zyngier0535a3e2016-06-30 18:40:43 +01002375 if (!hyp_pgd) {
Christoffer Dalld5d81842013-01-20 18:28:07 -05002376 kvm_err("Hyp mode PGD not allocated\n");
Marc Zyngier2fb41052013-04-12 19:12:03 +01002377 err = -ENOMEM;
2378 goto out;
2379 }
2380
Ard Biesheuvele4c5a682015-03-19 16:42:28 +00002381 if (__kvm_cpu_uses_extended_idmap()) {
Marc Zyngier0535a3e2016-06-30 18:40:43 +01002382 boot_hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
2383 hyp_pgd_order);
2384 if (!boot_hyp_pgd) {
2385 kvm_err("Hyp boot PGD not allocated\n");
2386 err = -ENOMEM;
2387 goto out;
2388 }
2389
2390 err = kvm_map_idmap_text(boot_hyp_pgd);
2391 if (err)
2392 goto out;
2393
Ard Biesheuvele4c5a682015-03-19 16:42:28 +00002394 merged_hyp_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
2395 if (!merged_hyp_pgd) {
2396 kvm_err("Failed to allocate extra HYP pgd\n");
2397 goto out;
2398 }
2399 __kvm_extend_hypmap(boot_hyp_pgd, hyp_pgd, merged_hyp_pgd,
2400 hyp_idmap_start);
Marc Zyngier0535a3e2016-06-30 18:40:43 +01002401 } else {
2402 err = kvm_map_idmap_text(hyp_pgd);
2403 if (err)
2404 goto out;
Marc Zyngier5a677ce2013-04-12 19:12:06 +01002405 }
2406
Marc Zyngiere3f019b2017-12-04 17:04:38 +00002407 io_map_base = hyp_idmap_start;
Christoffer Dalld5d81842013-01-20 18:28:07 -05002408 return 0;
Marc Zyngier2fb41052013-04-12 19:12:03 +01002409out:
Marc Zyngier4f728272013-04-12 19:12:05 +01002410 free_hyp_pgds();
Marc Zyngier2fb41052013-04-12 19:12:03 +01002411 return err;
Christoffer Dall342cd0a2013-01-20 18:28:06 -05002412}
Eric Augerdf6ce242014-06-06 11:10:23 +02002413
2414void kvm_arch_commit_memory_region(struct kvm *kvm,
Paolo Bonzini09170a42015-05-18 13:59:39 +02002415 const struct kvm_userspace_memory_region *mem,
Sean Christopherson9d4c1972020-02-18 13:07:24 -08002416 struct kvm_memory_slot *old,
Paolo Bonzinif36f3f22015-05-18 13:20:23 +02002417 const struct kvm_memory_slot *new,
Eric Augerdf6ce242014-06-06 11:10:23 +02002418 enum kvm_mr_change change)
2419{
Mario Smarduchc6473552015-01-15 15:58:56 -08002420 /*
2421 * At this point memslot has been committed and there is an
Fuad Tabba656012c2020-04-01 15:03:10 +01002422 * allocated dirty_bitmap[], dirty pages will be tracked while the
Mario Smarduchc6473552015-01-15 15:58:56 -08002423 * memory slot is write protected.
2424 */
Keqian Zhuc8626262020-04-13 20:20:23 +08002425 if (change != KVM_MR_DELETE && mem->flags & KVM_MEM_LOG_DIRTY_PAGES) {
2426 /*
2427 * If we're with initial-all-set, we don't need to write
2428 * protect any pages because they're all reported as dirty.
2429 * Huge pages and normal pages will be write protect gradually.
2430 */
2431 if (!kvm_dirty_log_manual_protect_and_init_set(kvm)) {
2432 kvm_mmu_wp_memory_region(kvm, mem->slot);
2433 }
2434 }
Eric Augerdf6ce242014-06-06 11:10:23 +02002435}
2436
2437int kvm_arch_prepare_memory_region(struct kvm *kvm,
2438 struct kvm_memory_slot *memslot,
Paolo Bonzini09170a42015-05-18 13:59:39 +02002439 const struct kvm_userspace_memory_region *mem,
Eric Augerdf6ce242014-06-06 11:10:23 +02002440 enum kvm_mr_change change)
2441{
Ard Biesheuvel8eef9122014-10-10 17:00:32 +02002442 hva_t hva = mem->userspace_addr;
2443 hva_t reg_end = hva + mem->memory_size;
2444 bool writable = !(mem->flags & KVM_MEM_READONLY);
2445 int ret = 0;
2446
Mario Smarduch15a49a42015-01-15 15:58:58 -08002447 if (change != KVM_MR_CREATE && change != KVM_MR_MOVE &&
2448 change != KVM_MR_FLAGS_ONLY)
Ard Biesheuvel8eef9122014-10-10 17:00:32 +02002449 return 0;
2450
2451 /*
Christoffer Dallc3058d52014-10-10 12:14:29 +02002452 * Prevent userspace from creating a memory region outside of the IPA
2453 * space addressable by the KVM guest IPA space.
2454 */
2455 if (memslot->base_gfn + memslot->npages >=
Suzuki K Poulosee55cac52018-09-26 17:32:44 +01002456 (kvm_phys_size(kvm) >> PAGE_SHIFT))
Christoffer Dallc3058d52014-10-10 12:14:29 +02002457 return -EFAULT;
2458
Michel Lespinasse89154dd2020-06-08 21:33:29 -07002459 mmap_read_lock(current->mm);
Christoffer Dallc3058d52014-10-10 12:14:29 +02002460 /*
Ard Biesheuvel8eef9122014-10-10 17:00:32 +02002461 * A memory region could potentially cover multiple VMAs, and any holes
2462 * between them, so iterate over all of them to find out if we can map
2463 * any of them right now.
2464 *
2465 * +--------------------------------------------+
2466 * +---------------+----------------+ +----------------+
2467 * | : VMA 1 | VMA 2 | | VMA 3 : |
2468 * +---------------+----------------+ +----------------+
2469 * | memory region |
2470 * +--------------------------------------------+
2471 */
2472 do {
2473 struct vm_area_struct *vma = find_vma(current->mm, hva);
2474 hva_t vm_start, vm_end;
2475
2476 if (!vma || vma->vm_start >= reg_end)
2477 break;
2478
2479 /*
Ard Biesheuvel8eef9122014-10-10 17:00:32 +02002480 * Take the intersection of this VMA with the memory region
2481 */
2482 vm_start = max(hva, vma->vm_start);
2483 vm_end = min(reg_end, vma->vm_end);
2484
2485 if (vma->vm_flags & VM_PFNMAP) {
2486 gpa_t gpa = mem->guest_phys_addr +
2487 (vm_start - mem->userspace_addr);
Marek Majtykaca09f022015-09-16 12:04:55 +02002488 phys_addr_t pa;
2489
2490 pa = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT;
2491 pa += vm_start - vma->vm_start;
Ard Biesheuvel8eef9122014-10-10 17:00:32 +02002492
Mario Smarduch15a49a42015-01-15 15:58:58 -08002493 /* IO region dirty page logging not allowed */
Marc Zyngier72f31042017-03-16 18:20:50 +00002494 if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) {
2495 ret = -EINVAL;
2496 goto out;
2497 }
Mario Smarduch15a49a42015-01-15 15:58:58 -08002498
Ard Biesheuvel8eef9122014-10-10 17:00:32 +02002499 ret = kvm_phys_addr_ioremap(kvm, gpa, pa,
2500 vm_end - vm_start,
2501 writable);
2502 if (ret)
2503 break;
2504 }
2505 hva = vm_end;
2506 } while (hva < reg_end);
2507
Mario Smarduch15a49a42015-01-15 15:58:58 -08002508 if (change == KVM_MR_FLAGS_ONLY)
Marc Zyngier72f31042017-03-16 18:20:50 +00002509 goto out;
Mario Smarduch15a49a42015-01-15 15:58:58 -08002510
Ard Biesheuvel849260c2014-11-17 14:58:53 +00002511 spin_lock(&kvm->mmu_lock);
2512 if (ret)
Ard Biesheuvel8eef9122014-10-10 17:00:32 +02002513 unmap_stage2_range(kvm, mem->guest_phys_addr, mem->memory_size);
Ard Biesheuvel849260c2014-11-17 14:58:53 +00002514 else
2515 stage2_flush_memslot(kvm, memslot);
2516 spin_unlock(&kvm->mmu_lock);
Marc Zyngier72f31042017-03-16 18:20:50 +00002517out:
Michel Lespinasse89154dd2020-06-08 21:33:29 -07002518 mmap_read_unlock(current->mm);
Ard Biesheuvel8eef9122014-10-10 17:00:32 +02002519 return ret;
Eric Augerdf6ce242014-06-06 11:10:23 +02002520}
2521
/* Arch hook for freeing a memslot: intentionally empty in this file. */
void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
{
}
2525
Sean Christopherson15248252019-02-05 12:54:17 -08002526void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen)
Eric Augerdf6ce242014-06-06 11:10:23 +02002527{
2528}
2529
/* Drop all of the VM's stage-2 translations by freeing its stage-2 PGD. */
void kvm_arch_flush_shadow_all(struct kvm *kvm)
{
	kvm_free_stage2_pgd(kvm);
}
2534
2535void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
2536 struct kvm_memory_slot *slot)
2537{
Ard Biesheuvel8eef9122014-10-10 17:00:32 +02002538 gpa_t gpa = slot->base_gfn << PAGE_SHIFT;
2539 phys_addr_t size = slot->npages << PAGE_SHIFT;
2540
2541 spin_lock(&kvm->mmu_lock);
2542 unmap_stage2_range(kvm, gpa, size);
2543 spin_unlock(&kvm->mmu_lock);
Eric Augerdf6ce242014-06-06 11:10:23 +02002544}
Marc Zyngier3c1e7162014-12-19 16:05:31 +00002545
2546/*
2547 * See note at ARMv7 ARM B1.14.4 (TL;DR: S/W ops are not easily virtualized).
2548 *
2549 * Main problems:
2550 * - S/W ops are local to a CPU (not broadcast)
2551 * - We have line migration behind our back (speculation)
2552 * - System caches don't support S/W at all (damn!)
2553 *
2554 * In the face of the above, the best we can do is to try and convert
2555 * S/W ops to VA ops. Because the guest is not allowed to infer the
2556 * S/W to PA mapping, it can only use S/W to nuke the whole cache,
2557 * which is a rather good thing for us.
2558 *
2559 * Also, it is only used when turning caches on/off ("The expected
2560 * usage of the cache maintenance instructions that operate by set/way
2561 * is associated with the cache maintenance instructions associated
2562 * with the powerdown and powerup of caches, if this is required by
2563 * the implementation.").
2564 *
2565 * We use the following policy:
2566 *
2567 * - If we trap a S/W operation, we enable VM trapping to detect
2568 * caches being turned on/off, and do a full clean.
2569 *
2570 * - We flush the caches on both caches being turned on and off.
2571 *
2572 * - Once the caches are enabled, we stop trapping VM ops.
2573 */
2574void kvm_set_way_flush(struct kvm_vcpu *vcpu)
2575{
Christoffer Dall3df59d82017-08-03 12:09:05 +02002576 unsigned long hcr = *vcpu_hcr(vcpu);
Marc Zyngier3c1e7162014-12-19 16:05:31 +00002577
2578 /*
2579 * If this is the first time we do a S/W operation
2580 * (i.e. HCR_TVM not set) flush the whole memory, and set the
2581 * VM trapping.
2582 *
2583 * Otherwise, rely on the VM trapping to wait for the MMU +
2584 * Caches to be turned off. At that point, we'll be able to
2585 * clean the caches again.
2586 */
2587 if (!(hcr & HCR_TVM)) {
2588 trace_kvm_set_way_flush(*vcpu_pc(vcpu),
2589 vcpu_has_cache_enabled(vcpu));
2590 stage2_flush_vm(vcpu->kvm);
Christoffer Dall3df59d82017-08-03 12:09:05 +02002591 *vcpu_hcr(vcpu) = hcr | HCR_TVM;
Marc Zyngier3c1e7162014-12-19 16:05:31 +00002592 }
2593}
2594
2595void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled)
2596{
2597 bool now_enabled = vcpu_has_cache_enabled(vcpu);
2598
2599 /*
2600 * If switching the MMU+caches on, need to invalidate the caches.
2601 * If switching it off, need to clean the caches.
2602 * Clean + invalidate does the trick always.
2603 */
2604 if (now_enabled != was_enabled)
2605 stage2_flush_vm(vcpu->kvm);
2606
2607 /* Caches are now on, stop trapping VM ops (until a S/W op) */
2608 if (now_enabled)
Christoffer Dall3df59d82017-08-03 12:09:05 +02002609 *vcpu_hcr(vcpu) &= ~HCR_TVM;
Marc Zyngier3c1e7162014-12-19 16:05:31 +00002610
2611 trace_kvm_toggle_cache(*vcpu_pc(vcpu), was_enabled, now_enabled);
2612}