blob: 7a7ddc4558a7697a69ca42b581f11a82e02788dd
Thomas Gleixnerd94d71c2019-05-29 07:12:40 -07001// SPDX-License-Identifier: GPL-2.0-only
Christoffer Dall749cf76c2013-01-20 18:28:06 -05002/*
3 * Copyright (C) 2012 - Virtual Open Systems and Columbia University
4 * Author: Christoffer Dall <c.dall@virtualopensystems.com>
Christoffer Dall749cf76c2013-01-20 18:28:06 -05005 */
Christoffer Dall342cd0a2013-01-20 18:28:06 -05006
7#include <linux/mman.h>
8#include <linux/kvm_host.h>
9#include <linux/io.h>
Christoffer Dallad361f02012-11-01 17:14:45 +010010#include <linux/hugetlb.h>
James Morse196f8782017-06-20 17:11:48 +010011#include <linux/sched/signal.h>
Christoffer Dall45e96ea2013-01-20 18:43:58 -050012#include <trace/events/kvm.h>
Christoffer Dall342cd0a2013-01-20 18:28:06 -050013#include <asm/pgalloc.h>
Christoffer Dall94f8e642013-01-20 18:28:12 -050014#include <asm/cacheflush.h>
Christoffer Dall342cd0a2013-01-20 18:28:06 -050015#include <asm/kvm_arm.h>
16#include <asm/kvm_mmu.h>
James Morse0db5e022019-01-29 18:48:49 +000017#include <asm/kvm_ras.h>
Christoffer Dalld5d81842013-01-20 18:28:07 -050018#include <asm/kvm_asm.h>
Christoffer Dall94f8e642013-01-20 18:28:12 -050019#include <asm/kvm_emulate.h>
Marc Zyngier1e947ba2015-01-29 11:59:54 +000020#include <asm/virt.h>
Christoffer Dalld5d81842013-01-20 18:28:07 -050021
22#include "trace.h"
Christoffer Dall342cd0a2013-01-20 18:28:06 -050023
Marc Zyngier5a677ce2013-04-12 19:12:06 +010024static pgd_t *boot_hyp_pgd;
Marc Zyngier2fb41052013-04-12 19:12:03 +010025static pgd_t *hyp_pgd;
Ard Biesheuvele4c5a682015-03-19 16:42:28 +000026static pgd_t *merged_hyp_pgd;
Christoffer Dall342cd0a2013-01-20 18:28:06 -050027static DEFINE_MUTEX(kvm_hyp_pgd_mutex);
28
Marc Zyngier5a677ce2013-04-12 19:12:06 +010029static unsigned long hyp_idmap_start;
30static unsigned long hyp_idmap_end;
31static phys_addr_t hyp_idmap_vector;
32
Marc Zyngiere3f019b2017-12-04 17:04:38 +000033static unsigned long io_map_base;
34
Christoffer Dall38f791a2014-10-10 12:14:28 +020035#define hyp_pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t))
Mark Salter5d4e08c2014-03-28 14:25:19 +000036
Mario Smarduch15a49a42015-01-15 15:58:58 -080037#define KVM_S2PTE_FLAG_IS_IOMAP (1UL << 0)
38#define KVM_S2_FLAG_LOGGING_ACTIVE (1UL << 1)
39
Marc Zyngier6d674e22019-12-11 16:56:48 +000040static bool is_iomap(unsigned long flags)
41{
42 return flags & KVM_S2PTE_FLAG_IS_IOMAP;
43}
44
Mario Smarduch15a49a42015-01-15 15:58:58 -080045static bool memslot_is_logging(struct kvm_memory_slot *memslot)
46{
Mario Smarduch15a49a42015-01-15 15:58:58 -080047 return memslot->dirty_bitmap && !(memslot->flags & KVM_MEM_READONLY);
Mario Smarduch72760302015-01-15 15:59:01 -080048}
49
50/**
51 * kvm_flush_remote_tlbs() - flush all VM TLB entries for ARMv7/ARMv8
52 * @kvm: pointer to kvm structure.
53 *
54 * Interface to HYP function to flush all VM TLB entries
55 */
56void kvm_flush_remote_tlbs(struct kvm *kvm)
57{
58 kvm_call_hyp(__kvm_tlb_flush_vmid, kvm);
Mario Smarduch15a49a42015-01-15 15:58:58 -080059}
Christoffer Dallad361f02012-11-01 17:14:45 +010060
Marc Zyngier48762762013-01-28 15:27:00 +000061static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
Christoffer Dalld5d81842013-01-20 18:28:07 -050062{
Suzuki K Poulose8684e702016-03-22 17:14:25 +000063 kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
Christoffer Dalld5d81842013-01-20 18:28:07 -050064}
65
Marc Zyngier363ef892014-12-19 16:48:06 +000066/*
67 * D-Cache management functions. They take the page table entries by
68 * value, as they are flushing the cache using the kernel mapping (or
69 * kmap on 32bit).
70 */
71static void kvm_flush_dcache_pte(pte_t pte)
72{
73 __kvm_flush_dcache_pte(pte);
74}
75
76static void kvm_flush_dcache_pmd(pmd_t pmd)
77{
78 __kvm_flush_dcache_pmd(pmd);
79}
80
81static void kvm_flush_dcache_pud(pud_t pud)
82{
83 __kvm_flush_dcache_pud(pud);
84}
85
Ard Biesheuvele6fab542015-11-10 15:11:20 +010086static bool kvm_is_device_pfn(unsigned long pfn)
87{
88 return !pfn_valid(pfn);
89}
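/*
 * Note on the check above: pfn_valid() only succeeds for pfns backed by a
 * struct page, i.e. ordinary RAM known to the kernel. Treating everything
 * else as device memory is what lets the unmap and flush paths below skip
 * D-cache maintenance for such pfns.
 */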
90
Mario Smarduch15a49a42015-01-15 15:58:58 -080091/**
92 * stage2_dissolve_pmd() - clear and flush huge PMD entry
93 * @kvm: pointer to kvm structure.
94 * @addr: IPA
95 * @pmd: pmd pointer for IPA
96 *
Zenghui Yu8324c3d2019-03-25 08:02:05 +000097 * Clears a PMD entry and flushes the 1st and 2nd stage TLBs for @addr.
Mario Smarduch15a49a42015-01-15 15:58:58 -080098 */
99static void stage2_dissolve_pmd(struct kvm *kvm, phys_addr_t addr, pmd_t *pmd)
100{
Suzuki K Poulosebbb3b6b2016-03-01 12:00:39 +0000101 if (!pmd_thp_or_huge(*pmd))
Mario Smarduch15a49a42015-01-15 15:58:58 -0800102 return;
103
104 pmd_clear(pmd);
105 kvm_tlb_flush_vmid_ipa(kvm, addr);
106 put_page(virt_to_page(pmd));
107}
108
Punit Agrawalb8e0ba72018-12-11 17:10:41 +0000109/**
110 * stage2_dissolve_pud() - clear and flush huge PUD entry
111 * @kvm: pointer to kvm structure.
112 * @addr: IPA
113 * @pud: pud pointer for IPA
114 *
Zenghui Yu8324c3d2019-03-25 08:02:05 +0000115 * Function clears a PUD entry, flushes addr 1st and 2nd stage TLBs.
Punit Agrawalb8e0ba72018-12-11 17:10:41 +0000116 */
117static void stage2_dissolve_pud(struct kvm *kvm, phys_addr_t addr, pud_t *pudp)
118{
119 if (!stage2_pud_huge(kvm, *pudp))
120 return;
121
122 stage2_pud_clear(kvm, pudp);
123 kvm_tlb_flush_vmid_ipa(kvm, addr);
124 put_page(virt_to_page(pudp));
125}
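/*
 * Both dissolve helpers above exist to support dirty page logging: once
 * logging is enabled on a memslot, a huge PMD/PUD covering a faulting
 * address is torn down so the fault path can rebuild the mapping at PTE
 * granularity and track dirtying per base page (see the logging_active
 * handling in stage2_set_pte() further down).
 */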
126
Suzuki K Poulose7a1c8312016-03-23 12:08:02 +0000127static void clear_stage2_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr)
Marc Zyngier979acd52013-08-06 13:05:48 +0100128{
Mike Rapoporte9f63762020-06-04 16:46:23 -0700129 p4d_t *p4d_table __maybe_unused = stage2_p4d_offset(kvm, pgd, 0UL);
Suzuki K Poulosee55cac52018-09-26 17:32:44 +0100130 stage2_pgd_clear(kvm, pgd);
Christoffer Dall4f853a72014-05-09 23:31:31 +0200131 kvm_tlb_flush_vmid_ipa(kvm, addr);
Mike Rapoporte9f63762020-06-04 16:46:23 -0700132 stage2_p4d_free(kvm, p4d_table);
Christoffer Dall4f853a72014-05-09 23:31:31 +0200133 put_page(virt_to_page(pgd));
Marc Zyngier979acd52013-08-06 13:05:48 +0100134}
135
Mike Rapoporte9f63762020-06-04 16:46:23 -0700136static void clear_stage2_p4d_entry(struct kvm *kvm, p4d_t *p4d, phys_addr_t addr)
137{
138 pud_t *pud_table __maybe_unused = stage2_pud_offset(kvm, p4d, 0);
139 stage2_p4d_clear(kvm, p4d);
140 kvm_tlb_flush_vmid_ipa(kvm, addr);
141 stage2_pud_free(kvm, pud_table);
142 put_page(virt_to_page(p4d));
143}
144
Suzuki K Poulose7a1c8312016-03-23 12:08:02 +0000145static void clear_stage2_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr)
Christoffer Dall342cd0a2013-01-20 18:28:06 -0500146{
Suzuki K Poulosee55cac52018-09-26 17:32:44 +0100147 pmd_t *pmd_table __maybe_unused = stage2_pmd_offset(kvm, pud, 0);
148 VM_BUG_ON(stage2_pud_huge(kvm, *pud));
149 stage2_pud_clear(kvm, pud);
Christoffer Dall4f853a72014-05-09 23:31:31 +0200150 kvm_tlb_flush_vmid_ipa(kvm, addr);
Suzuki K Poulosee55cac52018-09-26 17:32:44 +0100151 stage2_pmd_free(kvm, pmd_table);
Marc Zyngier4f728272013-04-12 19:12:05 +0100152 put_page(virt_to_page(pud));
153}
Christoffer Dall342cd0a2013-01-20 18:28:06 -0500154
Suzuki K Poulose7a1c8312016-03-23 12:08:02 +0000155static void clear_stage2_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)
Marc Zyngier4f728272013-04-12 19:12:05 +0100156{
Christoffer Dall4f853a72014-05-09 23:31:31 +0200157 pte_t *pte_table = pte_offset_kernel(pmd, 0);
Suzuki K Poulosebbb3b6b2016-03-01 12:00:39 +0000158 VM_BUG_ON(pmd_thp_or_huge(*pmd));
Christoffer Dall4f853a72014-05-09 23:31:31 +0200159 pmd_clear(pmd);
160 kvm_tlb_flush_vmid_ipa(kvm, addr);
Anshuman Khandual14b94d02019-03-12 18:55:45 +0530161 free_page((unsigned long)pte_table);
Marc Zyngier4f728272013-04-12 19:12:05 +0100162 put_page(virt_to_page(pmd));
163}
164
Marc Zyngier88dc25e82018-05-25 12:23:11 +0100165static inline void kvm_set_pte(pte_t *ptep, pte_t new_pte)
166{
167 WRITE_ONCE(*ptep, new_pte);
168 dsb(ishst);
169}
170
171static inline void kvm_set_pmd(pmd_t *pmdp, pmd_t new_pmd)
172{
173 WRITE_ONCE(*pmdp, new_pmd);
174 dsb(ishst);
175}
176
Marc Zyngier0db9dd82018-06-27 15:51:05 +0100177static inline void kvm_pmd_populate(pmd_t *pmdp, pte_t *ptep)
178{
179 kvm_set_pmd(pmdp, kvm_mk_pmd(ptep));
180}
181
182static inline void kvm_pud_populate(pud_t *pudp, pmd_t *pmdp)
183{
184 WRITE_ONCE(*pudp, kvm_mk_pud(pmdp));
185 dsb(ishst);
186}
187
Mike Rapoporte9f63762020-06-04 16:46:23 -0700188static inline void kvm_p4d_populate(p4d_t *p4dp, pud_t *pudp)
Marc Zyngier0db9dd82018-06-27 15:51:05 +0100189{
Mike Rapoporte9f63762020-06-04 16:46:23 -0700190 WRITE_ONCE(*p4dp, kvm_mk_p4d(pudp));
Marc Zyngier0db9dd82018-06-27 15:51:05 +0100191 dsb(ishst);
192}
193
Mike Rapoporte9f63762020-06-04 16:46:23 -0700194static inline void kvm_pgd_populate(pgd_t *pgdp, p4d_t *p4dp)
195{
196#ifndef __PAGETABLE_P4D_FOLDED
197 WRITE_ONCE(*pgdp, kvm_mk_pgd(p4dp));
198 dsb(ishst);
199#endif
200}
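/*
 * With a folded p4d level (__PAGETABLE_P4D_FOLDED) the "p4d" is simply the
 * pgd slot itself, so there is no separate table to hook up here and the
 * write is compiled out. On such configurations pgd_none() never reports
 * an empty entry, so the callers below never depend on this helper doing
 * any work.
 */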
201
Marc Zyngier363ef892014-12-19 16:48:06 +0000202/*
203 * Unmapping vs dcache management:
204 *
205 * If a guest maps certain memory pages as uncached, all writes will
206 * bypass the data cache and go directly to RAM. However, the CPUs
207 * can still speculate reads (not writes) and fill cache lines with
208 * data.
209 *
210 * Those cache lines will be *clean* cache lines though, so a
211 * clean+invalidate operation is equivalent to an invalidate
212 * operation, because no cache lines are marked dirty.
213 *
214 * Those clean cache lines could be filled prior to an uncached write
215 * by the guest, and the cache coherent IO subsystem would therefore
216 * end up writing old data to disk.
217 *
218 * This is why right after unmapping a page/section and invalidating
219 * the corresponding TLBs, we call kvm_flush_dcache_p*() to make sure
220 * the IO subsystem will never hit in the cache.
Marc Zyngiere48d53a2018-04-06 12:27:28 +0100221 *
222 * This is all avoided on systems that have ARM64_HAS_STAGE2_FWB, as
223 * we then fully enforce cacheability of RAM, no matter what the guest
224 * does.
Marc Zyngier363ef892014-12-19 16:48:06 +0000225 */
Suzuki K Poulose7a1c8312016-03-23 12:08:02 +0000226static void unmap_stage2_ptes(struct kvm *kvm, pmd_t *pmd,
Christoffer Dall4f853a72014-05-09 23:31:31 +0200227 phys_addr_t addr, phys_addr_t end)
Marc Zyngier4f728272013-04-12 19:12:05 +0100228{
Christoffer Dall4f853a72014-05-09 23:31:31 +0200229 phys_addr_t start_addr = addr;
230 pte_t *pte, *start_pte;
231
232 start_pte = pte = pte_offset_kernel(pmd, addr);
233 do {
234 if (!pte_none(*pte)) {
Marc Zyngier363ef892014-12-19 16:48:06 +0000235 pte_t old_pte = *pte;
236
Christoffer Dall4f853a72014-05-09 23:31:31 +0200237 kvm_set_pte(pte, __pte(0));
Christoffer Dall4f853a72014-05-09 23:31:31 +0200238 kvm_tlb_flush_vmid_ipa(kvm, addr);
Marc Zyngier363ef892014-12-19 16:48:06 +0000239
240 /* No need to invalidate the cache for device mappings */
Ard Biesheuvel0de58f82015-12-03 09:25:22 +0100241 if (!kvm_is_device_pfn(pte_pfn(old_pte)))
Marc Zyngier363ef892014-12-19 16:48:06 +0000242 kvm_flush_dcache_pte(old_pte);
243
244 put_page(virt_to_page(pte));
Christoffer Dall4f853a72014-05-09 23:31:31 +0200245 }
246 } while (pte++, addr += PAGE_SIZE, addr != end);
247
Suzuki K Poulosee55cac52018-09-26 17:32:44 +0100248 if (stage2_pte_table_empty(kvm, start_pte))
Suzuki K Poulose7a1c8312016-03-23 12:08:02 +0000249 clear_stage2_pmd_entry(kvm, pmd, start_addr);
Christoffer Dall342cd0a2013-01-20 18:28:06 -0500250}
251
Suzuki K Poulose7a1c8312016-03-23 12:08:02 +0000252static void unmap_stage2_pmds(struct kvm *kvm, pud_t *pud,
Christoffer Dall4f853a72014-05-09 23:31:31 +0200253 phys_addr_t addr, phys_addr_t end)
Christoffer Dall342cd0a2013-01-20 18:28:06 -0500254{
Christoffer Dall4f853a72014-05-09 23:31:31 +0200255 phys_addr_t next, start_addr = addr;
256 pmd_t *pmd, *start_pmd;
Marc Zyngier000d3992013-03-05 02:43:17 +0000257
Suzuki K Poulosee55cac52018-09-26 17:32:44 +0100258 start_pmd = pmd = stage2_pmd_offset(kvm, pud, addr);
Christoffer Dall4f853a72014-05-09 23:31:31 +0200259 do {
Suzuki K Poulosee55cac52018-09-26 17:32:44 +0100260 next = stage2_pmd_addr_end(kvm, addr, end);
Christoffer Dall4f853a72014-05-09 23:31:31 +0200261 if (!pmd_none(*pmd)) {
Suzuki K Poulosebbb3b6b2016-03-01 12:00:39 +0000262 if (pmd_thp_or_huge(*pmd)) {
Marc Zyngier363ef892014-12-19 16:48:06 +0000263 pmd_t old_pmd = *pmd;
264
Christoffer Dall4f853a72014-05-09 23:31:31 +0200265 pmd_clear(pmd);
266 kvm_tlb_flush_vmid_ipa(kvm, addr);
Marc Zyngier363ef892014-12-19 16:48:06 +0000267
268 kvm_flush_dcache_pmd(old_pmd);
269
Christoffer Dall4f853a72014-05-09 23:31:31 +0200270 put_page(virt_to_page(pmd));
271 } else {
Suzuki K Poulose7a1c8312016-03-23 12:08:02 +0000272 unmap_stage2_ptes(kvm, pmd, addr, next);
Marc Zyngier4f728272013-04-12 19:12:05 +0100273 }
274 }
Christoffer Dall4f853a72014-05-09 23:31:31 +0200275 } while (pmd++, addr = next, addr != end);
Marc Zyngier4f728272013-04-12 19:12:05 +0100276
Suzuki K Poulosee55cac52018-09-26 17:32:44 +0100277 if (stage2_pmd_table_empty(kvm, start_pmd))
Suzuki K Poulose7a1c8312016-03-23 12:08:02 +0000278 clear_stage2_pud_entry(kvm, pud, start_addr);
Christoffer Dall4f853a72014-05-09 23:31:31 +0200279}
280
Mike Rapoporte9f63762020-06-04 16:46:23 -0700281static void unmap_stage2_puds(struct kvm *kvm, p4d_t *p4d,
Christoffer Dall4f853a72014-05-09 23:31:31 +0200282 phys_addr_t addr, phys_addr_t end)
283{
284 phys_addr_t next, start_addr = addr;
285 pud_t *pud, *start_pud;
286
Mike Rapoporte9f63762020-06-04 16:46:23 -0700287 start_pud = pud = stage2_pud_offset(kvm, p4d, addr);
Christoffer Dall4f853a72014-05-09 23:31:31 +0200288 do {
Suzuki K Poulosee55cac52018-09-26 17:32:44 +0100289 next = stage2_pud_addr_end(kvm, addr, end);
290 if (!stage2_pud_none(kvm, *pud)) {
291 if (stage2_pud_huge(kvm, *pud)) {
Marc Zyngier363ef892014-12-19 16:48:06 +0000292 pud_t old_pud = *pud;
293
Suzuki K Poulosee55cac52018-09-26 17:32:44 +0100294 stage2_pud_clear(kvm, pud);
Christoffer Dall4f853a72014-05-09 23:31:31 +0200295 kvm_tlb_flush_vmid_ipa(kvm, addr);
Marc Zyngier363ef892014-12-19 16:48:06 +0000296 kvm_flush_dcache_pud(old_pud);
Christoffer Dall4f853a72014-05-09 23:31:31 +0200297 put_page(virt_to_page(pud));
298 } else {
Suzuki K Poulose7a1c8312016-03-23 12:08:02 +0000299 unmap_stage2_pmds(kvm, pud, addr, next);
Christoffer Dall4f853a72014-05-09 23:31:31 +0200300 }
301 }
302 } while (pud++, addr = next, addr != end);
303
Suzuki K Poulosee55cac52018-09-26 17:32:44 +0100304 if (stage2_pud_table_empty(kvm, start_pud))
Mike Rapoporte9f63762020-06-04 16:46:23 -0700305 clear_stage2_p4d_entry(kvm, p4d, start_addr);
306}
307
308static void unmap_stage2_p4ds(struct kvm *kvm, pgd_t *pgd,
309 phys_addr_t addr, phys_addr_t end)
310{
311 phys_addr_t next, start_addr = addr;
312 p4d_t *p4d, *start_p4d;
313
314 start_p4d = p4d = stage2_p4d_offset(kvm, pgd, addr);
315 do {
316 next = stage2_p4d_addr_end(kvm, addr, end);
317 if (!stage2_p4d_none(kvm, *p4d))
318 unmap_stage2_puds(kvm, p4d, addr, next);
319 } while (p4d++, addr = next, addr != end);
320
321 if (stage2_p4d_table_empty(kvm, start_p4d))
Suzuki K Poulose7a1c8312016-03-23 12:08:02 +0000322 clear_stage2_pgd_entry(kvm, pgd, start_addr);
Christoffer Dall4f853a72014-05-09 23:31:31 +0200323}
324
Suzuki K Poulose7a1c8312016-03-23 12:08:02 +0000325/**
326 * unmap_stage2_range -- Clear stage2 page table entries to unmap a range
327 * @kvm: The VM pointer
328 * @start: The intermediate physical base address of the range to unmap
329 * @size: The size of the area to unmap
330 *
331 * Clear a range of stage-2 mappings, lowering the various ref-counts. Must
332 * be called while holding mmu_lock (except when freeing the stage2 pgd before
333 * destroying the VM), otherwise another faulting VCPU may come in and mess
334 * with things behind our backs.
335 */
336static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
Christoffer Dall4f853a72014-05-09 23:31:31 +0200337{
338 pgd_t *pgd;
339 phys_addr_t addr = start, end = start + size;
340 phys_addr_t next;
341
Suzuki K Poulose8b3405e2017-04-03 15:12:43 +0100342 assert_spin_locked(&kvm->mmu_lock);
Jia He47a91b72018-05-21 11:05:30 +0800343 WARN_ON(size & ~PAGE_MASK);
344
Suzuki K Poulosee55cac52018-09-26 17:32:44 +0100345 pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr);
Christoffer Dall4f853a72014-05-09 23:31:31 +0200346 do {
Suzuki K Poulose0c428a6a2017-05-16 10:34:55 +0100347 /*
348 * Make sure the page table is still active, as another thread
349 * could have possibly freed the page table, while we released
350 * the lock.
351 */
352 if (!READ_ONCE(kvm->arch.pgd))
353 break;
Suzuki K Poulosee55cac52018-09-26 17:32:44 +0100354 next = stage2_pgd_addr_end(kvm, addr, end);
355 if (!stage2_pgd_none(kvm, *pgd))
Mike Rapoporte9f63762020-06-04 16:46:23 -0700356 unmap_stage2_p4ds(kvm, pgd, addr, next);
Suzuki K Poulose8b3405e2017-04-03 15:12:43 +0100357 /*
358 * If the range is too large, release the kvm->mmu_lock
359 * to prevent starvation and lockup detector warnings.
360 */
361 if (next != end)
362 cond_resched_lock(&kvm->mmu_lock);
Christoffer Dall4f853a72014-05-09 23:31:31 +0200363 } while (pgd++, addr = next, addr != end);
Marc Zyngier000d3992013-03-05 02:43:17 +0000364}
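/*
 * Illustrative call pattern (a sketch, not an additional caller): the
 * callers visible in this file take kvm->mmu_lock around the unmap, e.g.
 *
 *	spin_lock(&kvm->mmu_lock);
 *	unmap_stage2_range(kvm, gpa, size);
 *	spin_unlock(&kvm->mmu_lock);
 *
 * which is the shape used by stage2_unmap_vm()/stage2_unmap_memslot() and
 * kvm_free_stage2_pgd() further down; gpa/size stand for whatever guest
 * range the caller owns.
 */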
365
Marc Zyngier9d218a12014-01-15 12:50:23 +0000366static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd,
367 phys_addr_t addr, phys_addr_t end)
368{
369 pte_t *pte;
370
371 pte = pte_offset_kernel(pmd, addr);
372 do {
Ard Biesheuvel0de58f82015-12-03 09:25:22 +0100373 if (!pte_none(*pte) && !kvm_is_device_pfn(pte_pfn(*pte)))
Marc Zyngier363ef892014-12-19 16:48:06 +0000374 kvm_flush_dcache_pte(*pte);
Marc Zyngier9d218a12014-01-15 12:50:23 +0000375 } while (pte++, addr += PAGE_SIZE, addr != end);
376}
377
378static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud,
379 phys_addr_t addr, phys_addr_t end)
380{
381 pmd_t *pmd;
382 phys_addr_t next;
383
Suzuki K Poulosee55cac52018-09-26 17:32:44 +0100384 pmd = stage2_pmd_offset(kvm, pud, addr);
Marc Zyngier9d218a12014-01-15 12:50:23 +0000385 do {
Suzuki K Poulosee55cac52018-09-26 17:32:44 +0100386 next = stage2_pmd_addr_end(kvm, addr, end);
Marc Zyngier9d218a12014-01-15 12:50:23 +0000387 if (!pmd_none(*pmd)) {
Suzuki K Poulosebbb3b6b2016-03-01 12:00:39 +0000388 if (pmd_thp_or_huge(*pmd))
Marc Zyngier363ef892014-12-19 16:48:06 +0000389 kvm_flush_dcache_pmd(*pmd);
390 else
Marc Zyngier9d218a12014-01-15 12:50:23 +0000391 stage2_flush_ptes(kvm, pmd, addr, next);
Marc Zyngier9d218a12014-01-15 12:50:23 +0000392 }
393 } while (pmd++, addr = next, addr != end);
394}
395
Mike Rapoporte9f63762020-06-04 16:46:23 -0700396static void stage2_flush_puds(struct kvm *kvm, p4d_t *p4d,
Marc Zyngier9d218a12014-01-15 12:50:23 +0000397 phys_addr_t addr, phys_addr_t end)
398{
399 pud_t *pud;
400 phys_addr_t next;
401
Mike Rapoporte9f63762020-06-04 16:46:23 -0700402 pud = stage2_pud_offset(kvm, p4d, addr);
Marc Zyngier9d218a12014-01-15 12:50:23 +0000403 do {
Suzuki K Poulosee55cac52018-09-26 17:32:44 +0100404 next = stage2_pud_addr_end(kvm, addr, end);
405 if (!stage2_pud_none(kvm, *pud)) {
406 if (stage2_pud_huge(kvm, *pud))
Marc Zyngier363ef892014-12-19 16:48:06 +0000407 kvm_flush_dcache_pud(*pud);
408 else
Marc Zyngier9d218a12014-01-15 12:50:23 +0000409 stage2_flush_pmds(kvm, pud, addr, next);
Marc Zyngier9d218a12014-01-15 12:50:23 +0000410 }
411 } while (pud++, addr = next, addr != end);
412}
413
Mike Rapoporte9f63762020-06-04 16:46:23 -0700414static void stage2_flush_p4ds(struct kvm *kvm, pgd_t *pgd,
415 phys_addr_t addr, phys_addr_t end)
416{
417 p4d_t *p4d;
418 phys_addr_t next;
419
420 p4d = stage2_p4d_offset(kvm, pgd, addr);
421 do {
422 next = stage2_p4d_addr_end(kvm, addr, end);
423 if (!stage2_p4d_none(kvm, *p4d))
424 stage2_flush_puds(kvm, p4d, addr, next);
425 } while (p4d++, addr = next, addr != end);
426}
427
Marc Zyngier9d218a12014-01-15 12:50:23 +0000428static void stage2_flush_memslot(struct kvm *kvm,
429 struct kvm_memory_slot *memslot)
430{
431 phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT;
432 phys_addr_t end = addr + PAGE_SIZE * memslot->npages;
433 phys_addr_t next;
434 pgd_t *pgd;
435
Suzuki K Poulosee55cac52018-09-26 17:32:44 +0100436 pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr);
Marc Zyngier9d218a12014-01-15 12:50:23 +0000437 do {
Suzuki K Poulosee55cac52018-09-26 17:32:44 +0100438 next = stage2_pgd_addr_end(kvm, addr, end);
439 if (!stage2_pgd_none(kvm, *pgd))
Mike Rapoporte9f63762020-06-04 16:46:23 -0700440 stage2_flush_p4ds(kvm, pgd, addr, next);
Jiang Yi48c963e2020-04-15 10:42:29 +0200441
442 if (next != end)
443 cond_resched_lock(&kvm->mmu_lock);
Marc Zyngier9d218a12014-01-15 12:50:23 +0000444 } while (pgd++, addr = next, addr != end);
445}
446
447/**
448 * stage2_flush_vm - Invalidate cache for pages mapped in stage 2
449 * @kvm: The struct kvm pointer
450 *
451 * Go through the stage 2 page tables and invalidate any cache lines
452 * backing memory already mapped to the VM.
453 */
Marc Zyngier3c1e7162014-12-19 16:05:31 +0000454static void stage2_flush_vm(struct kvm *kvm)
Marc Zyngier9d218a12014-01-15 12:50:23 +0000455{
456 struct kvm_memslots *slots;
457 struct kvm_memory_slot *memslot;
458 int idx;
459
460 idx = srcu_read_lock(&kvm->srcu);
461 spin_lock(&kvm->mmu_lock);
462
463 slots = kvm_memslots(kvm);
464 kvm_for_each_memslot(memslot, slots)
465 stage2_flush_memslot(kvm, memslot);
466
467 spin_unlock(&kvm->mmu_lock);
468 srcu_read_unlock(&kvm->srcu, idx);
469}
470
Suzuki K Poulose64f32492016-03-22 18:56:21 +0000471static void clear_hyp_pgd_entry(pgd_t *pgd)
472{
Mike Rapoporte9f63762020-06-04 16:46:23 -0700473 p4d_t *p4d_table __maybe_unused = p4d_offset(pgd, 0UL);
Suzuki K Poulose64f32492016-03-22 18:56:21 +0000474 pgd_clear(pgd);
Mike Rapoporte9f63762020-06-04 16:46:23 -0700475 p4d_free(NULL, p4d_table);
Suzuki K Poulose64f32492016-03-22 18:56:21 +0000476 put_page(virt_to_page(pgd));
477}
478
Mike Rapoporte9f63762020-06-04 16:46:23 -0700479static void clear_hyp_p4d_entry(p4d_t *p4d)
480{
481 pud_t *pud_table __maybe_unused = pud_offset(p4d, 0UL);
482 VM_BUG_ON(p4d_huge(*p4d));
483 p4d_clear(p4d);
484 pud_free(NULL, pud_table);
485 put_page(virt_to_page(p4d));
486}
487
Suzuki K Poulose64f32492016-03-22 18:56:21 +0000488static void clear_hyp_pud_entry(pud_t *pud)
489{
490 pmd_t *pmd_table __maybe_unused = pmd_offset(pud, 0);
491 VM_BUG_ON(pud_huge(*pud));
492 pud_clear(pud);
493 pmd_free(NULL, pmd_table);
494 put_page(virt_to_page(pud));
495}
496
497static void clear_hyp_pmd_entry(pmd_t *pmd)
498{
499 pte_t *pte_table = pte_offset_kernel(pmd, 0);
500 VM_BUG_ON(pmd_thp_or_huge(*pmd));
501 pmd_clear(pmd);
502 pte_free_kernel(NULL, pte_table);
503 put_page(virt_to_page(pmd));
504}
505
506static void unmap_hyp_ptes(pmd_t *pmd, phys_addr_t addr, phys_addr_t end)
507{
508 pte_t *pte, *start_pte;
509
510 start_pte = pte = pte_offset_kernel(pmd, addr);
511 do {
512 if (!pte_none(*pte)) {
513 kvm_set_pte(pte, __pte(0));
514 put_page(virt_to_page(pte));
515 }
516 } while (pte++, addr += PAGE_SIZE, addr != end);
517
518 if (hyp_pte_table_empty(start_pte))
519 clear_hyp_pmd_entry(pmd);
520}
521
522static void unmap_hyp_pmds(pud_t *pud, phys_addr_t addr, phys_addr_t end)
523{
524 phys_addr_t next;
525 pmd_t *pmd, *start_pmd;
526
527 start_pmd = pmd = pmd_offset(pud, addr);
528 do {
529 next = pmd_addr_end(addr, end);
530 /* Hyp doesn't use huge pmds */
531 if (!pmd_none(*pmd))
532 unmap_hyp_ptes(pmd, addr, next);
533 } while (pmd++, addr = next, addr != end);
534
535 if (hyp_pmd_table_empty(start_pmd))
536 clear_hyp_pud_entry(pud);
537}
538
Mike Rapoporte9f63762020-06-04 16:46:23 -0700539static void unmap_hyp_puds(p4d_t *p4d, phys_addr_t addr, phys_addr_t end)
Suzuki K Poulose64f32492016-03-22 18:56:21 +0000540{
541 phys_addr_t next;
542 pud_t *pud, *start_pud;
543
Mike Rapoporte9f63762020-06-04 16:46:23 -0700544 start_pud = pud = pud_offset(p4d, addr);
Suzuki K Poulose64f32492016-03-22 18:56:21 +0000545 do {
546 next = pud_addr_end(addr, end);
547 /* Hyp doesn't use huge puds */
548 if (!pud_none(*pud))
549 unmap_hyp_pmds(pud, addr, next);
550 } while (pud++, addr = next, addr != end);
551
552 if (hyp_pud_table_empty(start_pud))
Mike Rapoporte9f63762020-06-04 16:46:23 -0700553 clear_hyp_p4d_entry(p4d);
554}
555
556static void unmap_hyp_p4ds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end)
557{
558 phys_addr_t next;
559 p4d_t *p4d, *start_p4d;
560
561 start_p4d = p4d = p4d_offset(pgd, addr);
562 do {
563 next = p4d_addr_end(addr, end);
564 /* Hyp doesn't use huge p4ds */
565 if (!p4d_none(*p4d))
566 unmap_hyp_puds(p4d, addr, next);
567 } while (p4d++, addr = next, addr != end);
568
569 if (hyp_p4d_table_empty(start_p4d))
Suzuki K Poulose64f32492016-03-22 18:56:21 +0000570 clear_hyp_pgd_entry(pgd);
571}
572
Marc Zyngier3ddd4552018-03-14 15:17:33 +0000573static unsigned int kvm_pgd_index(unsigned long addr, unsigned int ptrs_per_pgd)
574{
575 return (addr >> PGDIR_SHIFT) & (ptrs_per_pgd - 1);
576}
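/*
 * Worked example (the numbers are illustrative, not tied to any particular
 * configuration): with PGDIR_SHIFT == 30 and ptrs_per_pgd == 512, an
 * address of 0x80000000 gives (0x80000000 >> 30) & 511 == 2, i.e. the
 * third pgd slot.
 */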
577
578static void __unmap_hyp_range(pgd_t *pgdp, unsigned long ptrs_per_pgd,
579 phys_addr_t start, u64 size)
Suzuki K Poulose64f32492016-03-22 18:56:21 +0000580{
581 pgd_t *pgd;
582 phys_addr_t addr = start, end = start + size;
583 phys_addr_t next;
584
585 /*
586 * We don't unmap anything from HYP, except at the hyp tear down.
587 * Hence, we don't have to invalidate the TLBs here.
588 */
Marc Zyngier3ddd4552018-03-14 15:17:33 +0000589 pgd = pgdp + kvm_pgd_index(addr, ptrs_per_pgd);
Suzuki K Poulose64f32492016-03-22 18:56:21 +0000590 do {
591 next = pgd_addr_end(addr, end);
592 if (!pgd_none(*pgd))
Mike Rapoporte9f63762020-06-04 16:46:23 -0700593 unmap_hyp_p4ds(pgd, addr, next);
Suzuki K Poulose64f32492016-03-22 18:56:21 +0000594 } while (pgd++, addr = next, addr != end);
595}
596
Marc Zyngier3ddd4552018-03-14 15:17:33 +0000597static void unmap_hyp_range(pgd_t *pgdp, phys_addr_t start, u64 size)
598{
599 __unmap_hyp_range(pgdp, PTRS_PER_PGD, start, size);
600}
601
602static void unmap_hyp_idmap_range(pgd_t *pgdp, phys_addr_t start, u64 size)
603{
604 __unmap_hyp_range(pgdp, __kvm_idmap_ptrs_per_pgd(), start, size);
605}
606
Marc Zyngier000d3992013-03-05 02:43:17 +0000607/**
Marc Zyngier4f728272013-04-12 19:12:05 +0100608 * free_hyp_pgds - free Hyp-mode page tables
Marc Zyngier000d3992013-03-05 02:43:17 +0000609 *
Marc Zyngier5a677ce2013-04-12 19:12:06 +0100610 * Assumes hyp_pgd is a page table used strictly in Hyp-mode and
611 * therefore contains either mappings in the kernel memory area (above
Marc Zyngiere3f019b2017-12-04 17:04:38 +0000612 * PAGE_OFFSET), or device mappings in the idmap range.
Marc Zyngier5a677ce2013-04-12 19:12:06 +0100613 *
Marc Zyngiere3f019b2017-12-04 17:04:38 +0000614 * boot_hyp_pgd should only map the idmap range, and is only used in
615 * the extended idmap case.
Marc Zyngier000d3992013-03-05 02:43:17 +0000616 */
Marc Zyngier4f728272013-04-12 19:12:05 +0100617void free_hyp_pgds(void)
Marc Zyngier000d3992013-03-05 02:43:17 +0000618{
Marc Zyngiere3f019b2017-12-04 17:04:38 +0000619 pgd_t *id_pgd;
620
Marc Zyngierd157f4a2013-04-12 19:12:07 +0100621 mutex_lock(&kvm_hyp_pgd_mutex);
Marc Zyngier5a677ce2013-04-12 19:12:06 +0100622
Marc Zyngiere3f019b2017-12-04 17:04:38 +0000623 id_pgd = boot_hyp_pgd ? boot_hyp_pgd : hyp_pgd;
624
625 if (id_pgd) {
626 /* In case we never called hyp_mmu_init() */
627 if (!io_map_base)
628 io_map_base = hyp_idmap_start;
629 unmap_hyp_idmap_range(id_pgd, io_map_base,
630 hyp_idmap_start + PAGE_SIZE - io_map_base);
631 }
632
Marc Zyngier26781f9c2016-06-30 18:40:46 +0100633 if (boot_hyp_pgd) {
Marc Zyngier26781f9c2016-06-30 18:40:46 +0100634 free_pages((unsigned long)boot_hyp_pgd, hyp_pgd_order);
635 boot_hyp_pgd = NULL;
636 }
637
Marc Zyngier4f728272013-04-12 19:12:05 +0100638 if (hyp_pgd) {
Marc Zyngier7839c672017-12-07 11:45:45 +0000639 unmap_hyp_range(hyp_pgd, kern_hyp_va(PAGE_OFFSET),
640 (uintptr_t)high_memory - PAGE_OFFSET);
Marc Zyngierd4cb9df52013-05-14 12:11:34 +0100641
Christoffer Dall38f791a2014-10-10 12:14:28 +0200642 free_pages((unsigned long)hyp_pgd, hyp_pgd_order);
Marc Zyngierd157f4a2013-04-12 19:12:07 +0100643 hyp_pgd = NULL;
Marc Zyngier4f728272013-04-12 19:12:05 +0100644 }
Ard Biesheuvele4c5a682015-03-19 16:42:28 +0000645 if (merged_hyp_pgd) {
646 clear_page(merged_hyp_pgd);
647 free_page((unsigned long)merged_hyp_pgd);
648 merged_hyp_pgd = NULL;
649 }
Marc Zyngier4f728272013-04-12 19:12:05 +0100650
Christoffer Dall342cd0a2013-01-20 18:28:06 -0500651 mutex_unlock(&kvm_hyp_pgd_mutex);
652}
653
654static void create_hyp_pte_mappings(pmd_t *pmd, unsigned long start,
Marc Zyngier6060df82013-04-12 19:12:01 +0100655 unsigned long end, unsigned long pfn,
656 pgprot_t prot)
Christoffer Dall342cd0a2013-01-20 18:28:06 -0500657{
658 pte_t *pte;
659 unsigned long addr;
660
Marc Zyngier3562c762013-04-12 19:12:02 +0100661 addr = start;
662 do {
Marc Zyngier6060df82013-04-12 19:12:01 +0100663 pte = pte_offset_kernel(pmd, addr);
Punit Agrawalf8df7332018-12-11 17:10:36 +0000664 kvm_set_pte(pte, kvm_pfn_pte(pfn, prot));
Marc Zyngier4f728272013-04-12 19:12:05 +0100665 get_page(virt_to_page(pte));
Marc Zyngier6060df82013-04-12 19:12:01 +0100666 pfn++;
Marc Zyngier3562c762013-04-12 19:12:02 +0100667 } while (addr += PAGE_SIZE, addr != end);
Christoffer Dall342cd0a2013-01-20 18:28:06 -0500668}
669
670static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
Marc Zyngier6060df82013-04-12 19:12:01 +0100671 unsigned long end, unsigned long pfn,
672 pgprot_t prot)
Christoffer Dall342cd0a2013-01-20 18:28:06 -0500673{
674 pmd_t *pmd;
675 pte_t *pte;
676 unsigned long addr, next;
677
Marc Zyngier3562c762013-04-12 19:12:02 +0100678 addr = start;
679 do {
Marc Zyngier6060df82013-04-12 19:12:01 +0100680 pmd = pmd_offset(pud, addr);
Christoffer Dall342cd0a2013-01-20 18:28:06 -0500681
682 BUG_ON(pmd_sect(*pmd));
683
684 if (pmd_none(*pmd)) {
Joel Fernandes (Google)4cf58922019-01-03 15:28:34 -0800685 pte = pte_alloc_one_kernel(NULL);
Christoffer Dall342cd0a2013-01-20 18:28:06 -0500686 if (!pte) {
687 kvm_err("Cannot allocate Hyp pte\n");
688 return -ENOMEM;
689 }
Marc Zyngier0db9dd82018-06-27 15:51:05 +0100690 kvm_pmd_populate(pmd, pte);
Marc Zyngier4f728272013-04-12 19:12:05 +0100691 get_page(virt_to_page(pmd));
Christoffer Dall342cd0a2013-01-20 18:28:06 -0500692 }
693
694 next = pmd_addr_end(addr, end);
695
Marc Zyngier6060df82013-04-12 19:12:01 +0100696 create_hyp_pte_mappings(pmd, addr, next, pfn, prot);
697 pfn += (next - addr) >> PAGE_SHIFT;
Marc Zyngier3562c762013-04-12 19:12:02 +0100698 } while (addr = next, addr != end);
Christoffer Dall342cd0a2013-01-20 18:28:06 -0500699
700 return 0;
701}
702
Mike Rapoporte9f63762020-06-04 16:46:23 -0700703static int create_hyp_pud_mappings(p4d_t *p4d, unsigned long start,
Christoffer Dall38f791a2014-10-10 12:14:28 +0200704 unsigned long end, unsigned long pfn,
705 pgprot_t prot)
706{
707 pud_t *pud;
708 pmd_t *pmd;
709 unsigned long addr, next;
710 int ret;
711
712 addr = start;
713 do {
Mike Rapoporte9f63762020-06-04 16:46:23 -0700714 pud = pud_offset(p4d, addr);
Christoffer Dall38f791a2014-10-10 12:14:28 +0200715
716 if (pud_none_or_clear_bad(pud)) {
717 pmd = pmd_alloc_one(NULL, addr);
718 if (!pmd) {
719 kvm_err("Cannot allocate Hyp pmd\n");
720 return -ENOMEM;
721 }
Marc Zyngier0db9dd82018-06-27 15:51:05 +0100722 kvm_pud_populate(pud, pmd);
Christoffer Dall38f791a2014-10-10 12:14:28 +0200723 get_page(virt_to_page(pud));
Christoffer Dall38f791a2014-10-10 12:14:28 +0200724 }
725
726 next = pud_addr_end(addr, end);
727 ret = create_hyp_pmd_mappings(pud, addr, next, pfn, prot);
728 if (ret)
729 return ret;
730 pfn += (next - addr) >> PAGE_SHIFT;
731 } while (addr = next, addr != end);
732
733 return 0;
734}
735
Mike Rapoporte9f63762020-06-04 16:46:23 -0700736static int create_hyp_p4d_mappings(pgd_t *pgd, unsigned long start,
737 unsigned long end, unsigned long pfn,
738 pgprot_t prot)
739{
740 p4d_t *p4d;
741 pud_t *pud;
742 unsigned long addr, next;
743 int ret;
744
745 addr = start;
746 do {
747 p4d = p4d_offset(pgd, addr);
748
749 if (p4d_none(*p4d)) {
750 pud = pud_alloc_one(NULL, addr);
751 if (!pud) {
752 kvm_err("Cannot allocate Hyp pud\n");
753 return -ENOMEM;
754 }
755 kvm_p4d_populate(p4d, pud);
756 get_page(virt_to_page(p4d));
757 }
758
759 next = p4d_addr_end(addr, end);
760 ret = create_hyp_pud_mappings(p4d, addr, next, pfn, prot);
761 if (ret)
762 return ret;
763 pfn += (next - addr) >> PAGE_SHIFT;
764 } while (addr = next, addr != end);
765
766 return 0;
767}
768
Kristina Martsenko98732d12018-01-15 15:23:49 +0000769static int __create_hyp_mappings(pgd_t *pgdp, unsigned long ptrs_per_pgd,
Marc Zyngier6060df82013-04-12 19:12:01 +0100770 unsigned long start, unsigned long end,
771 unsigned long pfn, pgprot_t prot)
Christoffer Dall342cd0a2013-01-20 18:28:06 -0500772{
Christoffer Dall342cd0a2013-01-20 18:28:06 -0500773 pgd_t *pgd;
Mike Rapoporte9f63762020-06-04 16:46:23 -0700774 p4d_t *p4d;
Christoffer Dall342cd0a2013-01-20 18:28:06 -0500775 unsigned long addr, next;
776 int err = 0;
777
Christoffer Dall342cd0a2013-01-20 18:28:06 -0500778 mutex_lock(&kvm_hyp_pgd_mutex);
Marc Zyngier3562c762013-04-12 19:12:02 +0100779 addr = start & PAGE_MASK;
780 end = PAGE_ALIGN(end);
781 do {
Marc Zyngier3ddd4552018-03-14 15:17:33 +0000782 pgd = pgdp + kvm_pgd_index(addr, ptrs_per_pgd);
Christoffer Dall342cd0a2013-01-20 18:28:06 -0500783
Christoffer Dall38f791a2014-10-10 12:14:28 +0200784 if (pgd_none(*pgd)) {
Mike Rapoporte9f63762020-06-04 16:46:23 -0700785 p4d = p4d_alloc_one(NULL, addr);
786 if (!p4d) {
787 kvm_err("Cannot allocate Hyp p4d\n");
Christoffer Dall342cd0a2013-01-20 18:28:06 -0500788 err = -ENOMEM;
789 goto out;
790 }
Mike Rapoporte9f63762020-06-04 16:46:23 -0700791 kvm_pgd_populate(pgd, p4d);
Christoffer Dall38f791a2014-10-10 12:14:28 +0200792 get_page(virt_to_page(pgd));
Christoffer Dall342cd0a2013-01-20 18:28:06 -0500793 }
794
795 next = pgd_addr_end(addr, end);
Mike Rapoporte9f63762020-06-04 16:46:23 -0700796 err = create_hyp_p4d_mappings(pgd, addr, next, pfn, prot);
Christoffer Dall342cd0a2013-01-20 18:28:06 -0500797 if (err)
798 goto out;
Marc Zyngier6060df82013-04-12 19:12:01 +0100799 pfn += (next - addr) >> PAGE_SHIFT;
Marc Zyngier3562c762013-04-12 19:12:02 +0100800 } while (addr = next, addr != end);
Christoffer Dall342cd0a2013-01-20 18:28:06 -0500801out:
802 mutex_unlock(&kvm_hyp_pgd_mutex);
803 return err;
804}
805
Christoffer Dall40c27292013-11-15 13:14:12 -0800806static phys_addr_t kvm_kaddr_to_phys(void *kaddr)
807{
808 if (!is_vmalloc_addr(kaddr)) {
809 BUG_ON(!virt_addr_valid(kaddr));
810 return __pa(kaddr);
811 } else {
812 return page_to_phys(vmalloc_to_page(kaddr)) +
813 offset_in_page(kaddr);
814 }
815}
816
Christoffer Dall342cd0a2013-01-20 18:28:06 -0500817/**
Marc Zyngier06e8c3b2012-10-28 01:09:14 +0100818 * create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode
Christoffer Dall342cd0a2013-01-20 18:28:06 -0500819 * @from: The virtual kernel start address of the range
820 * @to: The virtual kernel end address of the range (exclusive)
Marc Zyngierc8dddec2016-06-13 15:00:45 +0100821 * @prot: The protection to be applied to this range
Christoffer Dall342cd0a2013-01-20 18:28:06 -0500822 *
Marc Zyngier06e8c3b2012-10-28 01:09:14 +0100823 * The same virtual address as the kernel virtual address is also used
824 * in Hyp-mode mapping (modulo HYP_PAGE_OFFSET) to the same underlying
825 * physical pages.
Christoffer Dall342cd0a2013-01-20 18:28:06 -0500826 */
Marc Zyngierc8dddec2016-06-13 15:00:45 +0100827int create_hyp_mappings(void *from, void *to, pgprot_t prot)
Christoffer Dall342cd0a2013-01-20 18:28:06 -0500828{
Christoffer Dall40c27292013-11-15 13:14:12 -0800829 phys_addr_t phys_addr;
830 unsigned long virt_addr;
Marc Zyngier6c41a412016-06-30 18:40:51 +0100831 unsigned long start = kern_hyp_va((unsigned long)from);
832 unsigned long end = kern_hyp_va((unsigned long)to);
Marc Zyngier6060df82013-04-12 19:12:01 +0100833
Marc Zyngier1e947ba2015-01-29 11:59:54 +0000834 if (is_kernel_in_hyp_mode())
835 return 0;
836
Christoffer Dall40c27292013-11-15 13:14:12 -0800837 start = start & PAGE_MASK;
838 end = PAGE_ALIGN(end);
Marc Zyngier6060df82013-04-12 19:12:01 +0100839
Christoffer Dall40c27292013-11-15 13:14:12 -0800840 for (virt_addr = start; virt_addr < end; virt_addr += PAGE_SIZE) {
841 int err;
842
843 phys_addr = kvm_kaddr_to_phys(from + virt_addr - start);
Kristina Martsenko98732d12018-01-15 15:23:49 +0000844 err = __create_hyp_mappings(hyp_pgd, PTRS_PER_PGD,
845 virt_addr, virt_addr + PAGE_SIZE,
Christoffer Dall40c27292013-11-15 13:14:12 -0800846 __phys_to_pfn(phys_addr),
Marc Zyngierc8dddec2016-06-13 15:00:45 +0100847 prot);
Christoffer Dall40c27292013-11-15 13:14:12 -0800848 if (err)
849 return err;
850 }
851
852 return 0;
Christoffer Dall342cd0a2013-01-20 18:28:06 -0500853}
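/*
 * Illustrative usage (a sketch; "obj" and the protection are examples, not
 * taken from a real caller in this file):
 *
 *	err = create_hyp_mappings(obj, obj + 1, PAGE_HYP);
 *
 * maps one kernel object at its kern_hyp_va() alias so EL2 code can reach
 * it. Callers that need executable or device mappings pass a different
 * pgprot, as with PAGE_HYP_EXEC/PAGE_HYP_DEVICE elsewhere in this file.
 */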
854
Marc Zyngierdc2e4632018-02-13 11:00:29 +0000855static int __create_hyp_private_mapping(phys_addr_t phys_addr, size_t size,
856 unsigned long *haddr, pgprot_t prot)
Christoffer Dall342cd0a2013-01-20 18:28:06 -0500857{
Marc Zyngiere3f019b2017-12-04 17:04:38 +0000858 pgd_t *pgd = hyp_pgd;
859 unsigned long base;
860 int ret = 0;
Marc Zyngier6060df82013-04-12 19:12:01 +0100861
Marc Zyngiere3f019b2017-12-04 17:04:38 +0000862 mutex_lock(&kvm_hyp_pgd_mutex);
Marc Zyngier6060df82013-04-12 19:12:01 +0100863
Marc Zyngiere3f019b2017-12-04 17:04:38 +0000864 /*
Fuad Tabba656012c2020-04-01 15:03:10 +0100865 * This assumes that we have enough space below the idmap
Marc Zyngiere3f019b2017-12-04 17:04:38 +0000866 * page to allocate our VAs. If not, the check below will
867 * kick. A potential alternative would be to detect that
868 * overflow and switch to an allocation above the idmap.
869 *
870 * The allocated size is always a multiple of PAGE_SIZE.
871 */
872 size = PAGE_ALIGN(size + offset_in_page(phys_addr));
873 base = io_map_base - size;
Marc Zyngier1bb32a42017-12-04 16:43:23 +0000874
Marc Zyngiere3f019b2017-12-04 17:04:38 +0000875 /*
876 * Verify that BIT(VA_BITS - 1) hasn't been flipped by
877 * allocating the new area, as it would indicate we've
878 * overflowed the idmap/IO address range.
879 */
880 if ((base ^ io_map_base) & BIT(VA_BITS - 1))
881 ret = -ENOMEM;
882 else
883 io_map_base = base;
884
885 mutex_unlock(&kvm_hyp_pgd_mutex);
886
887 if (ret)
888 goto out;
889
890 if (__kvm_cpu_uses_extended_idmap())
891 pgd = boot_hyp_pgd;
892
893 ret = __create_hyp_mappings(pgd, __kvm_idmap_ptrs_per_pgd(),
894 base, base + size,
Marc Zyngierdc2e4632018-02-13 11:00:29 +0000895 __phys_to_pfn(phys_addr), prot);
Marc Zyngiere3f019b2017-12-04 17:04:38 +0000896 if (ret)
897 goto out;
898
Marc Zyngierdc2e4632018-02-13 11:00:29 +0000899 *haddr = base + offset_in_page(phys_addr);
Marc Zyngiere3f019b2017-12-04 17:04:38 +0000900
901out:
Marc Zyngierdc2e4632018-02-13 11:00:29 +0000902 return ret;
903}
904
905/**
906 * create_hyp_io_mappings - Map IO into both kernel and HYP
907 * @phys_addr: The physical start address which gets mapped
908 * @size: Size of the region being mapped
909 * @kaddr: Kernel VA for this mapping
910 * @haddr: HYP VA for this mapping
911 */
912int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size,
913 void __iomem **kaddr,
914 void __iomem **haddr)
915{
916 unsigned long addr;
917 int ret;
918
919 *kaddr = ioremap(phys_addr, size);
920 if (!*kaddr)
921 return -ENOMEM;
922
923 if (is_kernel_in_hyp_mode()) {
924 *haddr = *kaddr;
925 return 0;
926 }
927
928 ret = __create_hyp_private_mapping(phys_addr, size,
929 &addr, PAGE_HYP_DEVICE);
Marc Zyngier1bb32a42017-12-04 16:43:23 +0000930 if (ret) {
931 iounmap(*kaddr);
932 *kaddr = NULL;
Marc Zyngierdc2e4632018-02-13 11:00:29 +0000933 *haddr = NULL;
Marc Zyngier1bb32a42017-12-04 16:43:23 +0000934 return ret;
935 }
936
Marc Zyngierdc2e4632018-02-13 11:00:29 +0000937 *haddr = (void __iomem *)addr;
938 return 0;
939}
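/*
 * Illustrative usage (dev_base/dev_size are hypothetical):
 *
 *	void __iomem *kaddr, *haddr;
 *	int ret = create_hyp_io_mappings(dev_base, dev_size, &kaddr, &haddr);
 *
 * On VHE systems (is_kernel_in_hyp_mode()) the HYP address is simply the
 * kernel's ioremap() address; otherwise a private HYP VA below the idmap
 * is carved out and mapped with PAGE_HYP_DEVICE.
 */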
940
941/**
942 * create_hyp_exec_mappings - Map an executable range into HYP
943 * @phys_addr: The physical start address which gets mapped
944 * @size: Size of the region being mapped
945 * @haddr: HYP VA for this mapping
946 */
947int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
948 void **haddr)
949{
950 unsigned long addr;
951 int ret;
952
953 BUG_ON(is_kernel_in_hyp_mode());
954
955 ret = __create_hyp_private_mapping(phys_addr, size,
956 &addr, PAGE_HYP_EXEC);
957 if (ret) {
958 *haddr = NULL;
959 return ret;
960 }
961
962 *haddr = (void *)addr;
Marc Zyngier1bb32a42017-12-04 16:43:23 +0000963 return 0;
Christoffer Dall342cd0a2013-01-20 18:28:06 -0500964}
965
Christoffer Dalld5d81842013-01-20 18:28:07 -0500966/**
967 * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
968 * @kvm: The KVM struct pointer for the VM.
969 *
Zenghui Yu8324c3d2019-03-25 08:02:05 +0000970 * Allocates only the stage-2 HW PGD level table(s) of size defined by
971 * stage2_pgd_size(kvm).
Christoffer Dalld5d81842013-01-20 18:28:07 -0500972 *
973 * Note we don't need locking here as this is only called when the VM is
974 * created, which can only be done once.
975 */
976int kvm_alloc_stage2_pgd(struct kvm *kvm)
977{
Christoffer Dalle329fb72018-12-11 15:26:31 +0100978 phys_addr_t pgd_phys;
Christoffer Dalld5d81842013-01-20 18:28:07 -0500979 pgd_t *pgd;
980
981 if (kvm->arch.pgd != NULL) {
982 kvm_err("kvm_arch already initialized?\n");
983 return -EINVAL;
984 }
985
Suzuki K Poulose9163ee232016-03-22 17:01:21 +0000986 /* Allocate the HW PGD, making sure that each page gets its own refcount */
Suzuki K Poulosee55cac52018-09-26 17:32:44 +0100987 pgd = alloc_pages_exact(stage2_pgd_size(kvm), GFP_KERNEL | __GFP_ZERO);
Suzuki K Poulose9163ee232016-03-22 17:01:21 +0000988 if (!pgd)
Marc Zyngiera9873702015-03-10 19:06:59 +0000989 return -ENOMEM;
990
Christoffer Dalle329fb72018-12-11 15:26:31 +0100991 pgd_phys = virt_to_phys(pgd);
992 if (WARN_ON(pgd_phys & ~kvm_vttbr_baddr_mask(kvm)))
993 return -EINVAL;
994
Christoffer Dalld5d81842013-01-20 18:28:07 -0500995 kvm->arch.pgd = pgd;
Christoffer Dalle329fb72018-12-11 15:26:31 +0100996 kvm->arch.pgd_phys = pgd_phys;
Christoffer Dalld5d81842013-01-20 18:28:07 -0500997 return 0;
998}
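/*
 * The WARN_ON above checks that the PGD's physical address fits within
 * kvm_vttbr_baddr_mask(kvm), i.e. that it appears to satisfy the alignment
 * the architecture requires of the VTTBR base address for this VM's IPA
 * size; a violation indicates a sizing bug rather than a recoverable
 * runtime error.
 */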
999
Christoffer Dall957db102014-11-27 10:35:03 +01001000static void stage2_unmap_memslot(struct kvm *kvm,
1001 struct kvm_memory_slot *memslot)
1002{
1003 hva_t hva = memslot->userspace_addr;
1004 phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT;
1005 phys_addr_t size = PAGE_SIZE * memslot->npages;
1006 hva_t reg_end = hva + size;
1007
1008 /*
1009 * A memory region could potentially cover multiple VMAs, and any holes
1010 * between them, so iterate over all of them to find out if we should
1011 * unmap any of them.
1012 *
1013 * +--------------------------------------------+
1014 * +---------------+----------------+ +----------------+
1015 * | : VMA 1 | VMA 2 | | VMA 3 : |
1016 * +---------------+----------------+ +----------------+
1017 * | memory region |
1018 * +--------------------------------------------+
1019 */
1020 do {
1021 struct vm_area_struct *vma = find_vma(current->mm, hva);
1022 hva_t vm_start, vm_end;
1023
1024 if (!vma || vma->vm_start >= reg_end)
1025 break;
1026
1027 /*
1028 * Take the intersection of this VMA with the memory region
1029 */
1030 vm_start = max(hva, vma->vm_start);
1031 vm_end = min(reg_end, vma->vm_end);
1032
1033 if (!(vma->vm_flags & VM_PFNMAP)) {
1034 gpa_t gpa = addr + (vm_start - memslot->userspace_addr);
1035 unmap_stage2_range(kvm, gpa, vm_end - vm_start);
1036 }
1037 hva = vm_end;
1038 } while (hva < reg_end);
1039}
1040
1041/**
1042 * stage2_unmap_vm - Unmap Stage-2 RAM mappings
1043 * @kvm: The struct kvm pointer
1044 *
Fuad Tabba656012c2020-04-01 15:03:10 +01001045 * Go through the memory regions (memslots) and unmap any regular RAM
Christoffer Dall957db102014-11-27 10:35:03 +01001046 * backing memory already mapped to the VM.
1047 */
1048void stage2_unmap_vm(struct kvm *kvm)
1049{
1050 struct kvm_memslots *slots;
1051 struct kvm_memory_slot *memslot;
1052 int idx;
1053
1054 idx = srcu_read_lock(&kvm->srcu);
Michel Lespinasse89154dd2020-06-08 21:33:29 -07001055 mmap_read_lock(current->mm);
Christoffer Dall957db102014-11-27 10:35:03 +01001056 spin_lock(&kvm->mmu_lock);
1057
1058 slots = kvm_memslots(kvm);
1059 kvm_for_each_memslot(memslot, slots)
1060 stage2_unmap_memslot(kvm, memslot);
1061
1062 spin_unlock(&kvm->mmu_lock);
Michel Lespinasse89154dd2020-06-08 21:33:29 -07001063 mmap_read_unlock(current->mm);
Christoffer Dall957db102014-11-27 10:35:03 +01001064 srcu_read_unlock(&kvm->srcu, idx);
1065}
1066
Christoffer Dalld5d81842013-01-20 18:28:07 -05001067/**
1068 * kvm_free_stage2_pgd - free all stage-2 tables
1069 * @kvm: The KVM struct pointer for the VM.
1070 *
1071 * Walks the level-1 page table pointed to by kvm->arch.pgd and frees all
1072 * underlying level-2 and level-3 tables before freeing the actual level-1 table
1073 * and setting the struct pointer to NULL.
Christoffer Dalld5d81842013-01-20 18:28:07 -05001074 */
1075void kvm_free_stage2_pgd(struct kvm *kvm)
1076{
Suzuki K Poulose6c0d7062017-05-03 15:17:51 +01001077 void *pgd = NULL;
Christoffer Dalld5d81842013-01-20 18:28:07 -05001078
Suzuki K Poulose8b3405e2017-04-03 15:12:43 +01001079 spin_lock(&kvm->mmu_lock);
Suzuki K Poulose6c0d7062017-05-03 15:17:51 +01001080 if (kvm->arch.pgd) {
Suzuki K Poulosee55cac52018-09-26 17:32:44 +01001081 unmap_stage2_range(kvm, 0, kvm_phys_size(kvm));
Suzuki K Poulose2952a602017-05-16 10:34:54 +01001082 pgd = READ_ONCE(kvm->arch.pgd);
Suzuki K Poulose6c0d7062017-05-03 15:17:51 +01001083 kvm->arch.pgd = NULL;
Christoffer Dalle329fb72018-12-11 15:26:31 +01001084 kvm->arch.pgd_phys = 0;
Suzuki K Poulose6c0d7062017-05-03 15:17:51 +01001085 }
Suzuki K Poulose8b3405e2017-04-03 15:12:43 +01001086 spin_unlock(&kvm->mmu_lock);
1087
Suzuki K Poulose9163ee232016-03-22 17:01:21 +00001088 /* Free the HW pgd, one page at a time */
Suzuki K Poulose6c0d7062017-05-03 15:17:51 +01001089 if (pgd)
Suzuki K Poulosee55cac52018-09-26 17:32:44 +01001090 free_pages_exact(pgd, stage2_pgd_size(kvm));
Christoffer Dalld5d81842013-01-20 18:28:07 -05001091}
1092
Mike Rapoporte9f63762020-06-04 16:46:23 -07001093static p4d_t *stage2_get_p4d(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
Christoffer Dall38f791a2014-10-10 12:14:28 +02001094 phys_addr_t addr)
1095{
1096 pgd_t *pgd;
Mike Rapoporte9f63762020-06-04 16:46:23 -07001097 p4d_t *p4d;
Christoffer Dall38f791a2014-10-10 12:14:28 +02001098
Suzuki K Poulosee55cac52018-09-26 17:32:44 +01001099 pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr);
1100 if (stage2_pgd_none(kvm, *pgd)) {
Christoffer Dall38f791a2014-10-10 12:14:28 +02001101 if (!cache)
1102 return NULL;
Sean Christophersonc1a33ae2020-07-02 19:35:42 -07001103 p4d = kvm_mmu_memory_cache_alloc(cache);
Mike Rapoporte9f63762020-06-04 16:46:23 -07001104 stage2_pgd_populate(kvm, pgd, p4d);
Christoffer Dall38f791a2014-10-10 12:14:28 +02001105 get_page(virt_to_page(pgd));
1106 }
1107
Mike Rapoporte9f63762020-06-04 16:46:23 -07001108 return stage2_p4d_offset(kvm, pgd, addr);
1109}
1110
1111static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
1112 phys_addr_t addr)
1113{
1114 p4d_t *p4d;
1115 pud_t *pud;
1116
1117 p4d = stage2_get_p4d(kvm, cache, addr);
1118 if (stage2_p4d_none(kvm, *p4d)) {
1119 if (!cache)
1120 return NULL;
Sean Christophersonc1a33ae2020-07-02 19:35:42 -07001121 pud = kvm_mmu_memory_cache_alloc(cache);
Mike Rapoporte9f63762020-06-04 16:46:23 -07001122 stage2_p4d_populate(kvm, p4d, pud);
1123 get_page(virt_to_page(p4d));
1124 }
1125
1126 return stage2_pud_offset(kvm, p4d, addr);
Christoffer Dall38f791a2014-10-10 12:14:28 +02001127}
1128
Christoffer Dallad361f02012-11-01 17:14:45 +01001129static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
1130 phys_addr_t addr)
Christoffer Dalld5d81842013-01-20 18:28:07 -05001131{
Christoffer Dalld5d81842013-01-20 18:28:07 -05001132 pud_t *pud;
1133 pmd_t *pmd;
Christoffer Dalld5d81842013-01-20 18:28:07 -05001134
Christoffer Dall38f791a2014-10-10 12:14:28 +02001135 pud = stage2_get_pud(kvm, cache, addr);
Punit Agrawalb8e0ba72018-12-11 17:10:41 +00001136 if (!pud || stage2_pud_huge(kvm, *pud))
Marc Zyngierd6dbdd32017-06-05 19:17:18 +01001137 return NULL;
1138
Suzuki K Poulosee55cac52018-09-26 17:32:44 +01001139 if (stage2_pud_none(kvm, *pud)) {
Christoffer Dalld5d81842013-01-20 18:28:07 -05001140 if (!cache)
Christoffer Dallad361f02012-11-01 17:14:45 +01001141 return NULL;
Sean Christophersonc1a33ae2020-07-02 19:35:42 -07001142 pmd = kvm_mmu_memory_cache_alloc(cache);
Suzuki K Poulosee55cac52018-09-26 17:32:44 +01001143 stage2_pud_populate(kvm, pud, pmd);
Christoffer Dalld5d81842013-01-20 18:28:07 -05001144 get_page(virt_to_page(pud));
Marc Zyngierc62ee2b2012-10-15 11:27:37 +01001145 }
1146
Suzuki K Poulosee55cac52018-09-26 17:32:44 +01001147 return stage2_pmd_offset(kvm, pud, addr);
Christoffer Dallad361f02012-11-01 17:14:45 +01001148}
Christoffer Dalld5d81842013-01-20 18:28:07 -05001149
Christoffer Dallad361f02012-11-01 17:14:45 +01001150static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
1151 *cache, phys_addr_t addr, const pmd_t *new_pmd)
1152{
1153 pmd_t *pmd, old_pmd;
1154
Suzuki K Poulose3c3736c2019-03-20 14:57:19 +00001155retry:
Christoffer Dallad361f02012-11-01 17:14:45 +01001156 pmd = stage2_get_pmd(kvm, cache, addr);
1157 VM_BUG_ON(!pmd);
1158
Christoffer Dallad361f02012-11-01 17:14:45 +01001159 old_pmd = *pmd;
Suzuki K Poulose3c3736c2019-03-20 14:57:19 +00001160 /*
1161 * Multiple vcpus faulting on the same PMD entry can
1162 * lead to them sequentially updating the PMD with the
1163 * same value. Following the break-before-make
1164 * (pmd_clear() followed by tlb_flush()) process can
1165 * hinder forward progress due to refaults generated
1166 * on missing translations.
1167 *
1168 * Skip updating the page table if the entry is
1169 * unchanged.
1170 */
1171 if (pmd_val(old_pmd) == pmd_val(*new_pmd))
1172 return 0;
1173
Marc Zyngierd4b9e072016-04-28 16:16:31 +01001174 if (pmd_present(old_pmd)) {
Punit Agrawal86658b82018-08-13 11:43:50 +01001175 /*
Suzuki K Poulose3c3736c2019-03-20 14:57:19 +00001176 * If we already have PTE level mapping for this block,
1177 * we must unmap it to avoid inconsistent TLB state and
1178 * leaking the table page. We could end up in this situation
1179 * if the memory slot was marked for dirty logging and was
1180 * reverted, leaving PTE level mappings for the pages accessed
1181 * during the period. So, unmap the PTE level mapping for this
1182 * block and retry, as we could have released the upper level
1183 * table in the process.
Punit Agrawal86658b82018-08-13 11:43:50 +01001184 *
Suzuki K Poulose3c3736c2019-03-20 14:57:19 +00001185 * Normal THP split/merge follows the mmu_notifier callbacks and so
 1186 * gets handled accordingly.
Punit Agrawal86658b82018-08-13 11:43:50 +01001187 */
Suzuki K Poulose3c3736c2019-03-20 14:57:19 +00001188 if (!pmd_thp_or_huge(old_pmd)) {
1189 unmap_stage2_range(kvm, addr & S2_PMD_MASK, S2_PMD_SIZE);
1190 goto retry;
1191 }
Punit Agrawal86658b82018-08-13 11:43:50 +01001192 /*
1193 * Mapping in huge pages should only happen through a
1194 * fault. If a page is merged into a transparent huge
1195 * page, the individual subpages of that huge page
1196 * should be unmapped through MMU notifiers before we
1197 * get here.
1198 *
1199 * Merging of CompoundPages is not supported; they
1200 * should become splitting first, unmapped, merged,
1201 * and mapped back in on-demand.
1202 */
Suzuki K Poulose3c3736c2019-03-20 14:57:19 +00001203 WARN_ON_ONCE(pmd_pfn(old_pmd) != pmd_pfn(*new_pmd));
Marc Zyngierd4b9e072016-04-28 16:16:31 +01001204 pmd_clear(pmd);
Christoffer Dallad361f02012-11-01 17:14:45 +01001205 kvm_tlb_flush_vmid_ipa(kvm, addr);
Marc Zyngierd4b9e072016-04-28 16:16:31 +01001206 } else {
Christoffer Dallad361f02012-11-01 17:14:45 +01001207 get_page(virt_to_page(pmd));
Marc Zyngierd4b9e072016-04-28 16:16:31 +01001208 }
1209
1210 kvm_set_pmd(pmd, *new_pmd);
Christoffer Dallad361f02012-11-01 17:14:45 +01001211 return 0;
1212}
1213
Punit Agrawalb8e0ba72018-12-11 17:10:41 +00001214static int stage2_set_pud_huge(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
1215 phys_addr_t addr, const pud_t *new_pudp)
1216{
1217 pud_t *pudp, old_pud;
1218
Suzuki K Poulose3c3736c2019-03-20 14:57:19 +00001219retry:
Punit Agrawalb8e0ba72018-12-11 17:10:41 +00001220 pudp = stage2_get_pud(kvm, cache, addr);
1221 VM_BUG_ON(!pudp);
1222
1223 old_pud = *pudp;
1224
1225 /*
1226 * A large number of vcpus faulting on the same stage 2 entry
Suzuki K Poulose3c3736c2019-03-20 14:57:19 +00001227 * can lead to a refault due to the stage2_pud_clear()/tlb_flush().
1228 * Skip updating the page tables if there is no change.
Punit Agrawalb8e0ba72018-12-11 17:10:41 +00001229 */
1230 if (pud_val(old_pud) == pud_val(*new_pudp))
1231 return 0;
1232
1233 if (stage2_pud_present(kvm, old_pud)) {
Suzuki K Poulose3c3736c2019-03-20 14:57:19 +00001234 /*
1235 * If we already have table level mapping for this block, unmap
1236 * the range for this block and retry.
1237 */
1238 if (!stage2_pud_huge(kvm, old_pud)) {
1239 unmap_stage2_range(kvm, addr & S2_PUD_MASK, S2_PUD_SIZE);
1240 goto retry;
1241 }
1242
1243 WARN_ON_ONCE(kvm_pud_pfn(old_pud) != kvm_pud_pfn(*new_pudp));
Punit Agrawalb8e0ba72018-12-11 17:10:41 +00001244 stage2_pud_clear(kvm, pudp);
1245 kvm_tlb_flush_vmid_ipa(kvm, addr);
1246 } else {
1247 get_page(virt_to_page(pudp));
1248 }
1249
1250 kvm_set_pud(pudp, *new_pudp);
1251 return 0;
1252}
1253
Punit Agrawal86d1c552018-12-11 17:10:38 +00001254/*
1255 * stage2_get_leaf_entry - walk the stage2 VM page tables and return
1256 * true if a valid and present leaf-entry is found. A pointer to the
1257 * leaf-entry is returned in the appropriate level variable - pudpp,
1258 * pmdpp, ptepp.
1259 */
1260static bool stage2_get_leaf_entry(struct kvm *kvm, phys_addr_t addr,
1261 pud_t **pudpp, pmd_t **pmdpp, pte_t **ptepp)
Marc Zyngier7a3796d2017-10-23 17:11:21 +01001262{
Punit Agrawal86d1c552018-12-11 17:10:38 +00001263 pud_t *pudp;
Marc Zyngier7a3796d2017-10-23 17:11:21 +01001264 pmd_t *pmdp;
1265 pte_t *ptep;
1266
Punit Agrawal86d1c552018-12-11 17:10:38 +00001267 *pudpp = NULL;
1268 *pmdpp = NULL;
1269 *ptepp = NULL;
1270
1271 pudp = stage2_get_pud(kvm, NULL, addr);
1272 if (!pudp || stage2_pud_none(kvm, *pudp) || !stage2_pud_present(kvm, *pudp))
1273 return false;
1274
1275 if (stage2_pud_huge(kvm, *pudp)) {
1276 *pudpp = pudp;
1277 return true;
1278 }
1279
1280 pmdp = stage2_pmd_offset(kvm, pudp, addr);
Marc Zyngier7a3796d2017-10-23 17:11:21 +01001281 if (!pmdp || pmd_none(*pmdp) || !pmd_present(*pmdp))
1282 return false;
1283
Punit Agrawal86d1c552018-12-11 17:10:38 +00001284 if (pmd_thp_or_huge(*pmdp)) {
1285 *pmdpp = pmdp;
1286 return true;
1287 }
Marc Zyngier7a3796d2017-10-23 17:11:21 +01001288
1289 ptep = pte_offset_kernel(pmdp, addr);
1290 if (!ptep || pte_none(*ptep) || !pte_present(*ptep))
1291 return false;
1292
Punit Agrawal86d1c552018-12-11 17:10:38 +00001293 *ptepp = ptep;
1294 return true;
1295}
1296
Will Deaconb757b472020-07-23 11:17:14 +01001297static bool stage2_is_exec(struct kvm *kvm, phys_addr_t addr, unsigned long sz)
Punit Agrawal86d1c552018-12-11 17:10:38 +00001298{
1299 pud_t *pudp;
1300 pmd_t *pmdp;
1301 pte_t *ptep;
1302 bool found;
1303
1304 found = stage2_get_leaf_entry(kvm, addr, &pudp, &pmdp, &ptep);
1305 if (!found)
1306 return false;
1307
1308 if (pudp)
Will Deaconb757b472020-07-23 11:17:14 +01001309 return sz <= PUD_SIZE && kvm_s2pud_exec(pudp);
Punit Agrawal86d1c552018-12-11 17:10:38 +00001310 else if (pmdp)
Will Deaconb757b472020-07-23 11:17:14 +01001311 return sz <= PMD_SIZE && kvm_s2pmd_exec(pmdp);
Punit Agrawal86d1c552018-12-11 17:10:38 +00001312 else
Will Deaconb757b472020-07-23 11:17:14 +01001313 return sz == PAGE_SIZE && kvm_s2pte_exec(ptep);
Marc Zyngier7a3796d2017-10-23 17:11:21 +01001314}
1315
Christoffer Dallad361f02012-11-01 17:14:45 +01001316static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
Mario Smarduch15a49a42015-01-15 15:58:58 -08001317 phys_addr_t addr, const pte_t *new_pte,
1318 unsigned long flags)
Christoffer Dallad361f02012-11-01 17:14:45 +01001319{
Punit Agrawalb8e0ba72018-12-11 17:10:41 +00001320 pud_t *pud;
Christoffer Dallad361f02012-11-01 17:14:45 +01001321 pmd_t *pmd;
1322 pte_t *pte, old_pte;
Mario Smarduch15a49a42015-01-15 15:58:58 -08001323 bool iomap = flags & KVM_S2PTE_FLAG_IS_IOMAP;
1324 bool logging_active = flags & KVM_S2_FLAG_LOGGING_ACTIVE;
1325
1326 VM_BUG_ON(logging_active && !cache);
Christoffer Dallad361f02012-11-01 17:14:45 +01001327
Christoffer Dall38f791a2014-10-10 12:14:28 +02001328 /* Create stage-2 page table mapping - Levels 0 and 1 */
Punit Agrawalb8e0ba72018-12-11 17:10:41 +00001329 pud = stage2_get_pud(kvm, cache, addr);
1330 if (!pud) {
1331 /*
1332 * Ignore calls from kvm_set_spte_hva for unallocated
1333 * address ranges.
1334 */
1335 return 0;
1336 }
1337
1338 /*
1339 * While dirty page logging - dissolve huge PUD, then continue
1340 * on to allocate page.
1341 */
1342 if (logging_active)
1343 stage2_dissolve_pud(kvm, addr, pud);
1344
1345 if (stage2_pud_none(kvm, *pud)) {
1346 if (!cache)
1347 return 0; /* ignore calls from kvm_set_spte_hva */
Sean Christophersonc1a33ae2020-07-02 19:35:42 -07001348 pmd = kvm_mmu_memory_cache_alloc(cache);
Punit Agrawalb8e0ba72018-12-11 17:10:41 +00001349 stage2_pud_populate(kvm, pud, pmd);
1350 get_page(virt_to_page(pud));
1351 }
1352
1353 pmd = stage2_pmd_offset(kvm, pud, addr);
Christoffer Dallad361f02012-11-01 17:14:45 +01001354 if (!pmd) {
1355 /*
1356 * Ignore calls from kvm_set_spte_hva for unallocated
1357 * address ranges.
1358 */
1359 return 0;
1360 }
1361
Mario Smarduch15a49a42015-01-15 15:58:58 -08001362 /*
1363	 * While dirty page logging is active, dissolve a huge PMD, then
1364	 * continue on to allocate a page table.
1365 */
1366 if (logging_active)
1367 stage2_dissolve_pmd(kvm, addr, pmd);
1368
Christoffer Dallad361f02012-11-01 17:14:45 +01001369 /* Create stage-2 page mappings - Level 2 */
Christoffer Dalld5d81842013-01-20 18:28:07 -05001370 if (pmd_none(*pmd)) {
1371 if (!cache)
1372 return 0; /* ignore calls from kvm_set_spte_hva */
Sean Christophersonc1a33ae2020-07-02 19:35:42 -07001373 pte = kvm_mmu_memory_cache_alloc(cache);
Marc Zyngier0db9dd82018-06-27 15:51:05 +01001374 kvm_pmd_populate(pmd, pte);
Christoffer Dalld5d81842013-01-20 18:28:07 -05001375 get_page(virt_to_page(pmd));
Marc Zyngierc62ee2b2012-10-15 11:27:37 +01001376 }
1377
1378 pte = pte_offset_kernel(pmd, addr);
Christoffer Dalld5d81842013-01-20 18:28:07 -05001379
1380 if (iomap && pte_present(*pte))
1381 return -EFAULT;
1382
1383 /* Create 2nd stage page table mapping - Level 3 */
1384 old_pte = *pte;
Marc Zyngierd4b9e072016-04-28 16:16:31 +01001385 if (pte_present(old_pte)) {
Punit Agrawal976d34e2018-08-13 11:43:51 +01001386 /* Skip page table update if there is no change */
1387 if (pte_val(old_pte) == pte_val(*new_pte))
1388 return 0;
1389
Marc Zyngierd4b9e072016-04-28 16:16:31 +01001390 kvm_set_pte(pte, __pte(0));
Marc Zyngier48762762013-01-28 15:27:00 +00001391 kvm_tlb_flush_vmid_ipa(kvm, addr);
Marc Zyngierd4b9e072016-04-28 16:16:31 +01001392 } else {
Christoffer Dalld5d81842013-01-20 18:28:07 -05001393 get_page(virt_to_page(pte));
Marc Zyngierd4b9e072016-04-28 16:16:31 +01001394 }
Christoffer Dalld5d81842013-01-20 18:28:07 -05001395
Marc Zyngierd4b9e072016-04-28 16:16:31 +01001396 kvm_set_pte(pte, *new_pte);
Christoffer Dalld5d81842013-01-20 18:28:07 -05001397 return 0;
1398}
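/*
 * A sketch of the flow above (illustrative IPA value): a call such as
 *
 *	stage2_set_pte(kvm, cache, 0x80042000, &new_pte, 0);
 *
 * walks (and, if needed, populates from 'cache') the PUD and PMD levels,
 * then installs the level-3 PTE for IPA 0x80042000, taking a reference
 * (get_page()) on each table page it newly populates and on the level-3
 * table when a previously empty slot is filled.
 */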
1399
Catalin Marinas06485052016-04-13 17:57:37 +01001400#ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
1401static int stage2_ptep_test_and_clear_young(pte_t *pte)
1402{
1403 if (pte_young(*pte)) {
1404 *pte = pte_mkold(*pte);
1405 return 1;
1406 }
1407 return 0;
1408}
1409#else
1410static int stage2_ptep_test_and_clear_young(pte_t *pte)
1411{
1412 return __ptep_test_and_clear_young(pte);
1413}
1414#endif
1415
1416static int stage2_pmdp_test_and_clear_young(pmd_t *pmd)
1417{
1418 return stage2_ptep_test_and_clear_young((pte_t *)pmd);
1419}
1420
Punit Agrawal35a63962018-12-11 17:10:40 +00001421static int stage2_pudp_test_and_clear_young(pud_t *pud)
1422{
1423 return stage2_ptep_test_and_clear_young((pte_t *)pud);
1424}
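/*
 * The PMD and PUD variants above simply cast to pte_t: at stage 2, block
 * descriptors share the page descriptor layout, so the access flag sits
 * in the same bit position and the PTE helper can be reused for huge
 * entries.
 */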
1425
Christoffer Dalld5d81842013-01-20 18:28:07 -05001426/**
1427 * kvm_phys_addr_ioremap - map a device range to guest IPA
1428 *
1429 * @kvm: The KVM pointer
1430 * @guest_ipa: The IPA at which to insert the mapping
1431 * @pa: The physical address of the device
1432 * @size:	The size of the mapping
 * @writable:	Whether or not to create a writable mapping
1433 */
1434int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
Ard Biesheuvelc40f2f82014-09-17 14:56:18 -07001435 phys_addr_t pa, unsigned long size, bool writable)
Christoffer Dalld5d81842013-01-20 18:28:07 -05001436{
1437 phys_addr_t addr, end;
1438 int ret = 0;
1439 unsigned long pfn;
Sean Christophersonc1a33ae2020-07-02 19:35:42 -07001440 struct kvm_mmu_memory_cache cache = { 0, __GFP_ZERO, NULL, };
Christoffer Dalld5d81842013-01-20 18:28:07 -05001441
1442 end = (guest_ipa + size + PAGE_SIZE - 1) & PAGE_MASK;
1443 pfn = __phys_to_pfn(pa);
1444
1445 for (addr = guest_ipa; addr < end; addr += PAGE_SIZE) {
Punit Agrawalf8df7332018-12-11 17:10:36 +00001446 pte_t pte = kvm_pfn_pte(pfn, PAGE_S2_DEVICE);
Christoffer Dalld5d81842013-01-20 18:28:07 -05001447
Ard Biesheuvelc40f2f82014-09-17 14:56:18 -07001448 if (writable)
Catalin Marinas06485052016-04-13 17:57:37 +01001449 pte = kvm_s2pte_mkwrite(pte);
Ard Biesheuvelc40f2f82014-09-17 14:56:18 -07001450
Sean Christophersonc1a33ae2020-07-02 19:35:42 -07001451 ret = kvm_mmu_topup_memory_cache(&cache,
1452 kvm_mmu_cache_min_pages(kvm));
Christoffer Dalld5d81842013-01-20 18:28:07 -05001453 if (ret)
1454 goto out;
1455 spin_lock(&kvm->mmu_lock);
Mario Smarduch15a49a42015-01-15 15:58:58 -08001456 ret = stage2_set_pte(kvm, &cache, addr, &pte,
1457 KVM_S2PTE_FLAG_IS_IOMAP);
Christoffer Dalld5d81842013-01-20 18:28:07 -05001458 spin_unlock(&kvm->mmu_lock);
1459 if (ret)
1460 goto out;
1461
1462 pfn++;
1463 }
1464
1465out:
Sean Christophersonc1a33ae2020-07-02 19:35:42 -07001466 kvm_mmu_free_memory_cache(&cache);
Christoffer Dalld5d81842013-01-20 18:28:07 -05001467 return ret;
1468}
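/*
 * Usage sketch (illustrative values): mapping a 64kB MMIO window
 * read-only at IPA 0x0a000000 for a pass-through device could look like
 *
 *	ret = kvm_phys_addr_ioremap(kvm, 0x0a000000, dev_pa, SZ_64K, false);
 *
 * where dev_pa is the host physical address of the device region. The
 * mapping is always built from PAGE_SIZE PTEs with PAGE_S2_DEVICE
 * attributes; huge mappings are never used here.
 */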
1469
Mario Smarduchc6473552015-01-15 15:58:56 -08001470/**
1471 * stage2_wp_ptes - write protect PMD range
1472 * @pmd: pointer to pmd entry
1473 * @addr: range start address
1474 * @end: range end address
1475 */
1476static void stage2_wp_ptes(pmd_t *pmd, phys_addr_t addr, phys_addr_t end)
1477{
1478 pte_t *pte;
1479
1480 pte = pte_offset_kernel(pmd, addr);
1481 do {
1482 if (!pte_none(*pte)) {
1483 if (!kvm_s2pte_readonly(pte))
1484 kvm_set_s2pte_readonly(pte);
1485 }
1486 } while (pte++, addr += PAGE_SIZE, addr != end);
1487}
1488
1489/**
1490 * stage2_wp_pmds - write protect PUD range
Suzuki K Poulosee55cac52018-09-26 17:32:44 +01001491 * @kvm:	kvm instance for the VM
Mario Smarduchc6473552015-01-15 15:58:56 -08001492 * @pud: pointer to pud entry
1493 * @addr: range start address
1494 * @end: range end address
1495 */
Suzuki K Poulosee55cac52018-09-26 17:32:44 +01001496static void stage2_wp_pmds(struct kvm *kvm, pud_t *pud,
1497 phys_addr_t addr, phys_addr_t end)
Mario Smarduchc6473552015-01-15 15:58:56 -08001498{
1499 pmd_t *pmd;
1500 phys_addr_t next;
1501
Suzuki K Poulosee55cac52018-09-26 17:32:44 +01001502 pmd = stage2_pmd_offset(kvm, pud, addr);
Mario Smarduchc6473552015-01-15 15:58:56 -08001503
1504 do {
Suzuki K Poulosee55cac52018-09-26 17:32:44 +01001505 next = stage2_pmd_addr_end(kvm, addr, end);
Mario Smarduchc6473552015-01-15 15:58:56 -08001506 if (!pmd_none(*pmd)) {
Suzuki K Poulosebbb3b6b2016-03-01 12:00:39 +00001507 if (pmd_thp_or_huge(*pmd)) {
Mario Smarduchc6473552015-01-15 15:58:56 -08001508 if (!kvm_s2pmd_readonly(pmd))
1509 kvm_set_s2pmd_readonly(pmd);
1510 } else {
1511 stage2_wp_ptes(pmd, addr, next);
1512 }
1513 }
1514 } while (pmd++, addr = next, addr != end);
1515}
1516
1517/**
Mike Rapoporte9f63762020-06-04 16:46:23 -07001518 * stage2_wp_puds - write protect P4D range
Zenghui Yu8324c3d2019-03-25 08:02:05 +00001519 * @kvm:	kvm instance for the VM
 * @p4d:	pointer to p4d entry
1520 * @addr: range start address
1521 * @end: range end address
1522 */
Mike Rapoporte9f63762020-06-04 16:46:23 -07001523static void stage2_wp_puds(struct kvm *kvm, p4d_t *p4d,
Suzuki K Poulosee55cac52018-09-26 17:32:44 +01001524 phys_addr_t addr, phys_addr_t end)
Mario Smarduchc6473552015-01-15 15:58:56 -08001525{
1526 pud_t *pud;
1527 phys_addr_t next;
1528
Mike Rapoporte9f63762020-06-04 16:46:23 -07001529 pud = stage2_pud_offset(kvm, p4d, addr);
Mario Smarduchc6473552015-01-15 15:58:56 -08001530 do {
Suzuki K Poulosee55cac52018-09-26 17:32:44 +01001531 next = stage2_pud_addr_end(kvm, addr, end);
1532 if (!stage2_pud_none(kvm, *pud)) {
Punit Agrawal4ea5af52018-12-11 17:10:37 +00001533 if (stage2_pud_huge(kvm, *pud)) {
1534 if (!kvm_s2pud_readonly(pud))
1535 kvm_set_s2pud_readonly(pud);
1536 } else {
1537 stage2_wp_pmds(kvm, pud, addr, next);
1538 }
Mario Smarduchc6473552015-01-15 15:58:56 -08001539 }
1540 } while (pud++, addr = next, addr != end);
1541}
1542
1543/**
Mike Rapoporte9f63762020-06-04 16:46:23 -07001544 * stage2_wp_p4ds - write protect PGD range
1545 * @kvm:	kvm instance for the VM
 * @pgd:	pointer to pgd entry
1546 * @addr: range start address
1547 * @end: range end address
1548 */
1549static void stage2_wp_p4ds(struct kvm *kvm, pgd_t *pgd,
1550 phys_addr_t addr, phys_addr_t end)
1551{
1552 p4d_t *p4d;
1553 phys_addr_t next;
1554
1555 p4d = stage2_p4d_offset(kvm, pgd, addr);
1556 do {
1557 next = stage2_p4d_addr_end(kvm, addr, end);
1558 if (!stage2_p4d_none(kvm, *p4d))
1559 stage2_wp_puds(kvm, p4d, addr, next);
1560 } while (p4d++, addr = next, addr != end);
1561}
1562
1563/**
Mario Smarduchc6473552015-01-15 15:58:56 -08001564 * stage2_wp_range() - write protect stage2 memory region range
1565 * @kvm: The KVM pointer
1566 * @addr: Start address of range
1567 * @end: End address of range
1568 */
1569static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
1570{
1571 pgd_t *pgd;
1572 phys_addr_t next;
1573
Suzuki K Poulosee55cac52018-09-26 17:32:44 +01001574 pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr);
Mario Smarduchc6473552015-01-15 15:58:56 -08001575 do {
1576 /*
1577 * Release kvm_mmu_lock periodically if the memory region is
1578 * large. Otherwise, we may see kernel panics with
Christoffer Dall227ea812015-01-23 10:49:31 +01001579 * CONFIG_DETECT_HUNG_TASK, CONFIG_LOCKUP_DETECTOR,
1580 * CONFIG_LOCKDEP. Additionally, holding the lock too long
Suzuki K Poulose0c428a6a2017-05-16 10:34:55 +01001581 * will also starve other vCPUs. We also have to make sure
1582 * that the page tables are not freed from under us while we
1583 * release the lock.
Mario Smarduchc6473552015-01-15 15:58:56 -08001584 */
Suzuki K Poulose0c428a6a2017-05-16 10:34:55 +01001585 cond_resched_lock(&kvm->mmu_lock);
1586 if (!READ_ONCE(kvm->arch.pgd))
1587 break;
Suzuki K Poulosee55cac52018-09-26 17:32:44 +01001588 next = stage2_pgd_addr_end(kvm, addr, end);
1589 if (stage2_pgd_present(kvm, *pgd))
Mike Rapoporte9f63762020-06-04 16:46:23 -07001590 stage2_wp_p4ds(kvm, pgd, addr, next);
Mario Smarduchc6473552015-01-15 15:58:56 -08001591 } while (pgd++, addr = next, addr != end);
1592}
1593
1594/**
1595 * kvm_mmu_wp_memory_region() - write protect stage 2 entries for memory slot
1596 * @kvm: The KVM pointer
1597 * @slot: The memory slot to write protect
1598 *
1599 * Called to start logging dirty pages after the KVM_MEM_LOG_DIRTY_PAGES
1600 * flag has been set on a memory region. After this function returns, all
Punit Agrawal4ea5af52018-12-11 17:10:37 +00001601 * present PUDs, PMDs and PTEs in the memory region are write protected.
Mario Smarduchc6473552015-01-15 15:58:56 -08001602 * The dirty page log can then be read.
1603 *
1604 * Acquires kvm_mmu_lock. Called with kvm->slots_lock mutex acquired,
1605 * serializing operations for VM memory regions.
1606 */
1607void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
1608{
Paolo Bonzini9f6b8022015-05-17 16:20:07 +02001609 struct kvm_memslots *slots = kvm_memslots(kvm);
1610 struct kvm_memory_slot *memslot = id_to_memslot(slots, slot);
Sean Christopherson0577d1a2020-02-18 13:07:31 -08001611 phys_addr_t start, end;
1612
1613 if (WARN_ON_ONCE(!memslot))
1614 return;
1615
1616 start = memslot->base_gfn << PAGE_SHIFT;
1617 end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT;
Mario Smarduchc6473552015-01-15 15:58:56 -08001618
1619 spin_lock(&kvm->mmu_lock);
1620 stage2_wp_range(kvm, start, end);
1621 spin_unlock(&kvm->mmu_lock);
1622 kvm_flush_remote_tlbs(kvm);
1623}
Mario Smarduch53c810c2015-01-15 15:58:57 -08001624
1625/**
Kai Huang3b0f1d02015-01-28 10:54:23 +08001626 * kvm_mmu_write_protect_pt_masked() - write protect dirty pages
Mario Smarduch53c810c2015-01-15 15:58:57 -08001627 * @kvm: The KVM pointer
1628 * @slot: The memory slot associated with mask
1629 * @gfn_offset: The gfn offset in memory slot
1630 * @mask: The mask of dirty pages at offset 'gfn_offset' in this memory
1631 * slot to be write protected
1632 *
1633 * Walks the bits set in mask and write protects the associated PTEs.
1634 * The caller must hold kvm_mmu_lock.
1635 */
Kai Huang3b0f1d02015-01-28 10:54:23 +08001636static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
Mario Smarduch53c810c2015-01-15 15:58:57 -08001637 struct kvm_memory_slot *slot,
1638 gfn_t gfn_offset, unsigned long mask)
1639{
1640 phys_addr_t base_gfn = slot->base_gfn + gfn_offset;
1641 phys_addr_t start = (base_gfn + __ffs(mask)) << PAGE_SHIFT;
1642 phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT;
1643
1644 stage2_wp_range(kvm, start, end);
1645}
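/*
 * Worked example: with gfn_offset = 0 and mask = 0x0ff0 (bits 4-11 set),
 * __ffs(mask) = 4 and __fls(mask) = 11, so the span covering
 * [base_gfn + 4, base_gfn + 12) is write protected in a single
 * stage2_wp_range() call.
 */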
Mario Smarduchc6473552015-01-15 15:58:56 -08001646
Kai Huang3b0f1d02015-01-28 10:54:23 +08001647/*
1648 * kvm_arch_mmu_enable_log_dirty_pt_masked - enable dirty logging for selected
1649 * dirty pages.
1650 *
1651 * It calls kvm_mmu_write_protect_pt_masked to write protect selected pages to
1652 * enable dirty logging for them.
1653 */
1654void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
1655 struct kvm_memory_slot *slot,
1656 gfn_t gfn_offset, unsigned long mask)
1657{
1658 kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
1659}
1660
Marc Zyngier17ab9d52017-10-23 17:11:22 +01001661static void clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size)
Marc Zyngier0d3e4d42015-01-05 21:13:24 +00001662{
Marc Zyngier17ab9d52017-10-23 17:11:22 +01001663 __clean_dcache_guest_page(pfn, size);
Marc Zyngiera15f6932017-10-23 17:11:15 +01001664}
1665
Marc Zyngier17ab9d52017-10-23 17:11:22 +01001666static void invalidate_icache_guest_page(kvm_pfn_t pfn, unsigned long size)
Marc Zyngiera15f6932017-10-23 17:11:15 +01001667{
Marc Zyngier17ab9d52017-10-23 17:11:22 +01001668 __invalidate_icache_guest_page(pfn, size);
Marc Zyngier0d3e4d42015-01-05 21:13:24 +00001669}
1670
James Morse1559b752019-12-17 12:38:09 +00001671static void kvm_send_hwpoison_signal(unsigned long address, short lsb)
James Morse196f8782017-06-20 17:11:48 +01001672{
Eric W. Biederman795a8372018-04-16 13:39:10 -05001673 send_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb, current);
James Morse196f8782017-06-20 17:11:48 +01001674}
1675
Suzuki K Poulosea80868f2019-03-12 09:52:51 +00001676static bool fault_supports_stage2_huge_mapping(struct kvm_memory_slot *memslot,
1677 unsigned long hva,
1678 unsigned long map_size)
Christoffer Dall6794ad52018-11-02 08:53:22 +01001679{
Shaokun Zhangc2be79a2019-02-19 17:22:21 +08001680 gpa_t gpa_start;
Christoffer Dall6794ad52018-11-02 08:53:22 +01001681 hva_t uaddr_start, uaddr_end;
1682 size_t size;
1683
Suzuki K Poulose9f283612020-05-07 20:35:45 +08001684 /* The memslot and the VMA are guaranteed to be aligned to PAGE_SIZE */
1685 if (map_size == PAGE_SIZE)
1686 return true;
1687
Christoffer Dall6794ad52018-11-02 08:53:22 +01001688 size = memslot->npages * PAGE_SIZE;
1689
1690 gpa_start = memslot->base_gfn << PAGE_SHIFT;
Christoffer Dall6794ad52018-11-02 08:53:22 +01001691
1692 uaddr_start = memslot->userspace_addr;
1693 uaddr_end = uaddr_start + size;
1694
1695 /*
1696 * Pages belonging to memslots that don't have the same alignment
Suzuki K Poulosea80868f2019-03-12 09:52:51 +00001697 * within a PMD/PUD for userspace and IPA cannot be mapped with stage-2
1698 * PMD/PUD entries, because we'll end up mapping the wrong pages.
Christoffer Dall6794ad52018-11-02 08:53:22 +01001699 *
1700 * Consider a layout like the following:
1701 *
1702 * memslot->userspace_addr:
1703 * +-----+--------------------+--------------------+---+
Suzuki K Poulosea80868f2019-03-12 09:52:51 +00001704 * |abcde|fgh Stage-1 block | Stage-1 block tv|xyz|
Christoffer Dall6794ad52018-11-02 08:53:22 +01001705 * +-----+--------------------+--------------------+---+
1706 *
Suzuki K Poulose9f283612020-05-07 20:35:45 +08001707 * memslot->base_gfn << PAGE_SHIFT:
Christoffer Dall6794ad52018-11-02 08:53:22 +01001708 * +---+--------------------+--------------------+-----+
Suzuki K Poulosea80868f2019-03-12 09:52:51 +00001709 * |abc|def Stage-2 block | Stage-2 block |tvxyz|
Christoffer Dall6794ad52018-11-02 08:53:22 +01001710 * +---+--------------------+--------------------+-----+
1711 *
Suzuki K Poulosea80868f2019-03-12 09:52:51 +00001712 * If we create those stage-2 blocks, we'll end up with this incorrect
Christoffer Dall6794ad52018-11-02 08:53:22 +01001713 * mapping:
1714 * d -> f
1715 * e -> g
1716 * f -> h
1717 */
Suzuki K Poulosea80868f2019-03-12 09:52:51 +00001718 if ((gpa_start & (map_size - 1)) != (uaddr_start & (map_size - 1)))
Christoffer Dall6794ad52018-11-02 08:53:22 +01001719 return false;
1720
1721 /*
1722 * Next, let's make sure we're not trying to map anything not covered
Suzuki K Poulosea80868f2019-03-12 09:52:51 +00001723 * by the memslot. This means we have to prohibit block size mappings
1724 * for the beginning and end of a non-block aligned and non-block sized
Christoffer Dall6794ad52018-11-02 08:53:22 +01001725 * memory slot (illustrated by the head and tail parts of the
1726 * userspace view above containing pages 'abcde' and 'xyz',
1727 * respectively).
1728 *
1729 * Note that it doesn't matter if we do the check using the
1730 * userspace_addr or the base_gfn, as both are equally aligned (per
1731 * the check above) and equally sized.
1732 */
Suzuki K Poulosea80868f2019-03-12 09:52:51 +00001733 return (hva & ~(map_size - 1)) >= uaddr_start &&
1734 (hva & ~(map_size - 1)) + map_size <= uaddr_end;
Christoffer Dall6794ad52018-11-02 08:53:22 +01001735}
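/*
 * Worked example (assuming 4K pages, so PMD_SIZE = 2MB): a memslot with
 * base_gfn << PAGE_SHIFT = 0x40100000 and userspace_addr = 0x7f0000280000
 * has offsets of 0x100000 and 0x80000 into a 2MB block respectively. The
 * offsets differ, so the check above refuses block mappings and the fault
 * will be mapped with PAGE_SIZE entries instead (illustrative addresses).
 */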
1736
Suzuki K Poulose0529c902020-05-07 20:35:46 +08001737/*
1738 * Check if the given hva is backed by a transparent huge page (THP) and
1739 * whether it can be mapped using block mapping in stage2. If so, adjust
1740 * the stage2 PFN and IPA accordingly. Only PMD_SIZE THPs are currently
1741 * supported. This will need to be updated to support other THP sizes.
1742 *
1743 * Returns the size of the mapping.
1744 */
1745static unsigned long
1746transparent_hugepage_adjust(struct kvm_memory_slot *memslot,
1747 unsigned long hva, kvm_pfn_t *pfnp,
1748 phys_addr_t *ipap)
1749{
1750 kvm_pfn_t pfn = *pfnp;
1751
1752 /*
1753 * Make sure the adjustment is done only for THP pages. Also make
1754 * sure that the HVA and IPA are sufficiently aligned and that the
1755 * block map is contained within the memslot.
1756 */
1757 if (kvm_is_transparent_hugepage(pfn) &&
1758 fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE)) {
1759 /*
1760 * The address we faulted on is backed by a transparent huge
1761 * page. However, because we map the compound huge page and
1762 * not the individual tail page, we need to transfer the
1763 * refcount to the head page. We have to be careful that the
1764 * THP doesn't start to split while we are adjusting the
1765 * refcounts.
1766 *
1767 * We are sure this doesn't happen, because mmu_notifier_retry
1768 * was successful and we are holding the mmu_lock, so if this
1769 * THP is trying to split, it will be blocked in the mmu
1770 * notifier before touching any of the pages, specifically
1771 * before being able to call __split_huge_page_refcount().
1772 *
1773 * We can therefore safely transfer the refcount from PG_tail
1774 * to PG_head and switch the pfn from a tail page to the head
1775 * page accordingly.
1776 */
1777 *ipap &= PMD_MASK;
1778 kvm_release_pfn_clean(pfn);
1779 pfn &= ~(PTRS_PER_PMD - 1);
1780 kvm_get_pfn(pfn);
1781 *pfnp = pfn;
1782
1783 return PMD_SIZE;
1784 }
1785
1786 /* Use page mapping if we cannot use block mapping. */
1787 return PAGE_SIZE;
1788}
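/*
 * Note on the adjustment above: with 4K pages PTRS_PER_PMD is 512, so
 * "pfn &= ~(PTRS_PER_PMD - 1)" rounds the faulting pfn down to the head
 * page of the 2MB compound page, while "*ipap &= PMD_MASK" aligns the
 * IPA the same way, so the resulting block mapping covers the whole THP.
 */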
1789
Christoffer Dall94f8e642013-01-20 18:28:12 -05001790static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
Christoffer Dall98047882014-08-19 12:18:04 +02001791 struct kvm_memory_slot *memslot, unsigned long hva,
Christoffer Dall94f8e642013-01-20 18:28:12 -05001792 unsigned long fault_status)
1793{
Christoffer Dall94f8e642013-01-20 18:28:12 -05001794 int ret;
Punit Agrawal6396b852018-12-11 17:10:35 +00001795 bool write_fault, writable, force_pte = false;
1796 bool exec_fault, needs_exec;
Christoffer Dall94f8e642013-01-20 18:28:12 -05001797 unsigned long mmu_seq;
Christoffer Dallad361f02012-11-01 17:14:45 +01001798 gfn_t gfn = fault_ipa >> PAGE_SHIFT;
Christoffer Dallad361f02012-11-01 17:14:45 +01001799 struct kvm *kvm = vcpu->kvm;
Christoffer Dall94f8e642013-01-20 18:28:12 -05001800 struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
Christoffer Dallad361f02012-11-01 17:14:45 +01001801 struct vm_area_struct *vma;
James Morse1559b752019-12-17 12:38:09 +00001802 short vma_shift;
Dan Williamsba049e92016-01-15 16:56:11 -08001803 kvm_pfn_t pfn;
Kim Phillipsb8865762014-06-26 01:45:51 +01001804 pgprot_t mem_type = PAGE_S2;
Mario Smarduch15a49a42015-01-15 15:58:58 -08001805 bool logging_active = memslot_is_logging(memslot);
Punit Agrawal3f58bf62018-12-11 17:10:34 +00001806 unsigned long vma_pagesize, flags = 0;
Christoffer Dall94f8e642013-01-20 18:28:12 -05001807
Ard Biesheuvela7d079c2014-09-09 11:27:09 +01001808 write_fault = kvm_is_write_fault(vcpu);
Marc Zyngierd0e22b42017-10-23 17:11:19 +01001809 exec_fault = kvm_vcpu_trap_is_iabt(vcpu);
1810 VM_BUG_ON(write_fault && exec_fault);
1811
1812 if (fault_status == FSC_PERM && !write_fault && !exec_fault) {
Christoffer Dall94f8e642013-01-20 18:28:12 -05001813 kvm_err("Unexpected L2 read permission error\n");
1814 return -EFAULT;
1815 }
1816
Christoffer Dallad361f02012-11-01 17:14:45 +01001817 /* Let's check if we will get back a huge page backed by hugetlbfs */
Michel Lespinasse89154dd2020-06-08 21:33:29 -07001818 mmap_read_lock(current->mm);
Christoffer Dallad361f02012-11-01 17:14:45 +01001819 vma = find_vma_intersection(current->mm, hva, hva + 1);
Ard Biesheuvel37b54402014-09-17 14:56:17 -07001820 if (unlikely(!vma)) {
1821 kvm_err("Failed to find VMA for hva 0x%lx\n", hva);
Michel Lespinasse89154dd2020-06-08 21:33:29 -07001822 mmap_read_unlock(current->mm);
Ard Biesheuvel37b54402014-09-17 14:56:17 -07001823 return -EFAULT;
1824 }
1825
James Morse1559b752019-12-17 12:38:09 +00001826 if (is_vm_hugetlb_page(vma))
1827 vma_shift = huge_page_shift(hstate_vma(vma));
1828 else
1829 vma_shift = PAGE_SHIFT;
1830
1831 vma_pagesize = 1ULL << vma_shift;
Suzuki K Poulosea80868f2019-03-12 09:52:51 +00001832 if (logging_active ||
Marc Zyngier6d674e22019-12-11 16:56:48 +00001833 (vma->vm_flags & VM_PFNMAP) ||
Suzuki K Poulosea80868f2019-03-12 09:52:51 +00001834 !fault_supports_stage2_huge_mapping(memslot, hva, vma_pagesize)) {
1835 force_pte = true;
1836 vma_pagesize = PAGE_SIZE;
1837 }
1838
Punit Agrawalb8e0ba72018-12-11 17:10:41 +00001839 /*
Suzuki K Poulose280cebf2019-01-29 19:12:17 +00001840 * Stage-2 uses a minimum of 2 levels of page tables (for arm64 see
1841 * kvm_arm_setup_stage2()). Hence, we are guaranteed that we can
1842 * use PMD_SIZE huge mappings (even when the PMD is folded into the PGD).
1843 * As for PUD huge mappings, we must make sure that we have at least
1844 * 3 levels, i.e., that the PMD is not folded.
Punit Agrawalb8e0ba72018-12-11 17:10:41 +00001845 */
Suzuki K Poulosea80868f2019-03-12 09:52:51 +00001846 if (vma_pagesize == PMD_SIZE ||
1847 (vma_pagesize == PUD_SIZE && kvm_stage2_has_pmd(kvm)))
Punit Agrawalb8e0ba72018-12-11 17:10:41 +00001848 gfn = (fault_ipa & huge_page_mask(hstate_vma(vma))) >> PAGE_SHIFT;
Michel Lespinasse89154dd2020-06-08 21:33:29 -07001849 mmap_read_unlock(current->mm);
Christoffer Dallad361f02012-11-01 17:14:45 +01001850
Christoffer Dall94f8e642013-01-20 18:28:12 -05001851 /* We need minimum second+third level pages */
Sean Christophersonc1a33ae2020-07-02 19:35:42 -07001852 ret = kvm_mmu_topup_memory_cache(memcache, kvm_mmu_cache_min_pages(kvm));
Christoffer Dall94f8e642013-01-20 18:28:12 -05001853 if (ret)
1854 return ret;
1855
1856 mmu_seq = vcpu->kvm->mmu_notifier_seq;
1857 /*
1858 * Ensure the read of mmu_notifier_seq happens before we call
1859 * gfn_to_pfn_prot (which calls get_user_pages), so that we don't risk
1860 * the page we just got a reference to being unmapped before we have a
1861 * chance to grab the mmu_lock, which ensures that if the page gets
1862 * unmapped afterwards, the call to kvm_unmap_hva will take it away
1863 * from us again properly. This smp_rmb() interacts with the smp_wmb()
1864 * in kvm_mmu_notifier_invalidate_<page|range_end>.
1865 */
1866 smp_rmb();
1867
Christoffer Dallad361f02012-11-01 17:14:45 +01001868 pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writable);
James Morse196f8782017-06-20 17:11:48 +01001869 if (pfn == KVM_PFN_ERR_HWPOISON) {
James Morse1559b752019-12-17 12:38:09 +00001870 kvm_send_hwpoison_signal(hva, vma_shift);
James Morse196f8782017-06-20 17:11:48 +01001871 return 0;
1872 }
Christoffer Dall9ac71592016-08-17 10:46:10 +02001873 if (is_error_noslot_pfn(pfn))
Christoffer Dall94f8e642013-01-20 18:28:12 -05001874 return -EFAULT;
1875
Mario Smarduch15a49a42015-01-15 15:58:58 -08001876 if (kvm_is_device_pfn(pfn)) {
Kim Phillipsb8865762014-06-26 01:45:51 +01001877 mem_type = PAGE_S2_DEVICE;
Mario Smarduch15a49a42015-01-15 15:58:58 -08001878 flags |= KVM_S2PTE_FLAG_IS_IOMAP;
1879 } else if (logging_active) {
1880 /*
1881 * Faults on pages in a memslot with logging enabled
1882 * should not be mapped with huge pages (it introduces churn
1883 * and performance degradation), so force a pte mapping.
1884 */
Mario Smarduch15a49a42015-01-15 15:58:58 -08001885 flags |= KVM_S2_FLAG_LOGGING_ACTIVE;
1886
1887 /*
1888 * Only actually map the page as writable if this was a write
1889 * fault.
1890 */
1891 if (!write_fault)
1892 writable = false;
1893 }
Kim Phillipsb8865762014-06-26 01:45:51 +01001894
Marc Zyngier6d674e22019-12-11 16:56:48 +00001895 if (exec_fault && is_iomap(flags))
1896 return -ENOEXEC;
1897
Christoffer Dallad361f02012-11-01 17:14:45 +01001898 spin_lock(&kvm->mmu_lock);
1899 if (mmu_notifier_retry(kvm, mmu_seq))
Christoffer Dall94f8e642013-01-20 18:28:12 -05001900 goto out_unlock;
Mario Smarduch15a49a42015-01-15 15:58:58 -08001901
Suzuki K Poulose0529c902020-05-07 20:35:46 +08001902 /*
1903 * If we are not forced to use page mapping, check if we are
1904 * backed by a THP and thus use block mapping if possible.
1905 */
1906 if (vma_pagesize == PAGE_SIZE && !force_pte)
1907 vma_pagesize = transparent_hugepage_adjust(memslot, hva,
1908 &pfn, &fault_ipa);
Punit Agrawal3f58bf62018-12-11 17:10:34 +00001909 if (writable)
1910 kvm_set_pfn_dirty(pfn);
1911
Marc Zyngier6d674e22019-12-11 16:56:48 +00001912 if (fault_status != FSC_PERM && !is_iomap(flags))
Punit Agrawal3f58bf62018-12-11 17:10:34 +00001913 clean_dcache_guest_page(pfn, vma_pagesize);
1914
1915 if (exec_fault)
1916 invalidate_icache_guest_page(pfn, vma_pagesize);
1917
Punit Agrawal6396b852018-12-11 17:10:35 +00001918 /*
1919 * If we took an execution fault we have made the
1920 * icache/dcache coherent above and should now let the s2
1921 * mapping be executable.
1922 *
1923 * Write faults (!exec_fault && FSC_PERM) are orthogonal to
1924 * execute permissions, and we preserve whatever we have.
1925 */
1926 needs_exec = exec_fault ||
Will Deaconb757b472020-07-23 11:17:14 +01001927 (fault_status == FSC_PERM &&
1928 stage2_is_exec(kvm, fault_ipa, vma_pagesize));
Punit Agrawal6396b852018-12-11 17:10:35 +00001929
Punit Agrawalb8e0ba72018-12-11 17:10:41 +00001930 if (vma_pagesize == PUD_SIZE) {
1931 pud_t new_pud = kvm_pfn_pud(pfn, mem_type);
1932
1933 new_pud = kvm_pud_mkhuge(new_pud);
1934 if (writable)
1935 new_pud = kvm_s2pud_mkwrite(new_pud);
1936
1937 if (needs_exec)
1938 new_pud = kvm_s2pud_mkexec(new_pud);
1939
1940 ret = stage2_set_pud_huge(kvm, memcache, fault_ipa, &new_pud);
1941 } else if (vma_pagesize == PMD_SIZE) {
Punit Agrawalf8df7332018-12-11 17:10:36 +00001942 pmd_t new_pmd = kvm_pfn_pmd(pfn, mem_type);
1943
1944 new_pmd = kvm_pmd_mkhuge(new_pmd);
1945
Punit Agrawal3f58bf62018-12-11 17:10:34 +00001946 if (writable)
Catalin Marinas06485052016-04-13 17:57:37 +01001947 new_pmd = kvm_s2pmd_mkwrite(new_pmd);
Marc Zyngierd0e22b42017-10-23 17:11:19 +01001948
Punit Agrawal6396b852018-12-11 17:10:35 +00001949 if (needs_exec)
Marc Zyngierd0e22b42017-10-23 17:11:19 +01001950 new_pmd = kvm_s2pmd_mkexec(new_pmd);
Marc Zyngiera15f6932017-10-23 17:11:15 +01001951
Christoffer Dallad361f02012-11-01 17:14:45 +01001952 ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
1953 } else {
Punit Agrawalf8df7332018-12-11 17:10:36 +00001954 pte_t new_pte = kvm_pfn_pte(pfn, mem_type);
Mario Smarduch15a49a42015-01-15 15:58:58 -08001955
Christoffer Dallad361f02012-11-01 17:14:45 +01001956 if (writable) {
Catalin Marinas06485052016-04-13 17:57:37 +01001957 new_pte = kvm_s2pte_mkwrite(new_pte);
Mario Smarduch15a49a42015-01-15 15:58:58 -08001958 mark_page_dirty(kvm, gfn);
Christoffer Dallad361f02012-11-01 17:14:45 +01001959 }
Marc Zyngiera9c0e122017-10-23 17:11:20 +01001960
Punit Agrawal6396b852018-12-11 17:10:35 +00001961 if (needs_exec)
Marc Zyngierd0e22b42017-10-23 17:11:19 +01001962 new_pte = kvm_s2pte_mkexec(new_pte);
Marc Zyngiera15f6932017-10-23 17:11:15 +01001963
Mario Smarduch15a49a42015-01-15 15:58:58 -08001964 ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, flags);
Christoffer Dall94f8e642013-01-20 18:28:12 -05001965 }
Christoffer Dallad361f02012-11-01 17:14:45 +01001966
Christoffer Dall94f8e642013-01-20 18:28:12 -05001967out_unlock:
Christoffer Dallad361f02012-11-01 17:14:45 +01001968 spin_unlock(&kvm->mmu_lock);
Marc Zyngier35307b92015-03-12 18:16:51 +00001969 kvm_set_pfn_accessed(pfn);
Christoffer Dall94f8e642013-01-20 18:28:12 -05001970 kvm_release_pfn_clean(pfn);
Christoffer Dallad361f02012-11-01 17:14:45 +01001971 return ret;
Christoffer Dall94f8e642013-01-20 18:28:12 -05001972}
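/*
 * Summary of the mapping decision above (a simplified sketch): vma_pagesize
 * starts from the VMA's backing page size, is forced down to PAGE_SIZE when
 * dirty logging is active, the VMA is VM_PFNMAP, or the memslot is not
 * suitably aligned, and may be promoted back to PMD_SIZE for THP-backed
 * memory; the fault is then mapped with a PUD, PMD or PTE accordingly.
 */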
1973
Marc Zyngieraeda9132015-03-12 18:16:52 +00001974/*
1975 * Resolve the access fault by making the page young again.
1976 * Note that because the faulting entry is guaranteed not to be
1977 * cached in the TLB, we don't need to invalidate anything.
Catalin Marinas06485052016-04-13 17:57:37 +01001978 * Only the HW Access Flag updates are supported for Stage 2 (no DBM),
1979 * so there is no need for atomic (pte|pmd)_mkyoung operations.
Marc Zyngieraeda9132015-03-12 18:16:52 +00001980 */
1981static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
1982{
Punit Agrawaleb3f06242018-12-11 17:10:39 +00001983 pud_t *pud;
Marc Zyngieraeda9132015-03-12 18:16:52 +00001984 pmd_t *pmd;
1985 pte_t *pte;
Dan Williamsba049e92016-01-15 16:56:11 -08001986 kvm_pfn_t pfn;
Marc Zyngieraeda9132015-03-12 18:16:52 +00001987 bool pfn_valid = false;
1988
1989 trace_kvm_access_fault(fault_ipa);
1990
1991 spin_lock(&vcpu->kvm->mmu_lock);
1992
Punit Agrawaleb3f06242018-12-11 17:10:39 +00001993 if (!stage2_get_leaf_entry(vcpu->kvm, fault_ipa, &pud, &pmd, &pte))
Marc Zyngieraeda9132015-03-12 18:16:52 +00001994 goto out;
1995
Punit Agrawaleb3f06242018-12-11 17:10:39 +00001996 if (pud) { /* HugeTLB */
1997 *pud = kvm_s2pud_mkyoung(*pud);
1998 pfn = kvm_pud_pfn(*pud);
1999 pfn_valid = true;
2000 } else if (pmd) { /* THP, HugeTLB */
Marc Zyngieraeda9132015-03-12 18:16:52 +00002001 *pmd = pmd_mkyoung(*pmd);
2002 pfn = pmd_pfn(*pmd);
2003 pfn_valid = true;
Punit Agrawaleb3f06242018-12-11 17:10:39 +00002004 } else {
2005 *pte = pte_mkyoung(*pte); /* Just a page... */
2006 pfn = pte_pfn(*pte);
2007 pfn_valid = true;
Marc Zyngieraeda9132015-03-12 18:16:52 +00002008 }
2009
Marc Zyngieraeda9132015-03-12 18:16:52 +00002010out:
2011 spin_unlock(&vcpu->kvm->mmu_lock);
2012 if (pfn_valid)
2013 kvm_set_pfn_accessed(pfn);
2014}
2015
Christoffer Dall94f8e642013-01-20 18:28:12 -05002016/**
2017 * kvm_handle_guest_abort - handles all 2nd stage aborts
2018 * @vcpu: the VCPU pointer
Christoffer Dall94f8e642013-01-20 18:28:12 -05002019 *
2020 * Any abort that gets to the host is almost guaranteed to be caused by a
2021 * missing second stage translation table entry. This can mean either that the
2022 * guest simply needs more memory and we must allocate an appropriate page, or
2023 * that the guest tried to access I/O memory, which is emulated by user space.
2024 * The distinction is based on the IPA causing the fault and whether this
2025 * memory region has been registered as standard RAM by user space.
2026 */
Tianjia Zhang74cc7e02020-06-23 21:14:15 +08002027int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
Christoffer Dall342cd0a2013-01-20 18:28:06 -05002028{
Christoffer Dall94f8e642013-01-20 18:28:12 -05002029 unsigned long fault_status;
2030 phys_addr_t fault_ipa;
2031 struct kvm_memory_slot *memslot;
Christoffer Dall98047882014-08-19 12:18:04 +02002032 unsigned long hva;
2033 bool is_iabt, write_fault, writable;
Christoffer Dall94f8e642013-01-20 18:28:12 -05002034 gfn_t gfn;
2035 int ret, idx;
2036
Tyler Baicar621f48e2017-06-21 12:17:14 -06002037 fault_status = kvm_vcpu_trap_get_fault_type(vcpu);
2038
2039 fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
James Morsebb428922017-07-18 13:37:41 +01002040 is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
Tyler Baicar621f48e2017-06-21 12:17:14 -06002041
James Morsebb428922017-07-18 13:37:41 +01002042 /* Synchronous External Abort? */
2043 if (kvm_vcpu_dabt_isextabt(vcpu)) {
2044 /*
2045 * For RAS the host kernel may handle this abort.
2046 * There is no need to pass the error into the guest.
2047 */
James Morse0db5e022019-01-29 18:48:49 +00002048 if (!kvm_handle_guest_sea(fault_ipa, kvm_vcpu_get_hsr(vcpu)))
Tyler Baicar621f48e2017-06-21 12:17:14 -06002049 return 1;
Tyler Baicar621f48e2017-06-21 12:17:14 -06002050
James Morsebb428922017-07-18 13:37:41 +01002051 if (unlikely(!is_iabt)) {
2052 kvm_inject_vabt(vcpu);
2053 return 1;
2054 }
Marc Zyngier40557102016-09-06 14:02:15 +01002055 }
2056
Marc Zyngier7393b592012-09-17 19:27:09 +01002057 trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_hsr(vcpu),
2058 kvm_vcpu_get_hfar(vcpu), fault_ipa);
Christoffer Dall94f8e642013-01-20 18:28:12 -05002059
2060 /* Check that the stage-2 fault is a translation, permission or access fault */
Marc Zyngier35307b92015-03-12 18:16:51 +00002061 if (fault_status != FSC_FAULT && fault_status != FSC_PERM &&
2062 fault_status != FSC_ACCESS) {
Christoffer Dall0496daa52014-09-26 12:29:34 +02002063 kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n",
2064 kvm_vcpu_trap_get_class(vcpu),
2065 (unsigned long)kvm_vcpu_trap_get_fault(vcpu),
2066 (unsigned long)kvm_vcpu_get_hsr(vcpu));
Christoffer Dall94f8e642013-01-20 18:28:12 -05002067 return -EFAULT;
2068 }
2069
2070 idx = srcu_read_lock(&vcpu->kvm->srcu);
2071
2072 gfn = fault_ipa >> PAGE_SHIFT;
Christoffer Dall98047882014-08-19 12:18:04 +02002073 memslot = gfn_to_memslot(vcpu->kvm, gfn);
2074 hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable);
Ard Biesheuvela7d079c2014-09-09 11:27:09 +01002075 write_fault = kvm_is_write_fault(vcpu);
Christoffer Dall98047882014-08-19 12:18:04 +02002076 if (kvm_is_error_hva(hva) || (write_fault && !writable)) {
Christoffer Dall94f8e642013-01-20 18:28:12 -05002077 if (is_iabt) {
2078 /* Prefetch Abort on I/O address */
Marc Zyngier6d674e22019-12-11 16:56:48 +00002079 ret = -ENOEXEC;
2080 goto out;
Christoffer Dall94f8e642013-01-20 18:28:12 -05002081 }
2082
Marc Zyngiercfe39502012-12-12 14:42:09 +00002083 /*
Marc Zyngier57c841f2016-01-29 15:01:28 +00002084 * Check for a cache maintenance operation. Since we
2085 * ended up here, we know it is outside of any memory
2086 * slot. But we can't find out if that is for a device,
2087 * or if the guest is just being stupid. The only thing
2088 * we know for sure is that this range cannot be cached.
2089 *
2090 * So let's assume that the guest is just being
2091 * cautious, and skip the instruction.
2092 */
2093 if (kvm_vcpu_dabt_is_cm(vcpu)) {
2094 kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
2095 ret = 1;
2096 goto out_unlock;
2097 }
2098
2099 /*
Marc Zyngiercfe39502012-12-12 14:42:09 +00002100 * The IPA is reported as [MAX:12], so we need to
2101 * complement it with the bottom 12 bits from the
2102 * faulting VA. This is always 12 bits, irrespective
2103 * of the page size.
2104 */
2105 fault_ipa |= kvm_vcpu_get_hfar(vcpu) & ((1 << 12) - 1);
Tianjia Zhang74cc7e02020-06-23 21:14:15 +08002106 ret = io_mem_abort(vcpu, fault_ipa);
Christoffer Dall94f8e642013-01-20 18:28:12 -05002107 goto out_unlock;
2108 }
2109
Christoffer Dallc3058d52014-10-10 12:14:29 +02002110 /* Userspace should not be able to register out-of-bounds IPAs */
Suzuki K Poulosee55cac52018-09-26 17:32:44 +01002111 VM_BUG_ON(fault_ipa >= kvm_phys_size(vcpu->kvm));
Christoffer Dallc3058d52014-10-10 12:14:29 +02002112
Marc Zyngieraeda9132015-03-12 18:16:52 +00002113 if (fault_status == FSC_ACCESS) {
2114 handle_access_fault(vcpu, fault_ipa);
2115 ret = 1;
2116 goto out_unlock;
2117 }
2118
Christoffer Dall98047882014-08-19 12:18:04 +02002119 ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status);
Christoffer Dall94f8e642013-01-20 18:28:12 -05002120 if (ret == 0)
2121 ret = 1;
Marc Zyngier6d674e22019-12-11 16:56:48 +00002122out:
2123 if (ret == -ENOEXEC) {
2124 kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu));
2125 ret = 1;
2126 }
Christoffer Dall94f8e642013-01-20 18:28:12 -05002127out_unlock:
2128 srcu_read_unlock(&vcpu->kvm->srcu, idx);
2129 return ret;
Christoffer Dall342cd0a2013-01-20 18:28:06 -05002130}
2131
Marc Zyngier1d2ebac2015-03-12 18:16:50 +00002132static int handle_hva_to_gpa(struct kvm *kvm,
2133 unsigned long start,
2134 unsigned long end,
2135 int (*handler)(struct kvm *kvm,
Suzuki K Poulose056aad62017-03-20 18:26:42 +00002136 gpa_t gpa, u64 size,
2137 void *data),
Marc Zyngier1d2ebac2015-03-12 18:16:50 +00002138 void *data)
Christoffer Dalld5d81842013-01-20 18:28:07 -05002139{
2140 struct kvm_memslots *slots;
2141 struct kvm_memory_slot *memslot;
Marc Zyngier1d2ebac2015-03-12 18:16:50 +00002142 int ret = 0;
Christoffer Dalld5d81842013-01-20 18:28:07 -05002143
2144 slots = kvm_memslots(kvm);
2145
2146 /* we only care about the pages that the guest sees */
2147 kvm_for_each_memslot(memslot, slots) {
2148 unsigned long hva_start, hva_end;
Suzuki K Poulose056aad62017-03-20 18:26:42 +00002149 gfn_t gpa;
Christoffer Dalld5d81842013-01-20 18:28:07 -05002150
2151 hva_start = max(start, memslot->userspace_addr);
2152 hva_end = min(end, memslot->userspace_addr +
2153 (memslot->npages << PAGE_SHIFT));
2154 if (hva_start >= hva_end)
2155 continue;
2156
Suzuki K Poulose056aad62017-03-20 18:26:42 +00002157 gpa = hva_to_gfn_memslot(hva_start, memslot) << PAGE_SHIFT;
2158 ret |= handler(kvm, gpa, (u64)(hva_end - hva_start), data);
Christoffer Dalld5d81842013-01-20 18:28:07 -05002159 }
Marc Zyngier1d2ebac2015-03-12 18:16:50 +00002160
2161 return ret;
Christoffer Dalld5d81842013-01-20 18:28:07 -05002162}
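/*
 * The MMU notifier callbacks below are thin wrappers around
 * handle_hva_to_gpa(): each converts the HVA range into the overlapping
 * GPA ranges per memslot and applies a small per-range handler (unmap,
 * set-pte, age or test-age).
 */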
2163
Suzuki K Poulose056aad62017-03-20 18:26:42 +00002164static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
Christoffer Dalld5d81842013-01-20 18:28:07 -05002165{
Suzuki K Poulose056aad62017-03-20 18:26:42 +00002166 unmap_stage2_range(kvm, gpa, size);
Marc Zyngier1d2ebac2015-03-12 18:16:50 +00002167 return 0;
Christoffer Dalld5d81842013-01-20 18:28:07 -05002168}
2169
Christoffer Dalld5d81842013-01-20 18:28:07 -05002170int kvm_unmap_hva_range(struct kvm *kvm,
2171 unsigned long start, unsigned long end)
2172{
2173 if (!kvm->arch.pgd)
2174 return 0;
2175
2176 trace_kvm_unmap_hva_range(start, end);
2177 handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, NULL);
2178 return 0;
2179}
2180
Suzuki K Poulose056aad62017-03-20 18:26:42 +00002181static int kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
Christoffer Dalld5d81842013-01-20 18:28:07 -05002182{
2183 pte_t *pte = (pte_t *)data;
2184
Suzuki K Poulose056aad62017-03-20 18:26:42 +00002185 WARN_ON(size != PAGE_SIZE);
Mario Smarduch15a49a42015-01-15 15:58:58 -08002186 /*
2187 * We can always call stage2_set_pte with KVM_S2PTE_FLAG_LOGGING_ACTIVE
2188 * flag clear because MMU notifiers will have unmapped a huge PMD before
2189 * calling ->change_pte() (which in turn calls kvm_set_spte_hva()) and
2190 * therefore stage2_set_pte() never needs to clear out a huge PMD
2191 * through this calling path.
2192 */
2193 stage2_set_pte(kvm, NULL, gpa, pte, 0);
Marc Zyngier1d2ebac2015-03-12 18:16:50 +00002194 return 0;
Christoffer Dalld5d81842013-01-20 18:28:07 -05002195}
2196
2197
Lan Tianyu748c0e32018-12-06 21:21:10 +08002198int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
Christoffer Dalld5d81842013-01-20 18:28:07 -05002199{
2200 unsigned long end = hva + PAGE_SIZE;
Marc Zyngier694556d2018-08-23 09:58:27 +01002201 kvm_pfn_t pfn = pte_pfn(pte);
Christoffer Dalld5d81842013-01-20 18:28:07 -05002202 pte_t stage2_pte;
2203
2204 if (!kvm->arch.pgd)
Lan Tianyu748c0e32018-12-06 21:21:10 +08002205 return 0;
Christoffer Dalld5d81842013-01-20 18:28:07 -05002206
2207 trace_kvm_set_spte_hva(hva);
Marc Zyngier694556d2018-08-23 09:58:27 +01002208
2209 /*
2210 * We've moved a page around, probably through CoW, so let's treat it
2211 * just like a translation fault and clean the cache to the PoC.
2212 */
2213 clean_dcache_guest_page(pfn, PAGE_SIZE);
Punit Agrawalf8df7332018-12-11 17:10:36 +00002214 stage2_pte = kvm_pfn_pte(pfn, PAGE_S2);
Christoffer Dalld5d81842013-01-20 18:28:07 -05002215 handle_hva_to_gpa(kvm, hva, end, &kvm_set_spte_handler, &stage2_pte);
Lan Tianyu748c0e32018-12-06 21:21:10 +08002216
2217 return 0;
Christoffer Dalld5d81842013-01-20 18:28:07 -05002218}
2219
Suzuki K Poulose056aad62017-03-20 18:26:42 +00002220static int kvm_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
Marc Zyngier35307b92015-03-12 18:16:51 +00002221{
Punit Agrawal35a63962018-12-11 17:10:40 +00002222 pud_t *pud;
Marc Zyngier35307b92015-03-12 18:16:51 +00002223 pmd_t *pmd;
2224 pte_t *pte;
2225
Punit Agrawal35a63962018-12-11 17:10:40 +00002226 WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PUD_SIZE);
2227 if (!stage2_get_leaf_entry(kvm, gpa, &pud, &pmd, &pte))
Marc Zyngier35307b92015-03-12 18:16:51 +00002228 return 0;
2229
Punit Agrawal35a63962018-12-11 17:10:40 +00002230 if (pud)
2231 return stage2_pudp_test_and_clear_young(pud);
2232 else if (pmd)
Catalin Marinas06485052016-04-13 17:57:37 +01002233 return stage2_pmdp_test_and_clear_young(pmd);
Punit Agrawal35a63962018-12-11 17:10:40 +00002234 else
2235 return stage2_ptep_test_and_clear_young(pte);
Marc Zyngier35307b92015-03-12 18:16:51 +00002236}
2237
Suzuki K Poulose056aad62017-03-20 18:26:42 +00002238static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
Marc Zyngier35307b92015-03-12 18:16:51 +00002239{
Punit Agrawal35a63962018-12-11 17:10:40 +00002240 pud_t *pud;
Marc Zyngier35307b92015-03-12 18:16:51 +00002241 pmd_t *pmd;
2242 pte_t *pte;
2243
Punit Agrawal35a63962018-12-11 17:10:40 +00002244 WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PUD_SIZE);
2245 if (!stage2_get_leaf_entry(kvm, gpa, &pud, &pmd, &pte))
Marc Zyngier35307b92015-03-12 18:16:51 +00002246 return 0;
2247
Punit Agrawal35a63962018-12-11 17:10:40 +00002248 if (pud)
2249 return kvm_s2pud_young(*pud);
2250 else if (pmd)
Marc Zyngier35307b92015-03-12 18:16:51 +00002251 return pmd_young(*pmd);
Punit Agrawal35a63962018-12-11 17:10:40 +00002252 else
Marc Zyngier35307b92015-03-12 18:16:51 +00002253 return pte_young(*pte);
Marc Zyngier35307b92015-03-12 18:16:51 +00002254}
2255
2256int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
2257{
Suzuki K Poulose7e5a6722017-07-05 09:57:00 +01002258 if (!kvm->arch.pgd)
2259 return 0;
Marc Zyngier35307b92015-03-12 18:16:51 +00002260 trace_kvm_age_hva(start, end);
2261 return handle_hva_to_gpa(kvm, start, end, kvm_age_hva_handler, NULL);
2262}
2263
2264int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
2265{
Suzuki K Poulose7e5a6722017-07-05 09:57:00 +01002266 if (!kvm->arch.pgd)
2267 return 0;
Marc Zyngier35307b92015-03-12 18:16:51 +00002268 trace_kvm_test_age_hva(hva);
Gavin Shancf2d23e2020-01-21 16:56:59 +11002269 return handle_hva_to_gpa(kvm, hva, hva + PAGE_SIZE,
2270 kvm_test_age_hva_handler, NULL);
Marc Zyngier35307b92015-03-12 18:16:51 +00002271}
2272
Christoffer Dalld5d81842013-01-20 18:28:07 -05002273void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu)
2274{
Sean Christophersonc1a33ae2020-07-02 19:35:42 -07002275 kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
Christoffer Dalld5d81842013-01-20 18:28:07 -05002276}
2277
Christoffer Dall342cd0a2013-01-20 18:28:06 -05002278phys_addr_t kvm_mmu_get_httbr(void)
2279{
Ard Biesheuvele4c5a682015-03-19 16:42:28 +00002280 if (__kvm_cpu_uses_extended_idmap())
2281 return virt_to_phys(merged_hyp_pgd);
2282 else
2283 return virt_to_phys(hyp_pgd);
Christoffer Dall342cd0a2013-01-20 18:28:06 -05002284}
2285
Marc Zyngier5a677ce2013-04-12 19:12:06 +01002286phys_addr_t kvm_get_idmap_vector(void)
2287{
2288 return hyp_idmap_vector;
2289}
2290
Marc Zyngier0535a3e2016-06-30 18:40:43 +01002291static int kvm_map_idmap_text(pgd_t *pgd)
2292{
2293 int err;
2294
2295 /* Create the idmap in the boot page tables */
Kristina Martsenko98732d12018-01-15 15:23:49 +00002296 err = __create_hyp_mappings(pgd, __kvm_idmap_ptrs_per_pgd(),
Marc Zyngier0535a3e2016-06-30 18:40:43 +01002297 hyp_idmap_start, hyp_idmap_end,
2298 __phys_to_pfn(hyp_idmap_start),
2299 PAGE_HYP_EXEC);
2300 if (err)
2301 kvm_err("Failed to idmap %lx-%lx\n",
2302 hyp_idmap_start, hyp_idmap_end);
2303
2304 return err;
2305}
2306
Christoffer Dall342cd0a2013-01-20 18:28:06 -05002307int kvm_mmu_init(void)
2308{
Marc Zyngier2fb41052013-04-12 19:12:03 +01002309 int err;
2310
Andrew Scull0a787912020-05-19 11:40:36 +01002311 hyp_idmap_start = __pa_symbol(__hyp_idmap_text_start);
Marc Zyngier46fef152018-03-12 14:25:10 +00002312 hyp_idmap_start = ALIGN_DOWN(hyp_idmap_start, PAGE_SIZE);
Andrew Scull0a787912020-05-19 11:40:36 +01002313 hyp_idmap_end = __pa_symbol(__hyp_idmap_text_end);
Marc Zyngier46fef152018-03-12 14:25:10 +00002314 hyp_idmap_end = ALIGN(hyp_idmap_end, PAGE_SIZE);
Andrew Scull0a787912020-05-19 11:40:36 +01002315 hyp_idmap_vector = __pa_symbol(__kvm_hyp_init);
Marc Zyngier5a677ce2013-04-12 19:12:06 +01002316
Ard Biesheuvel06f75a12015-03-19 16:42:26 +00002317 /*
2318 * We rely on the linker script to ensure at build time that the HYP
2319 * init code does not cross a page boundary.
2320 */
2321 BUG_ON((hyp_idmap_start ^ (hyp_idmap_end - 1)) & PAGE_MASK);
Marc Zyngier5a677ce2013-04-12 19:12:06 +01002322
Marc Zyngierb4ef0492017-12-03 20:04:51 +00002323 kvm_debug("IDMAP page: %lx\n", hyp_idmap_start);
2324 kvm_debug("HYP VA range: %lx:%lx\n",
2325 kern_hyp_va(PAGE_OFFSET),
2326 kern_hyp_va((unsigned long)high_memory - 1));
Marc Zyngiereac378a2016-06-30 18:40:50 +01002327
Marc Zyngier6c41a412016-06-30 18:40:51 +01002328 if (hyp_idmap_start >= kern_hyp_va(PAGE_OFFSET) &&
Marc Zyngiered57cac2017-12-03 18:22:49 +00002329 hyp_idmap_start < kern_hyp_va((unsigned long)high_memory - 1) &&
Marc Zyngierd2896d42016-08-22 09:01:17 +01002330 hyp_idmap_start != (unsigned long)__hyp_idmap_text_start) {
Marc Zyngiereac378a2016-06-30 18:40:50 +01002331 /*
2332 * The idmap page intersects with the VA space;
2333 * it is not safe to continue further.
2334 */
2335 kvm_err("IDMAP intersecting with HYP VA, unable to continue\n");
2336 err = -EINVAL;
2337 goto out;
2338 }
2339
Christoffer Dall38f791a2014-10-10 12:14:28 +02002340 hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, hyp_pgd_order);
Marc Zyngier0535a3e2016-06-30 18:40:43 +01002341 if (!hyp_pgd) {
Christoffer Dalld5d81842013-01-20 18:28:07 -05002342 kvm_err("Hyp mode PGD not allocated\n");
Marc Zyngier2fb41052013-04-12 19:12:03 +01002343 err = -ENOMEM;
2344 goto out;
2345 }
2346
Ard Biesheuvele4c5a682015-03-19 16:42:28 +00002347 if (__kvm_cpu_uses_extended_idmap()) {
Marc Zyngier0535a3e2016-06-30 18:40:43 +01002348 boot_hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
2349 hyp_pgd_order);
2350 if (!boot_hyp_pgd) {
2351 kvm_err("Hyp boot PGD not allocated\n");
2352 err = -ENOMEM;
2353 goto out;
2354 }
2355
2356 err = kvm_map_idmap_text(boot_hyp_pgd);
2357 if (err)
2358 goto out;
2359
Ard Biesheuvele4c5a682015-03-19 16:42:28 +00002360 merged_hyp_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
2361 if (!merged_hyp_pgd) {
2362 kvm_err("Failed to allocate extra HYP pgd\n");
2363 goto out;
2364 }
2365 __kvm_extend_hypmap(boot_hyp_pgd, hyp_pgd, merged_hyp_pgd,
2366 hyp_idmap_start);
Marc Zyngier0535a3e2016-06-30 18:40:43 +01002367 } else {
2368 err = kvm_map_idmap_text(hyp_pgd);
2369 if (err)
2370 goto out;
Marc Zyngier5a677ce2013-04-12 19:12:06 +01002371 }
2372
Marc Zyngiere3f019b2017-12-04 17:04:38 +00002373 io_map_base = hyp_idmap_start;
Christoffer Dalld5d81842013-01-20 18:28:07 -05002374 return 0;
Marc Zyngier2fb41052013-04-12 19:12:03 +01002375out:
Marc Zyngier4f728272013-04-12 19:12:05 +01002376 free_hyp_pgds();
Marc Zyngier2fb41052013-04-12 19:12:03 +01002377 return err;
Christoffer Dall342cd0a2013-01-20 18:28:06 -05002378}
Eric Augerdf6ce242014-06-06 11:10:23 +02002379
2380void kvm_arch_commit_memory_region(struct kvm *kvm,
Paolo Bonzini09170a42015-05-18 13:59:39 +02002381 const struct kvm_userspace_memory_region *mem,
Sean Christopherson9d4c1972020-02-18 13:07:24 -08002382 struct kvm_memory_slot *old,
Paolo Bonzinif36f3f22015-05-18 13:20:23 +02002383 const struct kvm_memory_slot *new,
Eric Augerdf6ce242014-06-06 11:10:23 +02002384 enum kvm_mr_change change)
2385{
Mario Smarduchc6473552015-01-15 15:58:56 -08002386 /*
2387 * At this point the memslot has been committed and the dirty_bitmap[]
Fuad Tabba656012c2020-04-01 15:03:10 +01002388 * has been allocated; dirty pages will be tracked while the
Mario Smarduchc6473552015-01-15 15:58:56 -08002389 * memory slot is write protected.
2390 */
Keqian Zhuc8626262020-04-13 20:20:23 +08002391 if (change != KVM_MR_DELETE && mem->flags & KVM_MEM_LOG_DIRTY_PAGES) {
2392 /*
2393 * With initial-all-set, we don't need to write
2394 * protect any pages because they're all reported as dirty.
2395 * Huge pages and normal pages will be write protected gradually.
2396 */
2397 if (!kvm_dirty_log_manual_protect_and_init_set(kvm)) {
2398 kvm_mmu_wp_memory_region(kvm, mem->slot);
2399 }
2400 }
Eric Augerdf6ce242014-06-06 11:10:23 +02002401}
2402
2403int kvm_arch_prepare_memory_region(struct kvm *kvm,
2404 struct kvm_memory_slot *memslot,
Paolo Bonzini09170a42015-05-18 13:59:39 +02002405 const struct kvm_userspace_memory_region *mem,
Eric Augerdf6ce242014-06-06 11:10:23 +02002406 enum kvm_mr_change change)
2407{
Ard Biesheuvel8eef9122014-10-10 17:00:32 +02002408 hva_t hva = mem->userspace_addr;
2409 hva_t reg_end = hva + mem->memory_size;
2410 bool writable = !(mem->flags & KVM_MEM_READONLY);
2411 int ret = 0;
2412
Mario Smarduch15a49a42015-01-15 15:58:58 -08002413 if (change != KVM_MR_CREATE && change != KVM_MR_MOVE &&
2414 change != KVM_MR_FLAGS_ONLY)
Ard Biesheuvel8eef9122014-10-10 17:00:32 +02002415 return 0;
2416
2417 /*
Christoffer Dallc3058d52014-10-10 12:14:29 +02002418 * Prevent userspace from creating a memory region outside of the IPA
2419 * space addressable by the KVM guest IPA space.
2420 */
2421 if (memslot->base_gfn + memslot->npages >=
Suzuki K Poulosee55cac52018-09-26 17:32:44 +01002422 (kvm_phys_size(kvm) >> PAGE_SHIFT))
Christoffer Dallc3058d52014-10-10 12:14:29 +02002423 return -EFAULT;
2424
Michel Lespinasse89154dd2020-06-08 21:33:29 -07002425 mmap_read_lock(current->mm);
Christoffer Dallc3058d52014-10-10 12:14:29 +02002426 /*
Ard Biesheuvel8eef9122014-10-10 17:00:32 +02002427 * A memory region could potentially cover multiple VMAs, and any holes
2428 * between them, so iterate over all of them to find out if we can map
2429 * any of them right now.
2430 *
2431 * +--------------------------------------------+
2432 * +---------------+----------------+ +----------------+
2433 * | : VMA 1 | VMA 2 | | VMA 3 : |
2434 * +---------------+----------------+ +----------------+
2435 * | memory region |
2436 * +--------------------------------------------+
2437 */
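	/*
	 * For instance, if VMA 1 above spans HVAs [0x1000, 0x5000) and the
	 * memory region starts at hva = 0x3000, the first iteration only
	 * considers the intersection [0x3000, 0x5000) and the loop then
	 * continues from hva = 0x5000 with the next VMA (illustrative
	 * addresses only).
	 */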
2438 do {
2439 struct vm_area_struct *vma = find_vma(current->mm, hva);
2440 hva_t vm_start, vm_end;
2441
2442 if (!vma || vma->vm_start >= reg_end)
2443 break;
2444
2445 /*
Ard Biesheuvel8eef9122014-10-10 17:00:32 +02002446 * Take the intersection of this VMA with the memory region
2447 */
2448 vm_start = max(hva, vma->vm_start);
2449 vm_end = min(reg_end, vma->vm_end);
2450
2451 if (vma->vm_flags & VM_PFNMAP) {
2452 gpa_t gpa = mem->guest_phys_addr +
2453 (vm_start - mem->userspace_addr);
Marek Majtykaca09f022015-09-16 12:04:55 +02002454 phys_addr_t pa;
2455
2456 pa = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT;
2457 pa += vm_start - vma->vm_start;
Ard Biesheuvel8eef9122014-10-10 17:00:32 +02002458
Mario Smarduch15a49a42015-01-15 15:58:58 -08002459 /* IO region dirty page logging not allowed */
Marc Zyngier72f31042017-03-16 18:20:50 +00002460 if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) {
2461 ret = -EINVAL;
2462 goto out;
2463 }
Mario Smarduch15a49a42015-01-15 15:58:58 -08002464
Ard Biesheuvel8eef9122014-10-10 17:00:32 +02002465 ret = kvm_phys_addr_ioremap(kvm, gpa, pa,
2466 vm_end - vm_start,
2467 writable);
2468 if (ret)
2469 break;
2470 }
2471 hva = vm_end;
2472 } while (hva < reg_end);
2473
Mario Smarduch15a49a42015-01-15 15:58:58 -08002474 if (change == KVM_MR_FLAGS_ONLY)
Marc Zyngier72f31042017-03-16 18:20:50 +00002475 goto out;
Mario Smarduch15a49a42015-01-15 15:58:58 -08002476
Ard Biesheuvel849260c2014-11-17 14:58:53 +00002477 spin_lock(&kvm->mmu_lock);
2478 if (ret)
Ard Biesheuvel8eef9122014-10-10 17:00:32 +02002479 unmap_stage2_range(kvm, mem->guest_phys_addr, mem->memory_size);
Ard Biesheuvel849260c2014-11-17 14:58:53 +00002480 else
2481 stage2_flush_memslot(kvm, memslot);
2482 spin_unlock(&kvm->mmu_lock);
Marc Zyngier72f31042017-03-16 18:20:50 +00002483out:
Michel Lespinasse89154dd2020-06-08 21:33:29 -07002484 mmap_read_unlock(current->mm);
Ard Biesheuvel8eef9122014-10-10 17:00:32 +02002485 return ret;
Eric Augerdf6ce242014-06-06 11:10:23 +02002486}
2487
Sean Christophersone96c81e2020-02-18 13:07:27 -08002488void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
Eric Augerdf6ce242014-06-06 11:10:23 +02002489{
2490}
2491
Sean Christopherson15248252019-02-05 12:54:17 -08002492void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen)
Eric Augerdf6ce242014-06-06 11:10:23 +02002493{
2494}
2495
2496void kvm_arch_flush_shadow_all(struct kvm *kvm)
2497{
Suzuki K Poulose293f2932016-09-08 16:25:49 +01002498 kvm_free_stage2_pgd(kvm);
Eric Augerdf6ce242014-06-06 11:10:23 +02002499}
2500
2501void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
2502 struct kvm_memory_slot *slot)
2503{
Ard Biesheuvel8eef9122014-10-10 17:00:32 +02002504 gpa_t gpa = slot->base_gfn << PAGE_SHIFT;
2505 phys_addr_t size = slot->npages << PAGE_SHIFT;
2506
2507 spin_lock(&kvm->mmu_lock);
2508 unmap_stage2_range(kvm, gpa, size);
2509 spin_unlock(&kvm->mmu_lock);
Eric Augerdf6ce242014-06-06 11:10:23 +02002510}
Marc Zyngier3c1e7162014-12-19 16:05:31 +00002511
2512/*
2513 * See note at ARMv7 ARM B1.14.4 (TL;DR: S/W ops are not easily virtualized).
2514 *
2515 * Main problems:
2516 * - S/W ops are local to a CPU (not broadcast)
2517 * - We have line migration behind our back (speculation)
2518 * - System caches don't support S/W at all (damn!)
2519 *
2520 * In the face of the above, the best we can do is to try and convert
2521 * S/W ops to VA ops. Because the guest is not allowed to infer the
2522 * S/W to PA mapping, it can only use S/W to nuke the whole cache,
2523 * which is a rather good thing for us.
2524 *
2525 * Also, it is only used when turning caches on/off ("The expected
2526 * usage of the cache maintenance instructions that operate by set/way
2527 * is associated with the cache maintenance instructions associated
2528 * with the powerdown and powerup of caches, if this is required by
2529 * the implementation.").
2530 *
2531 * We use the following policy:
2532 *
2533 * - If we trap a S/W operation, we enable VM trapping to detect
2534 * caches being turned on/off, and do a full clean.
2535 *
2536 * - We flush the caches on both caches being turned on and off.
2537 *
2538 * - Once the caches are enabled, we stop trapping VM ops.
2539 */
2540void kvm_set_way_flush(struct kvm_vcpu *vcpu)
2541{
Christoffer Dall3df59d82017-08-03 12:09:05 +02002542 unsigned long hcr = *vcpu_hcr(vcpu);
Marc Zyngier3c1e7162014-12-19 16:05:31 +00002543
2544 /*
2545 * If this is the first time we do a S/W operation
2546 * (i.e. HCR_TVM is not set), flush the whole of guest memory
2547 * and enable VM trapping.
2548 *
2549 * Otherwise, rely on the VM trapping to wait for the MMU +
2550 * Caches to be turned off. At that point, we'll be able to
2551 * clean the caches again.
2552 */
2553 if (!(hcr & HCR_TVM)) {
2554 trace_kvm_set_way_flush(*vcpu_pc(vcpu),
2555 vcpu_has_cache_enabled(vcpu));
2556 stage2_flush_vm(vcpu->kvm);
Christoffer Dall3df59d82017-08-03 12:09:05 +02002557 *vcpu_hcr(vcpu) = hcr | HCR_TVM;
Marc Zyngier3c1e7162014-12-19 16:05:31 +00002558 }
2559}
2560
2561void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled)
2562{
2563 bool now_enabled = vcpu_has_cache_enabled(vcpu);
2564
2565 /*
2566 * If switching the MMU+caches on, we need to invalidate the caches.
2567 * If switching them off, we need to clean the caches.
2568 * Clean + invalidate always does the trick.
2569 */
2570 if (now_enabled != was_enabled)
2571 stage2_flush_vm(vcpu->kvm);
2572
2573 /* Caches are now on, stop trapping VM ops (until a S/W op) */
2574 if (now_enabled)
Christoffer Dall3df59d82017-08-03 12:09:05 +02002575 *vcpu_hcr(vcpu) &= ~HCR_TVM;
Marc Zyngier3c1e7162014-12-19 16:05:31 +00002576
2577 trace_kvm_toggle_cache(*vcpu_pc(vcpu), was_enabled, now_enabled);
2578}