/*
 * Copyright (C) 2012 - Virtual Open Systems and Columbia University
 * Author: Christoffer Dall <c.dall@virtualopensystems.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */

#include <linux/mman.h>
#include <linux/kvm_host.h>
#include <linux/io.h>
#include <linux/hugetlb.h>
#include <linux/sched/signal.h>
#include <trace/events/kvm.h>
#include <asm/pgalloc.h>
#include <asm/cacheflush.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_mmio.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_emulate.h>
#include <asm/virt.h>
#include <asm/system_misc.h>

#include "trace.h"

static pgd_t *boot_hyp_pgd;
static pgd_t *hyp_pgd;
static pgd_t *merged_hyp_pgd;
static DEFINE_MUTEX(kvm_hyp_pgd_mutex);

static unsigned long hyp_idmap_start;
static unsigned long hyp_idmap_end;
static phys_addr_t hyp_idmap_vector;

static unsigned long io_map_base;

#define S2_PGD_SIZE	(PTRS_PER_S2_PGD * sizeof(pgd_t))
#define hyp_pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t))

#define KVM_S2PTE_FLAG_IS_IOMAP		(1UL << 0)
#define KVM_S2_FLAG_LOGGING_ACTIVE	(1UL << 1)

static bool memslot_is_logging(struct kvm_memory_slot *memslot)
{
	return memslot->dirty_bitmap && !(memslot->flags & KVM_MEM_READONLY);
}

/**
 * kvm_flush_remote_tlbs() - flush all VM TLB entries for v7/8
 * @kvm:	pointer to kvm structure.
 *
 * Interface to HYP function to flush all VM TLB entries
 */
void kvm_flush_remote_tlbs(struct kvm *kvm)
{
	kvm_call_hyp(__kvm_tlb_flush_vmid, kvm);
}

static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
{
	kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
}

/*
 * D-Cache management functions. They take the page table entries by
 * value, as they are flushing the cache using the kernel mapping (or
 * kmap on 32bit).
 */
static void kvm_flush_dcache_pte(pte_t pte)
{
	__kvm_flush_dcache_pte(pte);
}

static void kvm_flush_dcache_pmd(pmd_t pmd)
{
	__kvm_flush_dcache_pmd(pmd);
}

static void kvm_flush_dcache_pud(pud_t pud)
{
	__kvm_flush_dcache_pud(pud);
}

static bool kvm_is_device_pfn(unsigned long pfn)
{
	return !pfn_valid(pfn);
}

/**
 * stage2_dissolve_pmd() - clear and flush huge PMD entry
 * @kvm:	pointer to kvm structure.
 * @addr:	IPA
 * @pmd:	pmd pointer for IPA
 *
 * Clears a PMD entry and flushes the 1st and 2nd stage TLBs for addr. Marks
 * all pages in the range dirty.
 */
static void stage2_dissolve_pmd(struct kvm *kvm, phys_addr_t addr, pmd_t *pmd)
{
	if (!pmd_thp_or_huge(*pmd))
		return;

	pmd_clear(pmd);
	kvm_tlb_flush_vmid_ipa(kvm, addr);
	put_page(virt_to_page(pmd));
}

static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
				  int min, int max)
{
	void *page;

	BUG_ON(max > KVM_NR_MEM_OBJS);
	if (cache->nobjs >= min)
		return 0;
	while (cache->nobjs < max) {
		page = (void *)__get_free_page(PGALLOC_GFP);
		if (!page)
			return -ENOMEM;
		cache->objects[cache->nobjs++] = page;
	}
	return 0;
}

static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc)
{
	while (mc->nobjs)
		free_page((unsigned long)mc->objects[--mc->nobjs]);
}

static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
{
	void *p;

	BUG_ON(!mc || !mc->nobjs);
	p = mc->objects[--mc->nobjs];
	return p;
}
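
/*
 * Usage note for the three cache helpers above: they are meant to be used in
 * pairs around kvm->mmu_lock. A minimal sketch, assuming a caller that has a
 * struct kvm_mmu_memory_cache available (for instance the per-vCPU
 * mmu_page_cache used later in this file):
 *
 *	ret = mmu_topup_memory_cache(&cache, KVM_MMU_CACHE_MIN_PAGES,
 *				     KVM_NR_MEM_OBJS);
 *	if (ret)
 *		return ret;
 *	spin_lock(&kvm->mmu_lock);
 *	ret = stage2_set_pte(kvm, &cache, addr, &pte, 0);
 *	spin_unlock(&kvm->mmu_lock);
 *
 * The topup may sleep and therefore happens before the lock is taken;
 * mmu_memory_cache_alloc() then hands out the pre-allocated pages without
 * sleeping while the lock is held.
 */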

static void clear_stage2_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr)
{
	pud_t *pud_table __maybe_unused = stage2_pud_offset(pgd, 0UL);
	stage2_pgd_clear(pgd);
	kvm_tlb_flush_vmid_ipa(kvm, addr);
	stage2_pud_free(pud_table);
	put_page(virt_to_page(pgd));
}

static void clear_stage2_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr)
{
	pmd_t *pmd_table __maybe_unused = stage2_pmd_offset(pud, 0);
	VM_BUG_ON(stage2_pud_huge(*pud));
	stage2_pud_clear(pud);
	kvm_tlb_flush_vmid_ipa(kvm, addr);
	stage2_pmd_free(pmd_table);
	put_page(virt_to_page(pud));
}

static void clear_stage2_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)
{
	pte_t *pte_table = pte_offset_kernel(pmd, 0);
	VM_BUG_ON(pmd_thp_or_huge(*pmd));
	pmd_clear(pmd);
	kvm_tlb_flush_vmid_ipa(kvm, addr);
	pte_free_kernel(NULL, pte_table);
	put_page(virt_to_page(pmd));
}

/*
 * Unmapping vs dcache management:
 *
 * If a guest maps certain memory pages as uncached, all writes will
 * bypass the data cache and go directly to RAM.  However, the CPUs
 * can still speculate reads (not writes) and fill cache lines with
 * data.
 *
 * Those cache lines will be *clean* cache lines though, so a
 * clean+invalidate operation is equivalent to an invalidate
 * operation, because no cache lines are marked dirty.
 *
 * Those clean cache lines could be filled prior to an uncached write
 * by the guest, and the cache coherent IO subsystem would therefore
 * end up writing old data to disk.
 *
 * This is why right after unmapping a page/section and invalidating
 * the corresponding TLBs, we call kvm_flush_dcache_p*() to make sure
 * the IO subsystem will never hit in the cache.
 *
 * This is all avoided on systems that have ARM64_HAS_STAGE2_FWB, as
 * we then fully enforce cacheability of RAM, no matter what the guest
 * does.
 */
static void unmap_stage2_ptes(struct kvm *kvm, pmd_t *pmd,
			      phys_addr_t addr, phys_addr_t end)
{
	phys_addr_t start_addr = addr;
	pte_t *pte, *start_pte;

	start_pte = pte = pte_offset_kernel(pmd, addr);
	do {
		if (!pte_none(*pte)) {
			pte_t old_pte = *pte;

			kvm_set_pte(pte, __pte(0));
			kvm_tlb_flush_vmid_ipa(kvm, addr);

			/* No need to invalidate the cache for device mappings */
			if (!kvm_is_device_pfn(pte_pfn(old_pte)))
				kvm_flush_dcache_pte(old_pte);

			put_page(virt_to_page(pte));
		}
	} while (pte++, addr += PAGE_SIZE, addr != end);

	if (stage2_pte_table_empty(start_pte))
		clear_stage2_pmd_entry(kvm, pmd, start_addr);
}

static void unmap_stage2_pmds(struct kvm *kvm, pud_t *pud,
			      phys_addr_t addr, phys_addr_t end)
{
	phys_addr_t next, start_addr = addr;
	pmd_t *pmd, *start_pmd;

	start_pmd = pmd = stage2_pmd_offset(pud, addr);
	do {
		next = stage2_pmd_addr_end(addr, end);
		if (!pmd_none(*pmd)) {
			if (pmd_thp_or_huge(*pmd)) {
				pmd_t old_pmd = *pmd;

				pmd_clear(pmd);
				kvm_tlb_flush_vmid_ipa(kvm, addr);

				kvm_flush_dcache_pmd(old_pmd);

				put_page(virt_to_page(pmd));
			} else {
				unmap_stage2_ptes(kvm, pmd, addr, next);
			}
		}
	} while (pmd++, addr = next, addr != end);

	if (stage2_pmd_table_empty(start_pmd))
		clear_stage2_pud_entry(kvm, pud, start_addr);
}

static void unmap_stage2_puds(struct kvm *kvm, pgd_t *pgd,
			      phys_addr_t addr, phys_addr_t end)
{
	phys_addr_t next, start_addr = addr;
	pud_t *pud, *start_pud;

	start_pud = pud = stage2_pud_offset(pgd, addr);
	do {
		next = stage2_pud_addr_end(addr, end);
		if (!stage2_pud_none(*pud)) {
			if (stage2_pud_huge(*pud)) {
				pud_t old_pud = *pud;

				stage2_pud_clear(pud);
				kvm_tlb_flush_vmid_ipa(kvm, addr);
				kvm_flush_dcache_pud(old_pud);
				put_page(virt_to_page(pud));
			} else {
				unmap_stage2_pmds(kvm, pud, addr, next);
			}
		}
	} while (pud++, addr = next, addr != end);

	if (stage2_pud_table_empty(start_pud))
		clear_stage2_pgd_entry(kvm, pgd, start_addr);
}

/**
 * unmap_stage2_range -- Clear stage2 page table entries to unmap a range
 * @kvm:   The VM pointer
 * @start: The intermediate physical base address of the range to unmap
 * @size:  The size of the area to unmap
 *
 * Clear a range of stage-2 mappings, lowering the various ref-counts.  Must
 * be called while holding mmu_lock (unless for freeing the stage2 pgd before
 * destroying the VM), otherwise another faulting VCPU may come in and mess
 * with things behind our backs.
 */
static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
{
	pgd_t *pgd;
	phys_addr_t addr = start, end = start + size;
	phys_addr_t next;

	assert_spin_locked(&kvm->mmu_lock);
	WARN_ON(size & ~PAGE_MASK);

	pgd = kvm->arch.pgd + stage2_pgd_index(addr);
	do {
		/*
		 * Make sure the page table is still active, as another thread
		 * could have possibly freed the page table, while we released
		 * the lock.
		 */
		if (!READ_ONCE(kvm->arch.pgd))
			break;
		next = stage2_pgd_addr_end(addr, end);
		if (!stage2_pgd_none(*pgd))
			unmap_stage2_puds(kvm, pgd, addr, next);
		/*
		 * If the range is too large, release the kvm->mmu_lock
		 * to prevent starvation and lockup detector warnings.
		 */
		if (next != end)
			cond_resched_lock(&kvm->mmu_lock);
	} while (pgd++, addr = next, addr != end);
}
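
/*
 * A minimal sketch of how callers are expected to drive unmap_stage2_range()
 * (the values are illustrative; see stage2_unmap_memslot() below for a real
 * caller):
 *
 *	spin_lock(&kvm->mmu_lock);
 *	unmap_stage2_range(kvm, memslot->base_gfn << PAGE_SHIFT,
 *			   memslot->npages << PAGE_SHIFT);
 *	spin_unlock(&kvm->mmu_lock);
 *
 * As the kernel-doc above says, mmu_lock must be held; the function itself
 * may temporarily drop and re-take it for very large ranges.
 */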

static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd,
			      phys_addr_t addr, phys_addr_t end)
{
	pte_t *pte;

	pte = pte_offset_kernel(pmd, addr);
	do {
		if (!pte_none(*pte) && !kvm_is_device_pfn(pte_pfn(*pte)))
			kvm_flush_dcache_pte(*pte);
	} while (pte++, addr += PAGE_SIZE, addr != end);
}

static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud,
			      phys_addr_t addr, phys_addr_t end)
{
	pmd_t *pmd;
	phys_addr_t next;

	pmd = stage2_pmd_offset(pud, addr);
	do {
		next = stage2_pmd_addr_end(addr, end);
		if (!pmd_none(*pmd)) {
			if (pmd_thp_or_huge(*pmd))
				kvm_flush_dcache_pmd(*pmd);
			else
				stage2_flush_ptes(kvm, pmd, addr, next);
		}
	} while (pmd++, addr = next, addr != end);
}

static void stage2_flush_puds(struct kvm *kvm, pgd_t *pgd,
			      phys_addr_t addr, phys_addr_t end)
{
	pud_t *pud;
	phys_addr_t next;

	pud = stage2_pud_offset(pgd, addr);
	do {
		next = stage2_pud_addr_end(addr, end);
		if (!stage2_pud_none(*pud)) {
			if (stage2_pud_huge(*pud))
				kvm_flush_dcache_pud(*pud);
			else
				stage2_flush_pmds(kvm, pud, addr, next);
		}
	} while (pud++, addr = next, addr != end);
}

static void stage2_flush_memslot(struct kvm *kvm,
				 struct kvm_memory_slot *memslot)
{
	phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT;
	phys_addr_t end = addr + PAGE_SIZE * memslot->npages;
	phys_addr_t next;
	pgd_t *pgd;

	pgd = kvm->arch.pgd + stage2_pgd_index(addr);
	do {
		next = stage2_pgd_addr_end(addr, end);
		stage2_flush_puds(kvm, pgd, addr, next);
	} while (pgd++, addr = next, addr != end);
}

/**
 * stage2_flush_vm - Invalidate cache for pages mapped in stage 2
 * @kvm: The struct kvm pointer
 *
 * Go through the stage 2 page tables and invalidate any cache lines
 * backing memory already mapped to the VM.
 */
static void stage2_flush_vm(struct kvm *kvm)
{
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int idx;

	idx = srcu_read_lock(&kvm->srcu);
	spin_lock(&kvm->mmu_lock);

	slots = kvm_memslots(kvm);
	kvm_for_each_memslot(memslot, slots)
		stage2_flush_memslot(kvm, memslot);

	spin_unlock(&kvm->mmu_lock);
	srcu_read_unlock(&kvm->srcu, idx);
}

static void clear_hyp_pgd_entry(pgd_t *pgd)
{
	pud_t *pud_table __maybe_unused = pud_offset(pgd, 0UL);
	pgd_clear(pgd);
	pud_free(NULL, pud_table);
	put_page(virt_to_page(pgd));
}

static void clear_hyp_pud_entry(pud_t *pud)
{
	pmd_t *pmd_table __maybe_unused = pmd_offset(pud, 0);
	VM_BUG_ON(pud_huge(*pud));
	pud_clear(pud);
	pmd_free(NULL, pmd_table);
	put_page(virt_to_page(pud));
}

static void clear_hyp_pmd_entry(pmd_t *pmd)
{
	pte_t *pte_table = pte_offset_kernel(pmd, 0);
	VM_BUG_ON(pmd_thp_or_huge(*pmd));
	pmd_clear(pmd);
	pte_free_kernel(NULL, pte_table);
	put_page(virt_to_page(pmd));
}

static void unmap_hyp_ptes(pmd_t *pmd, phys_addr_t addr, phys_addr_t end)
{
	pte_t *pte, *start_pte;

	start_pte = pte = pte_offset_kernel(pmd, addr);
	do {
		if (!pte_none(*pte)) {
			kvm_set_pte(pte, __pte(0));
			put_page(virt_to_page(pte));
		}
	} while (pte++, addr += PAGE_SIZE, addr != end);

	if (hyp_pte_table_empty(start_pte))
		clear_hyp_pmd_entry(pmd);
}

static void unmap_hyp_pmds(pud_t *pud, phys_addr_t addr, phys_addr_t end)
{
	phys_addr_t next;
	pmd_t *pmd, *start_pmd;

	start_pmd = pmd = pmd_offset(pud, addr);
	do {
		next = pmd_addr_end(addr, end);
		/* Hyp doesn't use huge pmds */
		if (!pmd_none(*pmd))
			unmap_hyp_ptes(pmd, addr, next);
	} while (pmd++, addr = next, addr != end);

	if (hyp_pmd_table_empty(start_pmd))
		clear_hyp_pud_entry(pud);
}

static void unmap_hyp_puds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end)
{
	phys_addr_t next;
	pud_t *pud, *start_pud;

	start_pud = pud = pud_offset(pgd, addr);
	do {
		next = pud_addr_end(addr, end);
		/* Hyp doesn't use huge puds */
		if (!pud_none(*pud))
			unmap_hyp_pmds(pud, addr, next);
	} while (pud++, addr = next, addr != end);

	if (hyp_pud_table_empty(start_pud))
		clear_hyp_pgd_entry(pgd);
}

static unsigned int kvm_pgd_index(unsigned long addr, unsigned int ptrs_per_pgd)
{
	return (addr >> PGDIR_SHIFT) & (ptrs_per_pgd - 1);
}
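
/*
 * Worked example for kvm_pgd_index() (numbers are illustrative; the real
 * PGDIR_SHIFT depends on the page size and VA bits): with PGDIR_SHIFT == 30
 * and ptrs_per_pgd == 512, an address of 0x8000000000 gives
 * (0x8000000000 >> 30) & 511 == 512 & 511 == 0. Passing ptrs_per_pgd
 * explicitly matters because the idmap page table may have a different
 * number of PGD entries than the regular hyp table (see
 * __kvm_idmap_ptrs_per_pgd() below).
 */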

static void __unmap_hyp_range(pgd_t *pgdp, unsigned long ptrs_per_pgd,
			      phys_addr_t start, u64 size)
{
	pgd_t *pgd;
	phys_addr_t addr = start, end = start + size;
	phys_addr_t next;

	/*
	 * We don't unmap anything from HYP, except at the hyp tear down.
	 * Hence, we don't have to invalidate the TLBs here.
	 */
	pgd = pgdp + kvm_pgd_index(addr, ptrs_per_pgd);
	do {
		next = pgd_addr_end(addr, end);
		if (!pgd_none(*pgd))
			unmap_hyp_puds(pgd, addr, next);
	} while (pgd++, addr = next, addr != end);
}

static void unmap_hyp_range(pgd_t *pgdp, phys_addr_t start, u64 size)
{
	__unmap_hyp_range(pgdp, PTRS_PER_PGD, start, size);
}

static void unmap_hyp_idmap_range(pgd_t *pgdp, phys_addr_t start, u64 size)
{
	__unmap_hyp_range(pgdp, __kvm_idmap_ptrs_per_pgd(), start, size);
}

/**
 * free_hyp_pgds - free Hyp-mode page tables
 *
 * Assumes hyp_pgd is a page table used strictly in Hyp-mode and
 * therefore contains either mappings in the kernel memory area (above
 * PAGE_OFFSET), or device mappings in the idmap range.
 *
 * boot_hyp_pgd should only map the idmap range, and is only used in
 * the extended idmap case.
 */
void free_hyp_pgds(void)
{
	pgd_t *id_pgd;

	mutex_lock(&kvm_hyp_pgd_mutex);

	id_pgd = boot_hyp_pgd ? boot_hyp_pgd : hyp_pgd;

	if (id_pgd) {
		/* In case we never called hyp_mmu_init() */
		if (!io_map_base)
			io_map_base = hyp_idmap_start;
		unmap_hyp_idmap_range(id_pgd, io_map_base,
				      hyp_idmap_start + PAGE_SIZE - io_map_base);
	}

	if (boot_hyp_pgd) {
		free_pages((unsigned long)boot_hyp_pgd, hyp_pgd_order);
		boot_hyp_pgd = NULL;
	}

	if (hyp_pgd) {
		unmap_hyp_range(hyp_pgd, kern_hyp_va(PAGE_OFFSET),
				(uintptr_t)high_memory - PAGE_OFFSET);

		free_pages((unsigned long)hyp_pgd, hyp_pgd_order);
		hyp_pgd = NULL;
	}
	if (merged_hyp_pgd) {
		clear_page(merged_hyp_pgd);
		free_page((unsigned long)merged_hyp_pgd);
		merged_hyp_pgd = NULL;
	}

	mutex_unlock(&kvm_hyp_pgd_mutex);
}

static void create_hyp_pte_mappings(pmd_t *pmd, unsigned long start,
				    unsigned long end, unsigned long pfn,
				    pgprot_t prot)
{
	pte_t *pte;
	unsigned long addr;

	addr = start;
	do {
		pte = pte_offset_kernel(pmd, addr);
		kvm_set_pte(pte, pfn_pte(pfn, prot));
		get_page(virt_to_page(pte));
		kvm_flush_dcache_to_poc(pte, sizeof(*pte));
		pfn++;
	} while (addr += PAGE_SIZE, addr != end);
}

static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
				   unsigned long end, unsigned long pfn,
				   pgprot_t prot)
{
	pmd_t *pmd;
	pte_t *pte;
	unsigned long addr, next;

	addr = start;
	do {
		pmd = pmd_offset(pud, addr);

		BUG_ON(pmd_sect(*pmd));

		if (pmd_none(*pmd)) {
			pte = pte_alloc_one_kernel(NULL, addr);
			if (!pte) {
				kvm_err("Cannot allocate Hyp pte\n");
				return -ENOMEM;
			}
			pmd_populate_kernel(NULL, pmd, pte);
			get_page(virt_to_page(pmd));
			kvm_flush_dcache_to_poc(pmd, sizeof(*pmd));
		}

		next = pmd_addr_end(addr, end);

		create_hyp_pte_mappings(pmd, addr, next, pfn, prot);
		pfn += (next - addr) >> PAGE_SHIFT;
	} while (addr = next, addr != end);

	return 0;
}

static int create_hyp_pud_mappings(pgd_t *pgd, unsigned long start,
				   unsigned long end, unsigned long pfn,
				   pgprot_t prot)
{
	pud_t *pud;
	pmd_t *pmd;
	unsigned long addr, next;
	int ret;

	addr = start;
	do {
		pud = pud_offset(pgd, addr);

		if (pud_none_or_clear_bad(pud)) {
			pmd = pmd_alloc_one(NULL, addr);
			if (!pmd) {
				kvm_err("Cannot allocate Hyp pmd\n");
				return -ENOMEM;
			}
			pud_populate(NULL, pud, pmd);
			get_page(virt_to_page(pud));
			kvm_flush_dcache_to_poc(pud, sizeof(*pud));
		}

		next = pud_addr_end(addr, end);
		ret = create_hyp_pmd_mappings(pud, addr, next, pfn, prot);
		if (ret)
			return ret;
		pfn += (next - addr) >> PAGE_SHIFT;
	} while (addr = next, addr != end);

	return 0;
}

static int __create_hyp_mappings(pgd_t *pgdp, unsigned long ptrs_per_pgd,
				 unsigned long start, unsigned long end,
				 unsigned long pfn, pgprot_t prot)
{
	pgd_t *pgd;
	pud_t *pud;
	unsigned long addr, next;
	int err = 0;

	mutex_lock(&kvm_hyp_pgd_mutex);
	addr = start & PAGE_MASK;
	end = PAGE_ALIGN(end);
	do {
		pgd = pgdp + kvm_pgd_index(addr, ptrs_per_pgd);

		if (pgd_none(*pgd)) {
			pud = pud_alloc_one(NULL, addr);
			if (!pud) {
				kvm_err("Cannot allocate Hyp pud\n");
				err = -ENOMEM;
				goto out;
			}
			pgd_populate(NULL, pgd, pud);
			get_page(virt_to_page(pgd));
			kvm_flush_dcache_to_poc(pgd, sizeof(*pgd));
		}

		next = pgd_addr_end(addr, end);
		err = create_hyp_pud_mappings(pgd, addr, next, pfn, prot);
		if (err)
			goto out;
		pfn += (next - addr) >> PAGE_SHIFT;
	} while (addr = next, addr != end);
out:
	mutex_unlock(&kvm_hyp_pgd_mutex);
	return err;
}

static phys_addr_t kvm_kaddr_to_phys(void *kaddr)
{
	if (!is_vmalloc_addr(kaddr)) {
		BUG_ON(!virt_addr_valid(kaddr));
		return __pa(kaddr);
	} else {
		return page_to_phys(vmalloc_to_page(kaddr)) +
		       offset_in_page(kaddr);
	}
}

/**
 * create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode
 * @from:	The virtual kernel start address of the range
 * @to:		The virtual kernel end address of the range (exclusive)
 * @prot:	The protection to be applied to this range
 *
 * The same virtual address as the kernel virtual address is also used
 * in Hyp-mode mapping (modulo HYP_PAGE_OFFSET) to the same underlying
 * physical pages.
 */
int create_hyp_mappings(void *from, void *to, pgprot_t prot)
{
	phys_addr_t phys_addr;
	unsigned long virt_addr;
	unsigned long start = kern_hyp_va((unsigned long)from);
	unsigned long end = kern_hyp_va((unsigned long)to);

	if (is_kernel_in_hyp_mode())
		return 0;

	start = start & PAGE_MASK;
	end = PAGE_ALIGN(end);

	for (virt_addr = start; virt_addr < end; virt_addr += PAGE_SIZE) {
		int err;

		phys_addr = kvm_kaddr_to_phys(from + virt_addr - start);
		err = __create_hyp_mappings(hyp_pgd, PTRS_PER_PGD,
					    virt_addr, virt_addr + PAGE_SIZE,
					    __phys_to_pfn(phys_addr),
					    prot);
		if (err)
			return err;
	}

	return 0;
}
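
/*
 * A minimal usage sketch for create_hyp_mappings(). The real call sites live
 * outside this file (in the KVM init code), so the section names below are
 * illustrative only:
 *
 *	err = create_hyp_mappings(kvm_ksym_ref(__start_rodata),
 *				  kvm_ksym_ref(__end_rodata), PAGE_HYP_RO);
 *	if (err)
 *		goto out_err;
 *
 * On VHE systems the function returns 0 immediately, since the kernel already
 * runs at EL2 and no separate HYP mapping is needed.
 */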

static int __create_hyp_private_mapping(phys_addr_t phys_addr, size_t size,
					unsigned long *haddr, pgprot_t prot)
{
	pgd_t *pgd = hyp_pgd;
	unsigned long base;
	int ret = 0;

	mutex_lock(&kvm_hyp_pgd_mutex);

	/*
	 * This assumes that we have enough space below the idmap
	 * page to allocate our VAs. If not, the check below will
	 * kick in. A potential alternative would be to detect that
	 * overflow and switch to an allocation above the idmap.
	 *
	 * The allocated size is always a multiple of PAGE_SIZE.
	 */
	size = PAGE_ALIGN(size + offset_in_page(phys_addr));
	base = io_map_base - size;

	/*
	 * Verify that BIT(VA_BITS - 1) hasn't been flipped by
	 * allocating the new area, as it would indicate we've
	 * overflowed the idmap/IO address range.
	 */
	if ((base ^ io_map_base) & BIT(VA_BITS - 1))
		ret = -ENOMEM;
	else
		io_map_base = base;

	mutex_unlock(&kvm_hyp_pgd_mutex);

	if (ret)
		goto out;

	if (__kvm_cpu_uses_extended_idmap())
		pgd = boot_hyp_pgd;

	ret = __create_hyp_mappings(pgd, __kvm_idmap_ptrs_per_pgd(),
				    base, base + size,
				    __phys_to_pfn(phys_addr), prot);
	if (ret)
		goto out;

	*haddr = base + offset_in_page(phys_addr);

out:
	return ret;
}

/**
 * create_hyp_io_mappings - Map IO into both kernel and HYP
 * @phys_addr:	The physical start address which gets mapped
 * @size:	Size of the region being mapped
 * @kaddr:	Kernel VA for this mapping
 * @haddr:	HYP VA for this mapping
 */
int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size,
			   void __iomem **kaddr,
			   void __iomem **haddr)
{
	unsigned long addr;
	int ret;

	*kaddr = ioremap(phys_addr, size);
	if (!*kaddr)
		return -ENOMEM;

	if (is_kernel_in_hyp_mode()) {
		*haddr = *kaddr;
		return 0;
	}

	ret = __create_hyp_private_mapping(phys_addr, size,
					   &addr, PAGE_HYP_DEVICE);
	if (ret) {
		iounmap(*kaddr);
		*kaddr = NULL;
		*haddr = NULL;
		return ret;
	}

	*haddr = (void __iomem *)addr;
	return 0;
}
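
/*
 * A minimal usage sketch, assuming a caller that needs a device region (for
 * example a GIC CPU interface) visible both to the kernel and to the EL2
 * code; "res" is a hypothetical struct resource describing the region:
 *
 *	void __iomem *kaddr, *haddr;
 *	int err;
 *
 *	err = create_hyp_io_mappings(res->start, resource_size(res),
 *				     &kaddr, &haddr);
 *
 * On success kaddr is usable from kernel context and haddr from HYP context;
 * on VHE the two are the same pointer.
 */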

/**
 * create_hyp_exec_mappings - Map an executable range into HYP
 * @phys_addr:	The physical start address which gets mapped
 * @size:	Size of the region being mapped
 * @haddr:	HYP VA for this mapping
 */
int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
			     void **haddr)
{
	unsigned long addr;
	int ret;

	BUG_ON(is_kernel_in_hyp_mode());

	ret = __create_hyp_private_mapping(phys_addr, size,
					   &addr, PAGE_HYP_EXEC);
	if (ret) {
		*haddr = NULL;
		return ret;
	}

	*haddr = (void *)addr;
	return 0;
}

/**
 * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
 * @kvm:	The KVM struct pointer for the VM.
 *
 * Allocates only the stage-2 HW PGD level table(s) (can support either full
 * 40-bit input addresses or limited to 32-bit input addresses). Clears the
 * allocated pages.
 *
 * Note we don't need locking here as this is only called when the VM is
 * created, which can only be done once.
 */
int kvm_alloc_stage2_pgd(struct kvm *kvm)
{
	pgd_t *pgd;

	if (kvm->arch.pgd != NULL) {
		kvm_err("kvm_arch already initialized?\n");
		return -EINVAL;
	}

	/* Allocate the HW PGD, making sure that each page gets its own refcount */
	pgd = alloc_pages_exact(S2_PGD_SIZE, GFP_KERNEL | __GFP_ZERO);
	if (!pgd)
		return -ENOMEM;

	kvm->arch.pgd = pgd;
	return 0;
}

static void stage2_unmap_memslot(struct kvm *kvm,
				 struct kvm_memory_slot *memslot)
{
	hva_t hva = memslot->userspace_addr;
	phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT;
	phys_addr_t size = PAGE_SIZE * memslot->npages;
	hva_t reg_end = hva + size;

	/*
	 * A memory region could potentially cover multiple VMAs, and any holes
	 * between them, so iterate over all of them to find out if we should
	 * unmap any of them.
	 *
	 *     +--------------------------------------------+
	 * +---------------+----------------+   +----------------+
	 * |   : VMA 1     |      VMA 2     |   |    VMA 3  :    |
	 * +---------------+----------------+   +----------------+
	 *     |               memory region                |
	 *     +--------------------------------------------+
	 */
	do {
		struct vm_area_struct *vma = find_vma(current->mm, hva);
		hva_t vm_start, vm_end;

		if (!vma || vma->vm_start >= reg_end)
			break;

		/*
		 * Take the intersection of this VMA with the memory region
		 */
		vm_start = max(hva, vma->vm_start);
		vm_end = min(reg_end, vma->vm_end);

		if (!(vma->vm_flags & VM_PFNMAP)) {
			gpa_t gpa = addr + (vm_start - memslot->userspace_addr);
			unmap_stage2_range(kvm, gpa, vm_end - vm_start);
		}
		hva = vm_end;
	} while (hva < reg_end);
}

/**
 * stage2_unmap_vm - Unmap Stage-2 RAM mappings
 * @kvm: The struct kvm pointer
 *
 * Go through the memregions and unmap any regular RAM
 * backing memory already mapped to the VM.
 */
void stage2_unmap_vm(struct kvm *kvm)
{
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int idx;

	idx = srcu_read_lock(&kvm->srcu);
	down_read(&current->mm->mmap_sem);
	spin_lock(&kvm->mmu_lock);

	slots = kvm_memslots(kvm);
	kvm_for_each_memslot(memslot, slots)
		stage2_unmap_memslot(kvm, memslot);

	spin_unlock(&kvm->mmu_lock);
	up_read(&current->mm->mmap_sem);
	srcu_read_unlock(&kvm->srcu, idx);
}

/**
 * kvm_free_stage2_pgd - free all stage-2 tables
 * @kvm:	The KVM struct pointer for the VM.
 *
 * Walks the level-1 page table pointed to by kvm->arch.pgd and frees all
 * underlying level-2 and level-3 tables before freeing the actual level-1 table
 * and setting the struct pointer to NULL.
 */
void kvm_free_stage2_pgd(struct kvm *kvm)
{
	void *pgd = NULL;

	spin_lock(&kvm->mmu_lock);
	if (kvm->arch.pgd) {
		unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
		pgd = READ_ONCE(kvm->arch.pgd);
		kvm->arch.pgd = NULL;
	}
	spin_unlock(&kvm->mmu_lock);

	/* Free the HW pgd, one page at a time */
	if (pgd)
		free_pages_exact(pgd, S2_PGD_SIZE);
}

static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
			     phys_addr_t addr)
{
	pgd_t *pgd;
	pud_t *pud;

	pgd = kvm->arch.pgd + stage2_pgd_index(addr);
	if (WARN_ON(stage2_pgd_none(*pgd))) {
		if (!cache)
			return NULL;
		pud = mmu_memory_cache_alloc(cache);
		stage2_pgd_populate(pgd, pud);
		get_page(virt_to_page(pgd));
	}

	return stage2_pud_offset(pgd, addr);
}

static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
			     phys_addr_t addr)
{
	pud_t *pud;
	pmd_t *pmd;

	pud = stage2_get_pud(kvm, cache, addr);
	if (!pud)
		return NULL;

	if (stage2_pud_none(*pud)) {
		if (!cache)
			return NULL;
		pmd = mmu_memory_cache_alloc(cache);
		stage2_pud_populate(pud, pmd);
		get_page(virt_to_page(pud));
	}

	return stage2_pmd_offset(pud, addr);
}

static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
			       *cache, phys_addr_t addr, const pmd_t *new_pmd)
{
	pmd_t *pmd, old_pmd;

	pmd = stage2_get_pmd(kvm, cache, addr);
	VM_BUG_ON(!pmd);

	/*
	 * Mapping in huge pages should only happen through a fault.  If a
	 * page is merged into a transparent huge page, the individual
	 * subpages of that huge page should be unmapped through MMU
	 * notifiers before we get here.
	 *
	 * Merging of CompoundPages is not supported; they should be split
	 * first, then unmapped, merged, and mapped back in on-demand.
	 */
	VM_BUG_ON(pmd_present(*pmd) && pmd_pfn(*pmd) != pmd_pfn(*new_pmd));

	old_pmd = *pmd;
	if (pmd_present(old_pmd)) {
		pmd_clear(pmd);
		kvm_tlb_flush_vmid_ipa(kvm, addr);
	} else {
		get_page(virt_to_page(pmd));
	}

	kvm_set_pmd(pmd, *new_pmd);
	return 0;
}

static bool stage2_is_exec(struct kvm *kvm, phys_addr_t addr)
{
	pmd_t *pmdp;
	pte_t *ptep;

	pmdp = stage2_get_pmd(kvm, NULL, addr);
	if (!pmdp || pmd_none(*pmdp) || !pmd_present(*pmdp))
		return false;

	if (pmd_thp_or_huge(*pmdp))
		return kvm_s2pmd_exec(pmdp);

	ptep = pte_offset_kernel(pmdp, addr);
	if (!ptep || pte_none(*ptep) || !pte_present(*ptep))
		return false;

	return kvm_s2pte_exec(ptep);
}

static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
			  phys_addr_t addr, const pte_t *new_pte,
			  unsigned long flags)
{
	pmd_t *pmd;
	pte_t *pte, old_pte;
	bool iomap = flags & KVM_S2PTE_FLAG_IS_IOMAP;
	bool logging_active = flags & KVM_S2_FLAG_LOGGING_ACTIVE;

	VM_BUG_ON(logging_active && !cache);

	/* Create stage-2 page table mapping - Levels 0 and 1 */
	pmd = stage2_get_pmd(kvm, cache, addr);
	if (!pmd) {
		/*
		 * Ignore calls from kvm_set_spte_hva for unallocated
		 * address ranges.
		 */
		return 0;
	}

	/*
	 * While dirty page logging - dissolve huge PMD, then continue on to
	 * allocate page.
	 */
	if (logging_active)
		stage2_dissolve_pmd(kvm, addr, pmd);

	/* Create stage-2 page mappings - Level 2 */
	if (pmd_none(*pmd)) {
		if (!cache)
			return 0; /* ignore calls from kvm_set_spte_hva */
		pte = mmu_memory_cache_alloc(cache);
		pmd_populate_kernel(NULL, pmd, pte);
		get_page(virt_to_page(pmd));
	}

	pte = pte_offset_kernel(pmd, addr);

	if (iomap && pte_present(*pte))
		return -EFAULT;

	/* Create 2nd stage page table mapping - Level 3 */
	old_pte = *pte;
	if (pte_present(old_pte)) {
		kvm_set_pte(pte, __pte(0));
		kvm_tlb_flush_vmid_ipa(kvm, addr);
	} else {
		get_page(virt_to_page(pte));
	}

	kvm_set_pte(pte, *new_pte);
	return 0;
}

#ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
static int stage2_ptep_test_and_clear_young(pte_t *pte)
{
	if (pte_young(*pte)) {
		*pte = pte_mkold(*pte);
		return 1;
	}
	return 0;
}
#else
static int stage2_ptep_test_and_clear_young(pte_t *pte)
{
	return __ptep_test_and_clear_young(pte);
}
#endif

static int stage2_pmdp_test_and_clear_young(pmd_t *pmd)
{
	return stage2_ptep_test_and_clear_young((pte_t *)pmd);
}

/**
 * kvm_phys_addr_ioremap - map a device range to guest IPA
 *
 * @kvm:	The KVM pointer
 * @guest_ipa:	The IPA at which to insert the mapping
 * @pa:		The physical address of the device
 * @size:	The size of the mapping
 * @writable:	Whether or not to create a writable mapping
 */
int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
			  phys_addr_t pa, unsigned long size, bool writable)
{
	phys_addr_t addr, end;
	int ret = 0;
	unsigned long pfn;
	struct kvm_mmu_memory_cache cache = { 0, };

	end = (guest_ipa + size + PAGE_SIZE - 1) & PAGE_MASK;
	pfn = __phys_to_pfn(pa);

	for (addr = guest_ipa; addr < end; addr += PAGE_SIZE) {
		pte_t pte = pfn_pte(pfn, PAGE_S2_DEVICE);

		if (writable)
			pte = kvm_s2pte_mkwrite(pte);

		ret = mmu_topup_memory_cache(&cache, KVM_MMU_CACHE_MIN_PAGES,
					     KVM_NR_MEM_OBJS);
		if (ret)
			goto out;
		spin_lock(&kvm->mmu_lock);
		ret = stage2_set_pte(kvm, &cache, addr, &pte,
				     KVM_S2PTE_FLAG_IS_IOMAP);
		spin_unlock(&kvm->mmu_lock);
		if (ret)
			goto out;

		pfn++;
	}

out:
	mmu_free_memory_cache(&cache);
	return ret;
}
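
/*
 * A minimal usage sketch for kvm_phys_addr_ioremap(); gpa and pa are
 * hypothetical addresses chosen by the caller (in practice the vgic code
 * uses this to expose a host device page to the guest):
 *
 *	ret = kvm_phys_addr_ioremap(kvm, gpa, pa, PAGE_SIZE, true);
 *
 * The mapping is always installed with PAGE_S2_DEVICE attributes, optionally
 * made writable, so the guest sees the range as device memory.
 */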

static bool transparent_hugepage_adjust(kvm_pfn_t *pfnp, phys_addr_t *ipap)
{
	kvm_pfn_t pfn = *pfnp;
	gfn_t gfn = *ipap >> PAGE_SHIFT;

	if (PageTransCompoundMap(pfn_to_page(pfn))) {
		unsigned long mask;
		/*
		 * The address we faulted on is backed by a transparent huge
		 * page.  However, because we map the compound huge page and
		 * not the individual tail page, we need to transfer the
		 * refcount to the head page.  We have to be careful that the
		 * THP doesn't start to split while we are adjusting the
		 * refcounts.
		 *
		 * We are sure this doesn't happen, because mmu_notifier_retry
		 * was successful and we are holding the mmu_lock, so if this
		 * THP is trying to split, it will be blocked in the mmu
		 * notifier before touching any of the pages, specifically
		 * before being able to call __split_huge_page_refcount().
		 *
		 * We can therefore safely transfer the refcount from PG_tail
		 * to PG_head and switch the pfn from a tail page to the head
		 * page accordingly.
		 */
		mask = PTRS_PER_PMD - 1;
		VM_BUG_ON((gfn & mask) != (pfn & mask));
		if (pfn & mask) {
			*ipap &= PMD_MASK;
			kvm_release_pfn_clean(pfn);
			pfn &= ~mask;
			kvm_get_pfn(pfn);
			*pfnp = pfn;
		}

		return true;
	}

	return false;
}

static bool kvm_is_write_fault(struct kvm_vcpu *vcpu)
{
	if (kvm_vcpu_trap_is_iabt(vcpu))
		return false;

	return kvm_vcpu_dabt_iswrite(vcpu);
}

/**
 * stage2_wp_ptes - write protect PMD range
 * @pmd:	pointer to pmd entry
 * @addr:	range start address
 * @end:	range end address
 */
static void stage2_wp_ptes(pmd_t *pmd, phys_addr_t addr, phys_addr_t end)
{
	pte_t *pte;

	pte = pte_offset_kernel(pmd, addr);
	do {
		if (!pte_none(*pte)) {
			if (!kvm_s2pte_readonly(pte))
				kvm_set_s2pte_readonly(pte);
		}
	} while (pte++, addr += PAGE_SIZE, addr != end);
}

/**
 * stage2_wp_pmds - write protect PUD range
 * @pud:	pointer to pud entry
 * @addr:	range start address
 * @end:	range end address
 */
static void stage2_wp_pmds(pud_t *pud, phys_addr_t addr, phys_addr_t end)
{
	pmd_t *pmd;
	phys_addr_t next;

	pmd = stage2_pmd_offset(pud, addr);

	do {
		next = stage2_pmd_addr_end(addr, end);
		if (!pmd_none(*pmd)) {
			if (pmd_thp_or_huge(*pmd)) {
				if (!kvm_s2pmd_readonly(pmd))
					kvm_set_s2pmd_readonly(pmd);
			} else {
				stage2_wp_ptes(pmd, addr, next);
			}
		}
	} while (pmd++, addr = next, addr != end);
}

/**
 * stage2_wp_puds - write protect PGD range
 * @pgd:	pointer to pgd entry
 * @addr:	range start address
 * @end:	range end address
 *
 * Process PUD entries; huge PUDs are not supported, so we BUG() on them.
 */
static void stage2_wp_puds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end)
{
	pud_t *pud;
	phys_addr_t next;

	pud = stage2_pud_offset(pgd, addr);
	do {
		next = stage2_pud_addr_end(addr, end);
		if (!stage2_pud_none(*pud)) {
			/* TODO: PUD not supported, revisit later if supported */
			BUG_ON(stage2_pud_huge(*pud));
			stage2_wp_pmds(pud, addr, next);
		}
	} while (pud++, addr = next, addr != end);
}

/**
 * stage2_wp_range() - write protect stage2 memory region range
 * @kvm:	The KVM pointer
 * @addr:	Start address of range
 * @end:	End address of range
 */
static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
{
	pgd_t *pgd;
	phys_addr_t next;

	pgd = kvm->arch.pgd + stage2_pgd_index(addr);
	do {
		/*
		 * Release kvm_mmu_lock periodically if the memory region is
		 * large. Otherwise, we may see kernel panics with
		 * CONFIG_DETECT_HUNG_TASK, CONFIG_LOCKUP_DETECTOR,
		 * CONFIG_LOCKDEP. Additionally, holding the lock too long
		 * will also starve other vCPUs. We also have to make sure
		 * that the page tables are not freed while we release
		 * the lock.
		 */
		cond_resched_lock(&kvm->mmu_lock);
		if (!READ_ONCE(kvm->arch.pgd))
			break;
		next = stage2_pgd_addr_end(addr, end);
		if (stage2_pgd_present(*pgd))
			stage2_wp_puds(pgd, addr, next);
	} while (pgd++, addr = next, addr != end);
}

/**
 * kvm_mmu_wp_memory_region() - write protect stage 2 entries for memory slot
 * @kvm:	The KVM pointer
 * @slot:	The memory slot to write protect
 *
 * Called to start logging dirty pages after the memory region's
 * KVM_MEM_LOG_DIRTY_PAGES flag has been set. After this function returns,
 * all present PMDs and PTEs in the memory region are write protected.
 * Afterwards, the dirty page log can be read.
 *
 * Acquires kvm_mmu_lock. Called with kvm->slots_lock mutex acquired,
 * serializing operations for VM memory regions.
 */
void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
{
	struct kvm_memslots *slots = kvm_memslots(kvm);
	struct kvm_memory_slot *memslot = id_to_memslot(slots, slot);
	phys_addr_t start = memslot->base_gfn << PAGE_SHIFT;
	phys_addr_t end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT;

	spin_lock(&kvm->mmu_lock);
	stage2_wp_range(kvm, start, end);
	spin_unlock(&kvm->mmu_lock);
	kvm_flush_remote_tlbs(kvm);
}

/**
 * kvm_mmu_write_protect_pt_masked() - write protect dirty pages
 * @kvm:	The KVM pointer
 * @slot:	The memory slot associated with mask
 * @gfn_offset:	The gfn offset in memory slot
 * @mask:	The mask of dirty pages at offset 'gfn_offset' in this memory
 *		slot to be write protected
 *
 * Walks the bits set in mask and write protects the associated PTEs. Caller
 * must acquire kvm_mmu_lock.
 */
static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
		struct kvm_memory_slot *slot,
		gfn_t gfn_offset, unsigned long mask)
{
	phys_addr_t base_gfn = slot->base_gfn + gfn_offset;
	phys_addr_t start = (base_gfn + __ffs(mask)) << PAGE_SHIFT;
	phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT;

	stage2_wp_range(kvm, start, end);
}
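
/*
 * Worked example for the mask arithmetic above (values are illustrative):
 * with base_gfn == 0x1000 and mask == 0x78 (bits 3..6 set), __ffs(mask) == 3
 * and __fls(mask) == 6, so the write-protected range covers gfn 0x1003 up to
 * and including gfn 0x1006, i.e. IPAs [0x1003000, 0x1007000) with 4K pages.
 * Any clear bits between the first and last set bit are write protected as
 * well, which is harmless: an already-clean page is simply marked dirty
 * again on its next write.
 */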
Mario Smarduchc6473552015-01-15 15:58:56 -08001380
Kai Huang3b0f1d02015-01-28 10:54:23 +08001381/*
1382 * kvm_arch_mmu_enable_log_dirty_pt_masked - enable dirty logging for selected
1383 * dirty pages.
1384 *
1385 * It calls kvm_mmu_write_protect_pt_masked to write protect selected pages to
1386 * enable dirty logging for them.
1387 */
1388void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
1389 struct kvm_memory_slot *slot,
1390 gfn_t gfn_offset, unsigned long mask)
1391{
1392 kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
1393}
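/*
 * For orientation (a simplified description of the generic KVM dirty-log
 * path, not something defined in this file): this hook is typically reached
 * from the KVM_GET_DIRTY_LOG ioctl via kvm_get_dirty_log_protect(), which
 * passes one word of the dirty bitmap at a time as 'mask' together with the
 * matching gfn_offset.
 */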
1394
Marc Zyngier17ab9d52017-10-23 17:11:22 +01001395static void clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size)
Marc Zyngier0d3e4d42015-01-05 21:13:24 +00001396{
Marc Zyngier17ab9d52017-10-23 17:11:22 +01001397 __clean_dcache_guest_page(pfn, size);
Marc Zyngiera15f6932017-10-23 17:11:15 +01001398}
1399
Marc Zyngier17ab9d52017-10-23 17:11:22 +01001400static void invalidate_icache_guest_page(kvm_pfn_t pfn, unsigned long size)
Marc Zyngiera15f6932017-10-23 17:11:15 +01001401{
Marc Zyngier17ab9d52017-10-23 17:11:22 +01001402 __invalidate_icache_guest_page(pfn, size);
Marc Zyngier0d3e4d42015-01-05 21:13:24 +00001403}
1404
James Morse196f8782017-06-20 17:11:48 +01001405static void kvm_send_hwpoison_signal(unsigned long address,
1406 struct vm_area_struct *vma)
1407{
1408 siginfo_t info;
1409
Eric W. Biederman3eb0f512018-04-17 15:26:37 -05001410 clear_siginfo(&info);
James Morse196f8782017-06-20 17:11:48 +01001411 info.si_signo = SIGBUS;
1412 info.si_errno = 0;
1413 info.si_code = BUS_MCEERR_AR;
1414 info.si_addr = (void __user *)address;
1415
1416 if (is_vm_hugetlb_page(vma))
1417 info.si_addr_lsb = huge_page_shift(hstate_vma(vma));
1418 else
1419 info.si_addr_lsb = PAGE_SHIFT;
1420
1421 send_sig_info(SIGBUS, &info, current);
1422}
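/*
 * Note (added for clarity; the values are general kernel conventions rather
 * than anything defined here): si_addr_lsb tells userspace how much of the
 * poisoned address is significant. A hugetlbfs VMA backed by 2MiB pages
 * would typically report a shift of 21, while the ordinary case reports
 * PAGE_SHIFT (12 with 4K pages).
 */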
1423
Christoffer Dall94f8e642013-01-20 18:28:12 -05001424static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
Christoffer Dall98047882014-08-19 12:18:04 +02001425 struct kvm_memory_slot *memslot, unsigned long hva,
Christoffer Dall94f8e642013-01-20 18:28:12 -05001426 unsigned long fault_status)
1427{
Christoffer Dall94f8e642013-01-20 18:28:12 -05001428 int ret;
Marc Zyngierd0e22b42017-10-23 17:11:19 +01001429 bool write_fault, exec_fault, writable, hugetlb = false, force_pte = false;
Christoffer Dall94f8e642013-01-20 18:28:12 -05001430 unsigned long mmu_seq;
Christoffer Dallad361f02012-11-01 17:14:45 +01001431 gfn_t gfn = fault_ipa >> PAGE_SHIFT;
Christoffer Dallad361f02012-11-01 17:14:45 +01001432 struct kvm *kvm = vcpu->kvm;
Christoffer Dall94f8e642013-01-20 18:28:12 -05001433 struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
Christoffer Dallad361f02012-11-01 17:14:45 +01001434 struct vm_area_struct *vma;
Dan Williamsba049e92016-01-15 16:56:11 -08001435 kvm_pfn_t pfn;
Kim Phillipsb8865762014-06-26 01:45:51 +01001436 pgprot_t mem_type = PAGE_S2;
Mario Smarduch15a49a42015-01-15 15:58:58 -08001437 bool logging_active = memslot_is_logging(memslot);
1438 unsigned long flags = 0;
Christoffer Dall94f8e642013-01-20 18:28:12 -05001439
Ard Biesheuvela7d079c2014-09-09 11:27:09 +01001440 write_fault = kvm_is_write_fault(vcpu);
Marc Zyngierd0e22b42017-10-23 17:11:19 +01001441 exec_fault = kvm_vcpu_trap_is_iabt(vcpu);
1442 VM_BUG_ON(write_fault && exec_fault);
1443
1444 if (fault_status == FSC_PERM && !write_fault && !exec_fault) {
Christoffer Dall94f8e642013-01-20 18:28:12 -05001445 kvm_err("Unexpected L2 read permission error\n");
1446 return -EFAULT;
1447 }
1448
Christoffer Dallad361f02012-11-01 17:14:45 +01001449 /* Let's check if we will get back a huge page backed by hugetlbfs */
1450 down_read(&current->mm->mmap_sem);
1451 vma = find_vma_intersection(current->mm, hva, hva + 1);
Ard Biesheuvel37b54402014-09-17 14:56:17 -07001452 if (unlikely(!vma)) {
1453 kvm_err("Failed to find VMA for hva 0x%lx\n", hva);
1454 up_read(&current->mm->mmap_sem);
1455 return -EFAULT;
1456 }
1457
Punit Agrawalc507bab2018-01-04 18:24:33 +00001458 if (vma_kernel_pagesize(vma) == PMD_SIZE && !logging_active) {
Christoffer Dallad361f02012-11-01 17:14:45 +01001459 hugetlb = true;
1460 gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT;
Christoffer Dall9b5fdb92013-10-02 15:32:01 -07001461 } else {
1462 /*
Marc Zyngier136d7372013-12-13 16:56:06 +00001463 * Pages belonging to memslots that don't have the same
1464 * alignment for userspace and IPA cannot be mapped using
1465 * block descriptors even if the pages belong to a THP for
1466 * the process, because the stage-2 block descriptor will
1467	 * cover more than a single THP and we lose atomicity for
1468 * unmapping, updates, and splits of the THP or other pages
1469 * in the stage-2 block range.
Christoffer Dall9b5fdb92013-10-02 15:32:01 -07001470 */
Marc Zyngier136d7372013-12-13 16:56:06 +00001471 if ((memslot->userspace_addr & ~PMD_MASK) !=
1472 ((memslot->base_gfn << PAGE_SHIFT) & ~PMD_MASK))
Christoffer Dall9b5fdb92013-10-02 15:32:01 -07001473 force_pte = true;
Christoffer Dallad361f02012-11-01 17:14:45 +01001474 }
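	/*
	 * Illustrative example for the non-hugetlbfs path above (values are
	 * made up): with 2MiB blocks, ~PMD_MASK is 0x1fffff. If
	 * memslot->userspace_addr ends in 0x100000 while the memslot's IPA
	 * base is 2MiB aligned, the two offsets differ, no stage-2 block
	 * mapping can line up with a userspace THP, and force_pte is set.
	 */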
1475 up_read(&current->mm->mmap_sem);
1476
Christoffer Dall94f8e642013-01-20 18:28:12 -05001477	/* We need at minimum second and third level page table pages */
Christoffer Dall38f791a2014-10-10 12:14:28 +02001478 ret = mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES,
1479 KVM_NR_MEM_OBJS);
Christoffer Dall94f8e642013-01-20 18:28:12 -05001480 if (ret)
1481 return ret;
1482
1483 mmu_seq = vcpu->kvm->mmu_notifier_seq;
1484 /*
1485 * Ensure the read of mmu_notifier_seq happens before we call
1486 * gfn_to_pfn_prot (which calls get_user_pages), so that we don't risk
1487	 * the page we just got a reference to being unmapped before we have a
1488	 * chance to grab the mmu_lock, which ensures that if the page gets
1489	 * unmapped afterwards, the call to kvm_unmap_hva will take it away
1490 * from us again properly. This smp_rmb() interacts with the smp_wmb()
1491 * in kvm_mmu_notifier_invalidate_<page|range_end>.
1492 */
1493 smp_rmb();
1494
Christoffer Dallad361f02012-11-01 17:14:45 +01001495 pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writable);
James Morse196f8782017-06-20 17:11:48 +01001496 if (pfn == KVM_PFN_ERR_HWPOISON) {
1497 kvm_send_hwpoison_signal(hva, vma);
1498 return 0;
1499 }
Christoffer Dall9ac71592016-08-17 10:46:10 +02001500 if (is_error_noslot_pfn(pfn))
Christoffer Dall94f8e642013-01-20 18:28:12 -05001501 return -EFAULT;
1502
Mario Smarduch15a49a42015-01-15 15:58:58 -08001503 if (kvm_is_device_pfn(pfn)) {
Kim Phillipsb8865762014-06-26 01:45:51 +01001504 mem_type = PAGE_S2_DEVICE;
Mario Smarduch15a49a42015-01-15 15:58:58 -08001505 flags |= KVM_S2PTE_FLAG_IS_IOMAP;
1506 } else if (logging_active) {
1507 /*
1508 * Faults on pages in a memslot with logging enabled
1509 * should not be mapped with huge pages (it introduces churn
1510 * and performance degradation), so force a pte mapping.
1511 */
1512 force_pte = true;
1513 flags |= KVM_S2_FLAG_LOGGING_ACTIVE;
1514
1515 /*
1516 * Only actually map the page as writable if this was a write
1517 * fault.
1518 */
1519 if (!write_fault)
1520 writable = false;
1521 }
Kim Phillipsb8865762014-06-26 01:45:51 +01001522
Christoffer Dallad361f02012-11-01 17:14:45 +01001523 spin_lock(&kvm->mmu_lock);
1524 if (mmu_notifier_retry(kvm, mmu_seq))
Christoffer Dall94f8e642013-01-20 18:28:12 -05001525 goto out_unlock;
Mario Smarduch15a49a42015-01-15 15:58:58 -08001526
Christoffer Dall9b5fdb92013-10-02 15:32:01 -07001527 if (!hugetlb && !force_pte)
1528 hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa);
Christoffer Dallad361f02012-11-01 17:14:45 +01001529
1530 if (hugetlb) {
Kim Phillipsb8865762014-06-26 01:45:51 +01001531 pmd_t new_pmd = pfn_pmd(pfn, mem_type);
Christoffer Dallad361f02012-11-01 17:14:45 +01001532 new_pmd = pmd_mkhuge(new_pmd);
1533 if (writable) {
Catalin Marinas06485052016-04-13 17:57:37 +01001534 new_pmd = kvm_s2pmd_mkwrite(new_pmd);
Christoffer Dallad361f02012-11-01 17:14:45 +01001535 kvm_set_pfn_dirty(pfn);
1536 }
Marc Zyngiera9c0e122017-10-23 17:11:20 +01001537
1538 if (fault_status != FSC_PERM)
Marc Zyngier17ab9d52017-10-23 17:11:22 +01001539 clean_dcache_guest_page(pfn, PMD_SIZE);
Marc Zyngierd0e22b42017-10-23 17:11:19 +01001540
1541 if (exec_fault) {
1542 new_pmd = kvm_s2pmd_mkexec(new_pmd);
Marc Zyngier17ab9d52017-10-23 17:11:22 +01001543 invalidate_icache_guest_page(pfn, PMD_SIZE);
Marc Zyngier7a3796d2017-10-23 17:11:21 +01001544 } else if (fault_status == FSC_PERM) {
1545 /* Preserve execute if XN was already cleared */
1546 if (stage2_is_exec(kvm, fault_ipa))
1547 new_pmd = kvm_s2pmd_mkexec(new_pmd);
Marc Zyngierd0e22b42017-10-23 17:11:19 +01001548 }
Marc Zyngiera15f6932017-10-23 17:11:15 +01001549
Christoffer Dallad361f02012-11-01 17:14:45 +01001550 ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
1551 } else {
Kim Phillipsb8865762014-06-26 01:45:51 +01001552 pte_t new_pte = pfn_pte(pfn, mem_type);
Mario Smarduch15a49a42015-01-15 15:58:58 -08001553
Christoffer Dallad361f02012-11-01 17:14:45 +01001554 if (writable) {
Catalin Marinas06485052016-04-13 17:57:37 +01001555 new_pte = kvm_s2pte_mkwrite(new_pte);
Christoffer Dallad361f02012-11-01 17:14:45 +01001556 kvm_set_pfn_dirty(pfn);
Mario Smarduch15a49a42015-01-15 15:58:58 -08001557 mark_page_dirty(kvm, gfn);
Christoffer Dallad361f02012-11-01 17:14:45 +01001558 }
Marc Zyngiera9c0e122017-10-23 17:11:20 +01001559
1560 if (fault_status != FSC_PERM)
Marc Zyngier17ab9d52017-10-23 17:11:22 +01001561 clean_dcache_guest_page(pfn, PAGE_SIZE);
Marc Zyngierd0e22b42017-10-23 17:11:19 +01001562
1563 if (exec_fault) {
1564 new_pte = kvm_s2pte_mkexec(new_pte);
Marc Zyngier17ab9d52017-10-23 17:11:22 +01001565 invalidate_icache_guest_page(pfn, PAGE_SIZE);
Marc Zyngier7a3796d2017-10-23 17:11:21 +01001566 } else if (fault_status == FSC_PERM) {
1567 /* Preserve execute if XN was already cleared */
1568 if (stage2_is_exec(kvm, fault_ipa))
1569 new_pte = kvm_s2pte_mkexec(new_pte);
Marc Zyngierd0e22b42017-10-23 17:11:19 +01001570 }
Marc Zyngiera15f6932017-10-23 17:11:15 +01001571
Mario Smarduch15a49a42015-01-15 15:58:58 -08001572 ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, flags);
Christoffer Dall94f8e642013-01-20 18:28:12 -05001573 }
Christoffer Dallad361f02012-11-01 17:14:45 +01001574
Christoffer Dall94f8e642013-01-20 18:28:12 -05001575out_unlock:
Christoffer Dallad361f02012-11-01 17:14:45 +01001576 spin_unlock(&kvm->mmu_lock);
Marc Zyngier35307b92015-03-12 18:16:51 +00001577 kvm_set_pfn_accessed(pfn);
Christoffer Dall94f8e642013-01-20 18:28:12 -05001578 kvm_release_pfn_clean(pfn);
Christoffer Dallad361f02012-11-01 17:14:45 +01001579 return ret;
Christoffer Dall94f8e642013-01-20 18:28:12 -05001580}
1581
Marc Zyngieraeda9132015-03-12 18:16:52 +00001582/*
1583 * Resolve the access fault by making the page young again.
1584 * Note that because the faulting entry is guaranteed not to be
1585 * cached in the TLB, we don't need to invalidate anything.
Catalin Marinas06485052016-04-13 17:57:37 +01001586 * Only the HW Access Flag updates are supported for Stage 2 (no DBM),
1587 * so there is no need for atomic (pte|pmd)_mkyoung operations.
Marc Zyngieraeda9132015-03-12 18:16:52 +00001588 */
1589static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
1590{
1591 pmd_t *pmd;
1592 pte_t *pte;
Dan Williamsba049e92016-01-15 16:56:11 -08001593 kvm_pfn_t pfn;
Marc Zyngieraeda9132015-03-12 18:16:52 +00001594 bool pfn_valid = false;
1595
1596 trace_kvm_access_fault(fault_ipa);
1597
1598 spin_lock(&vcpu->kvm->mmu_lock);
1599
1600 pmd = stage2_get_pmd(vcpu->kvm, NULL, fault_ipa);
1601 if (!pmd || pmd_none(*pmd)) /* Nothing there */
1602 goto out;
1603
Suzuki K Poulosebbb3b6b2016-03-01 12:00:39 +00001604 if (pmd_thp_or_huge(*pmd)) { /* THP, HugeTLB */
Marc Zyngieraeda9132015-03-12 18:16:52 +00001605 *pmd = pmd_mkyoung(*pmd);
1606 pfn = pmd_pfn(*pmd);
1607 pfn_valid = true;
1608 goto out;
1609 }
1610
1611 pte = pte_offset_kernel(pmd, fault_ipa);
1612 if (pte_none(*pte)) /* Nothing there either */
1613 goto out;
1614
1615 *pte = pte_mkyoung(*pte); /* Just a page... */
1616 pfn = pte_pfn(*pte);
1617 pfn_valid = true;
1618out:
1619 spin_unlock(&vcpu->kvm->mmu_lock);
1620 if (pfn_valid)
1621 kvm_set_pfn_accessed(pfn);
1622}
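/*
 * Descriptive note (added for clarity, not from the original source): the
 * walk above checks the PMD level first so that a THP or HugeTLB block
 * mapping is made young in place; kvm_age_hva_handler() further down
 * follows the same pattern when it tests and clears the young state.
 */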
1623
Christoffer Dall94f8e642013-01-20 18:28:12 -05001624/**
1625 * kvm_handle_guest_abort - handles all 2nd stage aborts
1626 * @vcpu: the VCPU pointer
1627 * @run: the kvm_run structure
1628 *
1629 * Any abort that gets to the host is almost guaranteed to be caused by a
1630 * missing second stage translation table entry, which can mean either that the
1631 * guest simply needs more memory and we must allocate an appropriate page, or
1632 * that the guest tried to access I/O memory, which is emulated by user
1633 * space. The distinction is based on the IPA causing the fault and whether this
1634 * memory region has been registered as standard RAM by user space.
1635 */
Christoffer Dall342cd0a2013-01-20 18:28:06 -05001636int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
1637{
Christoffer Dall94f8e642013-01-20 18:28:12 -05001638 unsigned long fault_status;
1639 phys_addr_t fault_ipa;
1640 struct kvm_memory_slot *memslot;
Christoffer Dall98047882014-08-19 12:18:04 +02001641 unsigned long hva;
1642 bool is_iabt, write_fault, writable;
Christoffer Dall94f8e642013-01-20 18:28:12 -05001643 gfn_t gfn;
1644 int ret, idx;
1645
Tyler Baicar621f48e2017-06-21 12:17:14 -06001646 fault_status = kvm_vcpu_trap_get_fault_type(vcpu);
1647
1648 fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
James Morsebb428922017-07-18 13:37:41 +01001649 is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
Tyler Baicar621f48e2017-06-21 12:17:14 -06001650
James Morsebb428922017-07-18 13:37:41 +01001651 /* Synchronous External Abort? */
1652 if (kvm_vcpu_dabt_isextabt(vcpu)) {
1653 /*
1654 * For RAS the host kernel may handle this abort.
1655 * There is no need to pass the error into the guest.
1656 */
Tyler Baicar621f48e2017-06-21 12:17:14 -06001657 if (!handle_guest_sea(fault_ipa, kvm_vcpu_get_hsr(vcpu)))
1658 return 1;
Tyler Baicar621f48e2017-06-21 12:17:14 -06001659
James Morsebb428922017-07-18 13:37:41 +01001660 if (unlikely(!is_iabt)) {
1661 kvm_inject_vabt(vcpu);
1662 return 1;
1663 }
Marc Zyngier40557102016-09-06 14:02:15 +01001664 }
1665
Marc Zyngier7393b592012-09-17 19:27:09 +01001666 trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_hsr(vcpu),
1667 kvm_vcpu_get_hfar(vcpu), fault_ipa);
Christoffer Dall94f8e642013-01-20 18:28:12 -05001668
1669	/* Check that the stage-2 fault is a translation, permission or access fault */
Marc Zyngier35307b92015-03-12 18:16:51 +00001670 if (fault_status != FSC_FAULT && fault_status != FSC_PERM &&
1671 fault_status != FSC_ACCESS) {
Christoffer Dall0496daa52014-09-26 12:29:34 +02001672 kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n",
1673 kvm_vcpu_trap_get_class(vcpu),
1674 (unsigned long)kvm_vcpu_trap_get_fault(vcpu),
1675 (unsigned long)kvm_vcpu_get_hsr(vcpu));
Christoffer Dall94f8e642013-01-20 18:28:12 -05001676 return -EFAULT;
1677 }
1678
1679 idx = srcu_read_lock(&vcpu->kvm->srcu);
1680
1681 gfn = fault_ipa >> PAGE_SHIFT;
Christoffer Dall98047882014-08-19 12:18:04 +02001682 memslot = gfn_to_memslot(vcpu->kvm, gfn);
1683 hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable);
Ard Biesheuvela7d079c2014-09-09 11:27:09 +01001684 write_fault = kvm_is_write_fault(vcpu);
Christoffer Dall98047882014-08-19 12:18:04 +02001685 if (kvm_is_error_hva(hva) || (write_fault && !writable)) {
Christoffer Dall94f8e642013-01-20 18:28:12 -05001686 if (is_iabt) {
1687 /* Prefetch Abort on I/O address */
Marc Zyngier7393b592012-09-17 19:27:09 +01001688 kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu));
Christoffer Dall94f8e642013-01-20 18:28:12 -05001689 ret = 1;
1690 goto out_unlock;
1691 }
1692
Marc Zyngiercfe39502012-12-12 14:42:09 +00001693 /*
Marc Zyngier57c841f2016-01-29 15:01:28 +00001694 * Check for a cache maintenance operation. Since we
1695		 * ended up here, we know it is outside of any memory
1696 * slot. But we can't find out if that is for a device,
1697 * or if the guest is just being stupid. The only thing
1698 * we know for sure is that this range cannot be cached.
1699 *
1700 * So let's assume that the guest is just being
1701 * cautious, and skip the instruction.
1702 */
1703 if (kvm_vcpu_dabt_is_cm(vcpu)) {
1704 kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
1705 ret = 1;
1706 goto out_unlock;
1707 }
1708
1709 /*
Marc Zyngiercfe39502012-12-12 14:42:09 +00001710 * The IPA is reported as [MAX:12], so we need to
1711 * complement it with the bottom 12 bits from the
1712 * faulting VA. This is always 12 bits, irrespective
1713 * of the page size.
1714 */
1715 fault_ipa |= kvm_vcpu_get_hfar(vcpu) & ((1 << 12) - 1);
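		/*
		 * Illustrative example (addresses are made up): if the
		 * reported fault_ipa is 0x8f000000 and the faulting VA in
		 * HFAR/FAR ends in 0x234, the address handed to
		 * io_mem_abort() below is 0x8f000234.
		 */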
Christoffer Dall45e96ea2013-01-20 18:43:58 -05001716 ret = io_mem_abort(vcpu, run, fault_ipa);
Christoffer Dall94f8e642013-01-20 18:28:12 -05001717 goto out_unlock;
1718 }
1719
Christoffer Dallc3058d52014-10-10 12:14:29 +02001720 /* Userspace should not be able to register out-of-bounds IPAs */
1721 VM_BUG_ON(fault_ipa >= KVM_PHYS_SIZE);
1722
Marc Zyngieraeda9132015-03-12 18:16:52 +00001723 if (fault_status == FSC_ACCESS) {
1724 handle_access_fault(vcpu, fault_ipa);
1725 ret = 1;
1726 goto out_unlock;
1727 }
1728
Christoffer Dall98047882014-08-19 12:18:04 +02001729 ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status);
Christoffer Dall94f8e642013-01-20 18:28:12 -05001730 if (ret == 0)
1731 ret = 1;
1732out_unlock:
1733 srcu_read_unlock(&vcpu->kvm->srcu, idx);
1734 return ret;
Christoffer Dall342cd0a2013-01-20 18:28:06 -05001735}
1736
Marc Zyngier1d2ebac2015-03-12 18:16:50 +00001737static int handle_hva_to_gpa(struct kvm *kvm,
1738 unsigned long start,
1739 unsigned long end,
1740 int (*handler)(struct kvm *kvm,
Suzuki K Poulose056aad62017-03-20 18:26:42 +00001741 gpa_t gpa, u64 size,
1742 void *data),
Marc Zyngier1d2ebac2015-03-12 18:16:50 +00001743 void *data)
Christoffer Dalld5d81842013-01-20 18:28:07 -05001744{
1745 struct kvm_memslots *slots;
1746 struct kvm_memory_slot *memslot;
Marc Zyngier1d2ebac2015-03-12 18:16:50 +00001747 int ret = 0;
Christoffer Dalld5d81842013-01-20 18:28:07 -05001748
1749 slots = kvm_memslots(kvm);
1750
1751 /* we only care about the pages that the guest sees */
1752 kvm_for_each_memslot(memslot, slots) {
1753 unsigned long hva_start, hva_end;
Suzuki K Poulose056aad62017-03-20 18:26:42 +00001754 gfn_t gpa;
Christoffer Dalld5d81842013-01-20 18:28:07 -05001755
1756 hva_start = max(start, memslot->userspace_addr);
1757 hva_end = min(end, memslot->userspace_addr +
1758 (memslot->npages << PAGE_SHIFT));
1759 if (hva_start >= hva_end)
1760 continue;
1761
Suzuki K Poulose056aad62017-03-20 18:26:42 +00001762 gpa = hva_to_gfn_memslot(hva_start, memslot) << PAGE_SHIFT;
1763 ret |= handler(kvm, gpa, (u64)(hva_end - hva_start), data);
Christoffer Dalld5d81842013-01-20 18:28:07 -05001764 }
Marc Zyngier1d2ebac2015-03-12 18:16:50 +00001765
1766 return ret;
Christoffer Dalld5d81842013-01-20 18:28:07 -05001767}
1768
Suzuki K Poulose056aad62017-03-20 18:26:42 +00001769static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
Christoffer Dalld5d81842013-01-20 18:28:07 -05001770{
Suzuki K Poulose056aad62017-03-20 18:26:42 +00001771 unmap_stage2_range(kvm, gpa, size);
Marc Zyngier1d2ebac2015-03-12 18:16:50 +00001772 return 0;
Christoffer Dalld5d81842013-01-20 18:28:07 -05001773}
1774
1775int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
1776{
1777 unsigned long end = hva + PAGE_SIZE;
1778
1779 if (!kvm->arch.pgd)
1780 return 0;
1781
1782 trace_kvm_unmap_hva(hva);
1783 handle_hva_to_gpa(kvm, hva, end, &kvm_unmap_hva_handler, NULL);
1784 return 0;
1785}
1786
1787int kvm_unmap_hva_range(struct kvm *kvm,
1788 unsigned long start, unsigned long end)
1789{
1790 if (!kvm->arch.pgd)
1791 return 0;
1792
1793 trace_kvm_unmap_hva_range(start, end);
1794 handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, NULL);
1795 return 0;
1796}
1797
Suzuki K Poulose056aad62017-03-20 18:26:42 +00001798static int kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
Christoffer Dalld5d81842013-01-20 18:28:07 -05001799{
1800 pte_t *pte = (pte_t *)data;
1801
Suzuki K Poulose056aad62017-03-20 18:26:42 +00001802 WARN_ON(size != PAGE_SIZE);
Mario Smarduch15a49a42015-01-15 15:58:58 -08001803 /*
1804 * We can always call stage2_set_pte with KVM_S2PTE_FLAG_LOGGING_ACTIVE
1805 * flag clear because MMU notifiers will have unmapped a huge PMD before
1806 * calling ->change_pte() (which in turn calls kvm_set_spte_hva()) and
1807 * therefore stage2_set_pte() never needs to clear out a huge PMD
1808 * through this calling path.
1809 */
1810 stage2_set_pte(kvm, NULL, gpa, pte, 0);
Marc Zyngier1d2ebac2015-03-12 18:16:50 +00001811 return 0;
Christoffer Dalld5d81842013-01-20 18:28:07 -05001812}
1813
1814
1815void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
1816{
1817 unsigned long end = hva + PAGE_SIZE;
1818 pte_t stage2_pte;
1819
1820 if (!kvm->arch.pgd)
1821 return;
1822
1823 trace_kvm_set_spte_hva(hva);
1824 stage2_pte = pfn_pte(pte_pfn(pte), PAGE_S2);
1825 handle_hva_to_gpa(kvm, hva, end, &kvm_set_spte_handler, &stage2_pte);
1826}
1827
Suzuki K Poulose056aad62017-03-20 18:26:42 +00001828static int kvm_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
Marc Zyngier35307b92015-03-12 18:16:51 +00001829{
1830 pmd_t *pmd;
1831 pte_t *pte;
1832
Suzuki K Poulose056aad62017-03-20 18:26:42 +00001833 WARN_ON(size != PAGE_SIZE && size != PMD_SIZE);
Marc Zyngier35307b92015-03-12 18:16:51 +00001834 pmd = stage2_get_pmd(kvm, NULL, gpa);
1835 if (!pmd || pmd_none(*pmd)) /* Nothing there */
1836 return 0;
1837
Catalin Marinas06485052016-04-13 17:57:37 +01001838 if (pmd_thp_or_huge(*pmd)) /* THP, HugeTLB */
1839 return stage2_pmdp_test_and_clear_young(pmd);
Marc Zyngier35307b92015-03-12 18:16:51 +00001840
1841 pte = pte_offset_kernel(pmd, gpa);
1842 if (pte_none(*pte))
1843 return 0;
1844
Catalin Marinas06485052016-04-13 17:57:37 +01001845 return stage2_ptep_test_and_clear_young(pte);
Marc Zyngier35307b92015-03-12 18:16:51 +00001846}
1847
Suzuki K Poulose056aad62017-03-20 18:26:42 +00001848static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
Marc Zyngier35307b92015-03-12 18:16:51 +00001849{
1850 pmd_t *pmd;
1851 pte_t *pte;
1852
Suzuki K Poulose056aad62017-03-20 18:26:42 +00001853 WARN_ON(size != PAGE_SIZE && size != PMD_SIZE);
Marc Zyngier35307b92015-03-12 18:16:51 +00001854 pmd = stage2_get_pmd(kvm, NULL, gpa);
1855 if (!pmd || pmd_none(*pmd)) /* Nothing there */
1856 return 0;
1857
Suzuki K Poulosebbb3b6b2016-03-01 12:00:39 +00001858 if (pmd_thp_or_huge(*pmd)) /* THP, HugeTLB */
Marc Zyngier35307b92015-03-12 18:16:51 +00001859 return pmd_young(*pmd);
1860
1861 pte = pte_offset_kernel(pmd, gpa);
1862 if (!pte_none(*pte)) /* Just a page... */
1863 return pte_young(*pte);
1864
1865 return 0;
1866}
1867
1868int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
1869{
Suzuki K Poulose7e5a6722017-07-05 09:57:00 +01001870 if (!kvm->arch.pgd)
1871 return 0;
Marc Zyngier35307b92015-03-12 18:16:51 +00001872 trace_kvm_age_hva(start, end);
1873 return handle_hva_to_gpa(kvm, start, end, kvm_age_hva_handler, NULL);
1874}
1875
1876int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
1877{
Suzuki K Poulose7e5a6722017-07-05 09:57:00 +01001878 if (!kvm->arch.pgd)
1879 return 0;
Marc Zyngier35307b92015-03-12 18:16:51 +00001880 trace_kvm_test_age_hva(hva);
1881 return handle_hva_to_gpa(kvm, hva, hva, kvm_test_age_hva_handler, NULL);
1882}
1883
Christoffer Dalld5d81842013-01-20 18:28:07 -05001884void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu)
1885{
1886 mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
1887}
1888
Christoffer Dall342cd0a2013-01-20 18:28:06 -05001889phys_addr_t kvm_mmu_get_httbr(void)
1890{
Ard Biesheuvele4c5a682015-03-19 16:42:28 +00001891 if (__kvm_cpu_uses_extended_idmap())
1892 return virt_to_phys(merged_hyp_pgd);
1893 else
1894 return virt_to_phys(hyp_pgd);
Christoffer Dall342cd0a2013-01-20 18:28:06 -05001895}
1896
Marc Zyngier5a677ce2013-04-12 19:12:06 +01001897phys_addr_t kvm_get_idmap_vector(void)
1898{
1899 return hyp_idmap_vector;
1900}
1901
Marc Zyngier0535a3e2016-06-30 18:40:43 +01001902static int kvm_map_idmap_text(pgd_t *pgd)
1903{
1904 int err;
1905
1906 /* Create the idmap in the boot page tables */
Kristina Martsenko98732d12018-01-15 15:23:49 +00001907 err = __create_hyp_mappings(pgd, __kvm_idmap_ptrs_per_pgd(),
Marc Zyngier0535a3e2016-06-30 18:40:43 +01001908 hyp_idmap_start, hyp_idmap_end,
1909 __phys_to_pfn(hyp_idmap_start),
1910 PAGE_HYP_EXEC);
1911 if (err)
1912 kvm_err("Failed to idmap %lx-%lx\n",
1913 hyp_idmap_start, hyp_idmap_end);
1914
1915 return err;
1916}
1917
Christoffer Dall342cd0a2013-01-20 18:28:06 -05001918int kvm_mmu_init(void)
1919{
Marc Zyngier2fb41052013-04-12 19:12:03 +01001920 int err;
1921
Santosh Shilimkar4fda3422013-11-19 14:59:12 -05001922 hyp_idmap_start = kvm_virt_to_phys(__hyp_idmap_text_start);
Marc Zyngier46fef152018-03-12 14:25:10 +00001923 hyp_idmap_start = ALIGN_DOWN(hyp_idmap_start, PAGE_SIZE);
Santosh Shilimkar4fda3422013-11-19 14:59:12 -05001924 hyp_idmap_end = kvm_virt_to_phys(__hyp_idmap_text_end);
Marc Zyngier46fef152018-03-12 14:25:10 +00001925 hyp_idmap_end = ALIGN(hyp_idmap_end, PAGE_SIZE);
Santosh Shilimkar4fda3422013-11-19 14:59:12 -05001926 hyp_idmap_vector = kvm_virt_to_phys(__kvm_hyp_init);
Marc Zyngier5a677ce2013-04-12 19:12:06 +01001927
Ard Biesheuvel06f75a12015-03-19 16:42:26 +00001928 /*
1929 * We rely on the linker script to ensure at build time that the HYP
1930 * init code does not cross a page boundary.
1931 */
1932 BUG_ON((hyp_idmap_start ^ (hyp_idmap_end - 1)) & PAGE_MASK);
Marc Zyngier5a677ce2013-04-12 19:12:06 +01001933
Marc Zyngierb4ef0492017-12-03 20:04:51 +00001934 kvm_debug("IDMAP page: %lx\n", hyp_idmap_start);
1935 kvm_debug("HYP VA range: %lx:%lx\n",
1936 kern_hyp_va(PAGE_OFFSET),
1937 kern_hyp_va((unsigned long)high_memory - 1));
Marc Zyngiereac378a2016-06-30 18:40:50 +01001938
Marc Zyngier6c41a412016-06-30 18:40:51 +01001939 if (hyp_idmap_start >= kern_hyp_va(PAGE_OFFSET) &&
Marc Zyngiered57cac2017-12-03 18:22:49 +00001940 hyp_idmap_start < kern_hyp_va((unsigned long)high_memory - 1) &&
Marc Zyngierd2896d42016-08-22 09:01:17 +01001941 hyp_idmap_start != (unsigned long)__hyp_idmap_text_start) {
Marc Zyngiereac378a2016-06-30 18:40:50 +01001942 /*
1943 * The idmap page is intersecting with the VA space,
1944 * it is not safe to continue further.
1945 */
1946 kvm_err("IDMAP intersecting with HYP VA, unable to continue\n");
1947 err = -EINVAL;
1948 goto out;
1949 }
1950
Christoffer Dall38f791a2014-10-10 12:14:28 +02001951 hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, hyp_pgd_order);
Marc Zyngier0535a3e2016-06-30 18:40:43 +01001952 if (!hyp_pgd) {
Christoffer Dalld5d81842013-01-20 18:28:07 -05001953 kvm_err("Hyp mode PGD not allocated\n");
Marc Zyngier2fb41052013-04-12 19:12:03 +01001954 err = -ENOMEM;
1955 goto out;
1956 }
1957
Ard Biesheuvele4c5a682015-03-19 16:42:28 +00001958 if (__kvm_cpu_uses_extended_idmap()) {
Marc Zyngier0535a3e2016-06-30 18:40:43 +01001959 boot_hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
1960 hyp_pgd_order);
1961 if (!boot_hyp_pgd) {
1962 kvm_err("Hyp boot PGD not allocated\n");
1963 err = -ENOMEM;
1964 goto out;
1965 }
1966
1967 err = kvm_map_idmap_text(boot_hyp_pgd);
1968 if (err)
1969 goto out;
1970
Ard Biesheuvele4c5a682015-03-19 16:42:28 +00001971 merged_hyp_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
1972 if (!merged_hyp_pgd) {
1973 kvm_err("Failed to allocate extra HYP pgd\n");
1974 goto out;
1975 }
1976 __kvm_extend_hypmap(boot_hyp_pgd, hyp_pgd, merged_hyp_pgd,
1977 hyp_idmap_start);
Marc Zyngier0535a3e2016-06-30 18:40:43 +01001978 } else {
1979 err = kvm_map_idmap_text(hyp_pgd);
1980 if (err)
1981 goto out;
Marc Zyngier5a677ce2013-04-12 19:12:06 +01001982 }
1983
Marc Zyngiere3f019b2017-12-04 17:04:38 +00001984 io_map_base = hyp_idmap_start;
Christoffer Dalld5d81842013-01-20 18:28:07 -05001985 return 0;
Marc Zyngier2fb41052013-04-12 19:12:03 +01001986out:
Marc Zyngier4f728272013-04-12 19:12:05 +01001987 free_hyp_pgds();
Marc Zyngier2fb41052013-04-12 19:12:03 +01001988 return err;
Christoffer Dall342cd0a2013-01-20 18:28:06 -05001989}
Eric Augerdf6ce242014-06-06 11:10:23 +02001990
1991void kvm_arch_commit_memory_region(struct kvm *kvm,
Paolo Bonzini09170a42015-05-18 13:59:39 +02001992 const struct kvm_userspace_memory_region *mem,
Eric Augerdf6ce242014-06-06 11:10:23 +02001993 const struct kvm_memory_slot *old,
Paolo Bonzinif36f3f22015-05-18 13:20:23 +02001994 const struct kvm_memory_slot *new,
Eric Augerdf6ce242014-06-06 11:10:23 +02001995 enum kvm_mr_change change)
1996{
Mario Smarduchc6473552015-01-15 15:58:56 -08001997 /*
1998	 * At this point the memslot has been committed and there is an
1999	 * allocated dirty_bitmap[]; dirty pages will be tracked while the
2000	 * memory slot is write protected.
2001 */
2002 if (change != KVM_MR_DELETE && mem->flags & KVM_MEM_LOG_DIRTY_PAGES)
2003 kvm_mmu_wp_memory_region(kvm, mem->slot);
Eric Augerdf6ce242014-06-06 11:10:23 +02002004}
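/*
 * Usage note (an assumption about typical callers, not stated in this file):
 * a VMM normally enables dirty logging by re-registering a slot with
 * KVM_MEM_LOG_DIRTY_PAGES set (a KVM_MR_FLAGS_ONLY change), which lands in
 * the commit hook above and performs the initial write-protect pass.
 */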
2005
2006int kvm_arch_prepare_memory_region(struct kvm *kvm,
2007 struct kvm_memory_slot *memslot,
Paolo Bonzini09170a42015-05-18 13:59:39 +02002008 const struct kvm_userspace_memory_region *mem,
Eric Augerdf6ce242014-06-06 11:10:23 +02002009 enum kvm_mr_change change)
2010{
Ard Biesheuvel8eef9122014-10-10 17:00:32 +02002011 hva_t hva = mem->userspace_addr;
2012 hva_t reg_end = hva + mem->memory_size;
2013 bool writable = !(mem->flags & KVM_MEM_READONLY);
2014 int ret = 0;
2015
Mario Smarduch15a49a42015-01-15 15:58:58 -08002016 if (change != KVM_MR_CREATE && change != KVM_MR_MOVE &&
2017 change != KVM_MR_FLAGS_ONLY)
Ard Biesheuvel8eef9122014-10-10 17:00:32 +02002018 return 0;
2019
2020 /*
Christoffer Dallc3058d52014-10-10 12:14:29 +02002021	 * Prevent userspace from creating a memory region outside of the IPA
2022	 * space addressable by the KVM guest.
2023 */
2024 if (memslot->base_gfn + memslot->npages >=
2025 (KVM_PHYS_SIZE >> PAGE_SHIFT))
2026 return -EFAULT;
2027
Marc Zyngier72f31042017-03-16 18:20:50 +00002028 down_read(&current->mm->mmap_sem);
Christoffer Dallc3058d52014-10-10 12:14:29 +02002029 /*
Ard Biesheuvel8eef9122014-10-10 17:00:32 +02002030 * A memory region could potentially cover multiple VMAs, and any holes
2031 * between them, so iterate over all of them to find out if we can map
2032 * any of them right now.
2033 *
2034 * +--------------------------------------------+
2035 * +---------------+----------------+ +----------------+
2036 * | : VMA 1 | VMA 2 | | VMA 3 : |
2037 * +---------------+----------------+ +----------------+
2038 * | memory region |
2039 * +--------------------------------------------+
2040 */
2041 do {
2042 struct vm_area_struct *vma = find_vma(current->mm, hva);
2043 hva_t vm_start, vm_end;
2044
2045 if (!vma || vma->vm_start >= reg_end)
2046 break;
2047
2048 /*
2049 * Mapping a read-only VMA is only allowed if the
2050 * memory region is configured as read-only.
2051 */
2052 if (writable && !(vma->vm_flags & VM_WRITE)) {
2053 ret = -EPERM;
2054 break;
2055 }
2056
2057 /*
2058 * Take the intersection of this VMA with the memory region
2059 */
2060 vm_start = max(hva, vma->vm_start);
2061 vm_end = min(reg_end, vma->vm_end);
2062
2063 if (vma->vm_flags & VM_PFNMAP) {
2064 gpa_t gpa = mem->guest_phys_addr +
2065 (vm_start - mem->userspace_addr);
Marek Majtykaca09f022015-09-16 12:04:55 +02002066 phys_addr_t pa;
2067
2068 pa = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT;
2069 pa += vm_start - vma->vm_start;
Ard Biesheuvel8eef9122014-10-10 17:00:32 +02002070
Mario Smarduch15a49a42015-01-15 15:58:58 -08002071 /* IO region dirty page logging not allowed */
Marc Zyngier72f31042017-03-16 18:20:50 +00002072 if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) {
2073 ret = -EINVAL;
2074 goto out;
2075 }
Mario Smarduch15a49a42015-01-15 15:58:58 -08002076
Ard Biesheuvel8eef9122014-10-10 17:00:32 +02002077 ret = kvm_phys_addr_ioremap(kvm, gpa, pa,
2078 vm_end - vm_start,
2079 writable);
2080 if (ret)
2081 break;
2082 }
2083 hva = vm_end;
2084 } while (hva < reg_end);
2085
Mario Smarduch15a49a42015-01-15 15:58:58 -08002086 if (change == KVM_MR_FLAGS_ONLY)
Marc Zyngier72f31042017-03-16 18:20:50 +00002087 goto out;
Mario Smarduch15a49a42015-01-15 15:58:58 -08002088
Ard Biesheuvel849260c2014-11-17 14:58:53 +00002089 spin_lock(&kvm->mmu_lock);
2090 if (ret)
Ard Biesheuvel8eef9122014-10-10 17:00:32 +02002091 unmap_stage2_range(kvm, mem->guest_phys_addr, mem->memory_size);
Ard Biesheuvel849260c2014-11-17 14:58:53 +00002092 else
2093 stage2_flush_memslot(kvm, memslot);
2094 spin_unlock(&kvm->mmu_lock);
Marc Zyngier72f31042017-03-16 18:20:50 +00002095out:
2096 up_read(&current->mm->mmap_sem);
Ard Biesheuvel8eef9122014-10-10 17:00:32 +02002097 return ret;
Eric Augerdf6ce242014-06-06 11:10:23 +02002098}
2099
2100void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
2101 struct kvm_memory_slot *dont)
2102{
2103}
2104
2105int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
2106 unsigned long npages)
2107{
2108 return 0;
2109}
2110
Paolo Bonzini15f46012015-05-17 21:26:08 +02002111void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots)
Eric Augerdf6ce242014-06-06 11:10:23 +02002112{
2113}
2114
2115void kvm_arch_flush_shadow_all(struct kvm *kvm)
2116{
Suzuki K Poulose293f2932016-09-08 16:25:49 +01002117 kvm_free_stage2_pgd(kvm);
Eric Augerdf6ce242014-06-06 11:10:23 +02002118}
2119
2120void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
2121 struct kvm_memory_slot *slot)
2122{
Ard Biesheuvel8eef9122014-10-10 17:00:32 +02002123 gpa_t gpa = slot->base_gfn << PAGE_SHIFT;
2124 phys_addr_t size = slot->npages << PAGE_SHIFT;
2125
2126 spin_lock(&kvm->mmu_lock);
2127 unmap_stage2_range(kvm, gpa, size);
2128 spin_unlock(&kvm->mmu_lock);
Eric Augerdf6ce242014-06-06 11:10:23 +02002129}
Marc Zyngier3c1e7162014-12-19 16:05:31 +00002130
2131/*
2132 * See note at ARMv7 ARM B1.14.4 (TL;DR: S/W ops are not easily virtualized).
2133 *
2134 * Main problems:
2135 * - S/W ops are local to a CPU (not broadcast)
2136 * - We have line migration behind our back (speculation)
2137 * - System caches don't support S/W at all (damn!)
2138 *
2139 * In the face of the above, the best we can do is to try and convert
2140 * S/W ops to VA ops. Because the guest is not allowed to infer the
2141 * S/W to PA mapping, it can only use S/W to nuke the whole cache,
2142 * which is a rather good thing for us.
2143 *
2144 * Also, it is only used when turning caches on/off ("The expected
2145 * usage of the cache maintenance instructions that operate by set/way
2146 * is associated with the cache maintenance instructions associated
2147 * with the powerdown and powerup of caches, if this is required by
2148 * the implementation.").
2149 *
2150 * We use the following policy:
2151 *
2152 * - If we trap a S/W operation, we enable VM trapping to detect
2153 * caches being turned on/off, and do a full clean.
2154 *
2155 * - We flush the caches both when they are turned on and when turned off.
2156 *
2157 * - Once the caches are enabled, we stop trapping VM ops.
2158 */
2159void kvm_set_way_flush(struct kvm_vcpu *vcpu)
2160{
Christoffer Dall3df59d82017-08-03 12:09:05 +02002161 unsigned long hcr = *vcpu_hcr(vcpu);
Marc Zyngier3c1e7162014-12-19 16:05:31 +00002162
2163 /*
2164	 * If this is the first time we do a S/W operation
2165	 * (i.e. HCR_TVM not set) flush the whole of guest memory and
2166	 * enable VM trapping.
2167 *
2168 * Otherwise, rely on the VM trapping to wait for the MMU +
2169 * Caches to be turned off. At that point, we'll be able to
2170 * clean the caches again.
2171 */
2172 if (!(hcr & HCR_TVM)) {
2173 trace_kvm_set_way_flush(*vcpu_pc(vcpu),
2174 vcpu_has_cache_enabled(vcpu));
2175 stage2_flush_vm(vcpu->kvm);
Christoffer Dall3df59d82017-08-03 12:09:05 +02002176 *vcpu_hcr(vcpu) = hcr | HCR_TVM;
Marc Zyngier3c1e7162014-12-19 16:05:31 +00002177 }
2178}
2179
2180void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled)
2181{
2182 bool now_enabled = vcpu_has_cache_enabled(vcpu);
2183
2184 /*
2185 * If switching the MMU+caches on, need to invalidate the caches.
2186 * If switching it off, need to clean the caches.
2187	 * Clean + invalidate always does the trick.
2188 */
2189 if (now_enabled != was_enabled)
2190 stage2_flush_vm(vcpu->kvm);
2191
2192 /* Caches are now on, stop trapping VM ops (until a S/W op) */
2193 if (now_enabled)
Christoffer Dall3df59d82017-08-03 12:09:05 +02002194 *vcpu_hcr(vcpu) &= ~HCR_TVM;
Marc Zyngier3c1e7162014-12-19 16:05:31 +00002195
2196 trace_kvm_toggle_cache(*vcpu_pc(vcpu), was_enabled, now_enabled);
2197}