// SPDX-License-Identifier: GPL-2.0-only
/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * AMD SVM-SEV support
 *
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 */

#include <linux/kvm_types.h>
#include <linux/kvm_host.h>
#include <linux/kernel.h>
#include <linux/highmem.h>
#include <linux/psp-sev.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/processor.h>
#include <linux/trace_events.h>

#include "x86.h"
#include "svm.h"
#include "cpuid.h"
#include "trace.h"

static u8 sev_enc_bit;
static int sev_flush_asids(void);
static DECLARE_RWSEM(sev_deactivate_lock);
static DEFINE_MUTEX(sev_bitmap_lock);
unsigned int max_sev_asid;
static unsigned int min_sev_asid;
static unsigned long *sev_asid_bitmap;
static unsigned long *sev_reclaim_asid_bitmap;
#define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT)
34
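/*
 * A userspace memory range registered through svm_register_enc_region();
 * its pages stay pinned until the region is unregistered or the VM is
 * destroyed.
 */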
35struct enc_region {
36 struct list_head list;
37 unsigned long npages;
38 struct page **pages;
39 unsigned long uaddr;
40 unsigned long size;
41};
42
43static int sev_flush_asids(void)
44{
45 int ret, error = 0;
46
	/*
	 * DEACTIVATE will clear the WBINVD indicator, causing DF_FLUSH to
	 * fail, so the flush must be guarded against concurrent DEACTIVATE
	 * commands (see sev_unbind_asid()).
	 */
51 down_write(&sev_deactivate_lock);
52
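	/*
	 * The firmware requires a WBINVD on all CPUs before it accepts
	 * DF_FLUSH (this is the WBINVD indicator mentioned above).
	 */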
53 wbinvd_on_all_cpus();
54 ret = sev_guest_df_flush(&error);
55
56 up_write(&sev_deactivate_lock);
57
58 if (ret)
59 pr_err("SEV: DF_FLUSH failed, ret=%d, error=%#x\n", ret, error);
60
61 return ret;
62}
63
64/* Must be called with the sev_bitmap_lock held */
65static bool __sev_recycle_asids(void)
66{
67 int pos;
68
69 /* Check if there are any ASIDs to reclaim before performing a flush */
70 pos = find_next_bit(sev_reclaim_asid_bitmap,
71 max_sev_asid, min_sev_asid - 1);
72 if (pos >= max_sev_asid)
73 return false;
74
75 if (sev_flush_asids())
76 return false;
77
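	/*
	 * Every reclaimed ASID is also marked in-use, so XOR-ing the bitmaps
	 * clears those bits and returns the ASIDs to the free pool.
	 */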
78 bitmap_xor(sev_asid_bitmap, sev_asid_bitmap, sev_reclaim_asid_bitmap,
79 max_sev_asid);
80 bitmap_zero(sev_reclaim_asid_bitmap, max_sev_asid);
81
82 return true;
83}
84
85static int sev_asid_new(void)
86{
87 bool retry = true;
88 int pos;
89
90 mutex_lock(&sev_bitmap_lock);
91
	/*
	 * A SEV-enabled guest must use an ASID from min_sev_asid to
	 * max_sev_asid.  The bitmaps track ASID N at bit position N - 1,
	 * hence the search starts at bit min_sev_asid - 1.
	 */
95again:
96 pos = find_next_zero_bit(sev_asid_bitmap, max_sev_asid, min_sev_asid - 1);
97 if (pos >= max_sev_asid) {
98 if (retry && __sev_recycle_asids()) {
99 retry = false;
100 goto again;
101 }
102 mutex_unlock(&sev_bitmap_lock);
103 return -EBUSY;
104 }
105
106 __set_bit(pos, sev_asid_bitmap);
107
108 mutex_unlock(&sev_bitmap_lock);
109
110 return pos + 1;
111}
112
113static int sev_get_asid(struct kvm *kvm)
114{
115 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
116
117 return sev->asid;
118}
119
120static void sev_asid_free(int asid)
121{
122 struct svm_cpu_data *sd;
123 int cpu, pos;
124
125 mutex_lock(&sev_bitmap_lock);
126
127 pos = asid - 1;
128 __set_bit(pos, sev_reclaim_asid_bitmap);
129
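	/*
	 * Drop every CPU's cached VMCB pointer for this ASID so that a future
	 * guest reusing the ASID forces a TLB flush in pre_sev_run().
	 */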
130 for_each_possible_cpu(cpu) {
131 sd = per_cpu(svm_data, cpu);
132 sd->sev_vmcbs[pos] = NULL;
133 }
134
135 mutex_unlock(&sev_bitmap_lock);
136}
137
138static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
139{
140 struct sev_data_decommission *decommission;
141 struct sev_data_deactivate *data;
142
143 if (!handle)
144 return;
145
146 data = kzalloc(sizeof(*data), GFP_KERNEL);
147 if (!data)
148 return;
149
150 /* deactivate handle */
151 data->handle = handle;
152
153 /* Guard DEACTIVATE against WBINVD/DF_FLUSH used in ASID recycling */
154 down_read(&sev_deactivate_lock);
155 sev_guest_deactivate(data, NULL);
156 up_read(&sev_deactivate_lock);
157
158 kfree(data);
159
160 decommission = kzalloc(sizeof(*decommission), GFP_KERNEL);
161 if (!decommission)
162 return;
163
164 /* decommission handle */
165 decommission->handle = handle;
166 sev_guest_decommission(decommission, NULL);
167
168 kfree(decommission);
169}
170
171static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
172{
173 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
174 int asid, ret;
175
176 ret = -EBUSY;
177 if (unlikely(sev->active))
178 return ret;
179
180 asid = sev_asid_new();
181 if (asid < 0)
182 return ret;
183
184 ret = sev_platform_init(&argp->error);
185 if (ret)
186 goto e_free;
187
188 sev->active = true;
189 sev->asid = asid;
190 INIT_LIST_HEAD(&sev->regions_list);
191
192 return 0;
193
194e_free:
195 sev_asid_free(asid);
196 return ret;
197}
198
199static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error)
200{
201 struct sev_data_activate *data;
202 int asid = sev_get_asid(kvm);
203 int ret;
204
205 data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
206 if (!data)
207 return -ENOMEM;
208
209 /* activate ASID on the given handle */
210 data->handle = handle;
211 data->asid = asid;
212 ret = sev_guest_activate(data, error);
213 kfree(data);
214
215 return ret;
216}
217
218static int __sev_issue_cmd(int fd, int id, void *data, int *error)
219{
220 struct fd f;
221 int ret;
222
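	/* The fd must refer to the SEV device; the PSP driver validates it. */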
223 f = fdget(fd);
224 if (!f.file)
225 return -EBADF;
226
227 ret = sev_issue_cmd_external_user(f.file, id, data, error);
228
229 fdput(f);
230 return ret;
231}
232
233static int sev_issue_cmd(struct kvm *kvm, int id, void *data, int *error)
234{
235 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
236
237 return __sev_issue_cmd(sev->fd, id, data, error);
238}
239
240static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
241{
242 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
243 struct sev_data_launch_start *start;
244 struct kvm_sev_launch_start params;
245 void *dh_blob, *session_blob;
246 int *error = &argp->error;
247 int ret;
248
249 if (!sev_guest(kvm))
250 return -ENOTTY;
251
252 if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
253 return -EFAULT;
254
255 start = kzalloc(sizeof(*start), GFP_KERNEL_ACCOUNT);
256 if (!start)
257 return -ENOMEM;
258
259 dh_blob = NULL;
260 if (params.dh_uaddr) {
261 dh_blob = psp_copy_user_blob(params.dh_uaddr, params.dh_len);
262 if (IS_ERR(dh_blob)) {
263 ret = PTR_ERR(dh_blob);
264 goto e_free;
265 }
266
267 start->dh_cert_address = __sme_set(__pa(dh_blob));
268 start->dh_cert_len = params.dh_len;
269 }
270
271 session_blob = NULL;
272 if (params.session_uaddr) {
273 session_blob = psp_copy_user_blob(params.session_uaddr, params.session_len);
274 if (IS_ERR(session_blob)) {
275 ret = PTR_ERR(session_blob);
276 goto e_free_dh;
277 }
278
279 start->session_address = __sme_set(__pa(session_blob));
280 start->session_len = params.session_len;
281 }
282
283 start->handle = params.handle;
284 start->policy = params.policy;
285
286 /* create memory encryption context */
287 ret = __sev_issue_cmd(argp->sev_fd, SEV_CMD_LAUNCH_START, start, error);
288 if (ret)
289 goto e_free_session;
290
291 /* Bind ASID to this guest */
292 ret = sev_bind_asid(kvm, start->handle, error);
293 if (ret)
294 goto e_free_session;
295
296 /* return handle to userspace */
297 params.handle = start->handle;
298 if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params))) {
299 sev_unbind_asid(kvm, start->handle);
300 ret = -EFAULT;
301 goto e_free_session;
302 }
303
304 sev->handle = start->handle;
305 sev->fd = argp->sev_fd;
306
307e_free_session:
308 kfree(session_blob);
309e_free_dh:
310 kfree(dh_blob);
311e_free:
312 kfree(start);
313 return ret;
314}
315
316static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
317 unsigned long ulen, unsigned long *n,
318 int write)
319{
320 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
John Hubbard78824fa2020-05-25 23:22:06 -0700321 unsigned long npages, size;
322 int npinned;
Joerg Roedeleaf78262020-03-24 10:41:54 +0100323 unsigned long locked, lock_limit;
324 struct page **pages;
325 unsigned long first, last;
Dan Carpenterff2bd9f2020-07-14 17:23:51 +0300326 int ret;
Joerg Roedeleaf78262020-03-24 10:41:54 +0100327
328 if (ulen == 0 || uaddr + ulen < uaddr)
Paolo Bonzinia8d908b2020-06-23 05:12:24 -0400329 return ERR_PTR(-EINVAL);
Joerg Roedeleaf78262020-03-24 10:41:54 +0100330
331 /* Calculate number of pages. */
332 first = (uaddr & PAGE_MASK) >> PAGE_SHIFT;
333 last = ((uaddr + ulen - 1) & PAGE_MASK) >> PAGE_SHIFT;
334 npages = (last - first + 1);
335
336 locked = sev->pages_locked + npages;
337 lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
338 if (locked > lock_limit && !capable(CAP_IPC_LOCK)) {
339 pr_err("SEV: %lu locked pages exceed the lock limit of %lu.\n", locked, lock_limit);
Paolo Bonzinia8d908b2020-06-23 05:12:24 -0400340 return ERR_PTR(-ENOMEM);
Joerg Roedeleaf78262020-03-24 10:41:54 +0100341 }
342
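	/* npinned and pin_user_pages_fast() use an int page count, so cap npages. */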
John Hubbard78824fa2020-05-25 23:22:06 -0700343 if (WARN_ON_ONCE(npages > INT_MAX))
Paolo Bonzinia8d908b2020-06-23 05:12:24 -0400344 return ERR_PTR(-EINVAL);
John Hubbard78824fa2020-05-25 23:22:06 -0700345
Joerg Roedeleaf78262020-03-24 10:41:54 +0100346 /* Avoid using vmalloc for smaller buffers. */
347 size = npages * sizeof(struct page *);
348 if (size > PAGE_SIZE)
Christoph Hellwig88dca4c2020-06-01 21:51:40 -0700349 pages = __vmalloc(size, GFP_KERNEL_ACCOUNT | __GFP_ZERO);
Joerg Roedeleaf78262020-03-24 10:41:54 +0100350 else
351 pages = kmalloc(size, GFP_KERNEL_ACCOUNT);
352
353 if (!pages)
Paolo Bonzinia8d908b2020-06-23 05:12:24 -0400354 return ERR_PTR(-ENOMEM);
Joerg Roedeleaf78262020-03-24 10:41:54 +0100355
356 /* Pin the user virtual address. */
John Hubbarddc42c8a2020-05-25 23:22:07 -0700357 npinned = pin_user_pages_fast(uaddr, npages, write ? FOLL_WRITE : 0, pages);
Joerg Roedeleaf78262020-03-24 10:41:54 +0100358 if (npinned != npages) {
359 pr_err("SEV: Failure locking %lu pages.\n", npages);
Dan Carpenterff2bd9f2020-07-14 17:23:51 +0300360 ret = -ENOMEM;
Joerg Roedeleaf78262020-03-24 10:41:54 +0100361 goto err;
362 }
363
364 *n = npages;
365 sev->pages_locked = locked;
366
367 return pages;
368
369err:
Dan Carpenterff2bd9f2020-07-14 17:23:51 +0300370 if (npinned > 0)
John Hubbarddc42c8a2020-05-25 23:22:07 -0700371 unpin_user_pages(pages, npinned);
Joerg Roedeleaf78262020-03-24 10:41:54 +0100372
373 kvfree(pages);
Dan Carpenterff2bd9f2020-07-14 17:23:51 +0300374 return ERR_PTR(ret);
Joerg Roedeleaf78262020-03-24 10:41:54 +0100375}
376
377static void sev_unpin_memory(struct kvm *kvm, struct page **pages,
378 unsigned long npages)
379{
380 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
381
John Hubbarddc42c8a2020-05-25 23:22:07 -0700382 unpin_user_pages(pages, npages);
Joerg Roedeleaf78262020-03-24 10:41:54 +0100383 kvfree(pages);
384 sev->pages_locked -= npages;
385}
386
387static void sev_clflush_pages(struct page *pages[], unsigned long npages)
388{
389 uint8_t *page_virtual;
390 unsigned long i;
391
Krish Sadhukhane1ebb2b2020-09-17 21:20:38 +0000392 if (this_cpu_has(X86_FEATURE_SME_COHERENT) || npages == 0 ||
393 pages == NULL)
Joerg Roedeleaf78262020-03-24 10:41:54 +0100394 return;
395
396 for (i = 0; i < npages; i++) {
397 page_virtual = kmap_atomic(pages[i]);
398 clflush_cache_range(page_virtual, PAGE_SIZE);
399 kunmap_atomic(page_virtual);
400 }
401}
402
403static unsigned long get_num_contig_pages(unsigned long idx,
404 struct page **inpages, unsigned long npages)
405{
406 unsigned long paddr, next_paddr;
407 unsigned long i = idx + 1, pages = 1;
408
409 /* find the number of contiguous pages starting from idx */
410 paddr = __sme_page_pa(inpages[idx]);
411 while (i < npages) {
412 next_paddr = __sme_page_pa(inpages[i++]);
413 if ((paddr + PAGE_SIZE) == next_paddr) {
414 pages++;
415 paddr = next_paddr;
416 continue;
417 }
418 break;
419 }
420
421 return pages;
422}
423
424static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
425{
426 unsigned long vaddr, vaddr_end, next_vaddr, npages, pages, size, i;
427 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
428 struct kvm_sev_launch_update_data params;
429 struct sev_data_launch_update_data *data;
430 struct page **inpages;
431 int ret;
432
433 if (!sev_guest(kvm))
434 return -ENOTTY;
435
436 if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
437 return -EFAULT;
438
439 data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
440 if (!data)
441 return -ENOMEM;
442
443 vaddr = params.uaddr;
444 size = params.len;
445 vaddr_end = vaddr + size;
446
447 /* Lock the user memory. */
448 inpages = sev_pin_memory(kvm, vaddr, size, &npages, 1);
Dan Carpenterff2bd9f2020-07-14 17:23:51 +0300449 if (IS_ERR(inpages)) {
450 ret = PTR_ERR(inpages);
Joerg Roedeleaf78262020-03-24 10:41:54 +0100451 goto e_free;
452 }
453
454 /*
Paolo Bonzini14e3dd82020-09-23 13:01:33 -0400455 * Flush (on non-coherent CPUs) before LAUNCH_UPDATE encrypts pages in
456 * place; the cache may contain the data that was written unencrypted.
Joerg Roedeleaf78262020-03-24 10:41:54 +0100457 */
458 sev_clflush_pages(inpages, npages);
459
460 for (i = 0; vaddr < vaddr_end; vaddr = next_vaddr, i += pages) {
461 int offset, len;
462
463 /*
464 * If the user buffer is not page-aligned, calculate the offset
465 * within the page.
466 */
467 offset = vaddr & (PAGE_SIZE - 1);
468
469 /* Calculate the number of pages that can be encrypted in one go. */
470 pages = get_num_contig_pages(i, inpages, npages);
471
472 len = min_t(size_t, ((pages * PAGE_SIZE) - offset), size);
473
474 data->handle = sev->handle;
475 data->len = len;
476 data->address = __sme_page_pa(inpages[i]) + offset;
477 ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_DATA, data, &argp->error);
478 if (ret)
479 goto e_unpin;
480
481 size -= len;
482 next_vaddr = vaddr + len;
483 }
484
485e_unpin:
486 /* content of memory is updated, mark pages dirty */
487 for (i = 0; i < npages; i++) {
488 set_page_dirty_lock(inpages[i]);
489 mark_page_accessed(inpages[i]);
490 }
491 /* unlock the user pages */
492 sev_unpin_memory(kvm, inpages, npages);
493e_free:
494 kfree(data);
495 return ret;
496}
497
498static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
499{
500 void __user *measure = (void __user *)(uintptr_t)argp->data;
501 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
502 struct sev_data_launch_measure *data;
503 struct kvm_sev_launch_measure params;
504 void __user *p = NULL;
505 void *blob = NULL;
506 int ret;
507
508 if (!sev_guest(kvm))
509 return -ENOTTY;
510
511 if (copy_from_user(&params, measure, sizeof(params)))
512 return -EFAULT;
513
514 data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
515 if (!data)
516 return -ENOMEM;
517
518 /* User wants to query the blob length */
519 if (!params.len)
520 goto cmd;
521
522 p = (void __user *)(uintptr_t)params.uaddr;
523 if (p) {
524 if (params.len > SEV_FW_BLOB_MAX_SIZE) {
525 ret = -EINVAL;
526 goto e_free;
527 }
528
529 ret = -ENOMEM;
530 blob = kmalloc(params.len, GFP_KERNEL);
531 if (!blob)
532 goto e_free;
533
534 data->address = __psp_pa(blob);
535 data->len = params.len;
536 }
537
538cmd:
539 data->handle = sev->handle;
540 ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_MEASURE, data, &argp->error);
541
	/*
	 * If userspace only queried the blob length, the firmware has stored
	 * the expected length in data->len; skip the error check and return
	 * the length.
	 */
545 if (!params.len)
546 goto done;
547
548 if (ret)
549 goto e_free_blob;
550
551 if (blob) {
552 if (copy_to_user(p, blob, params.len))
553 ret = -EFAULT;
554 }
555
556done:
557 params.len = data->len;
558 if (copy_to_user(measure, &params, sizeof(params)))
559 ret = -EFAULT;
560e_free_blob:
561 kfree(blob);
562e_free:
563 kfree(data);
564 return ret;
565}
566
567static int sev_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
568{
569 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
570 struct sev_data_launch_finish *data;
571 int ret;
572
573 if (!sev_guest(kvm))
574 return -ENOTTY;
575
576 data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
577 if (!data)
578 return -ENOMEM;
579
580 data->handle = sev->handle;
581 ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_FINISH, data, &argp->error);
582
583 kfree(data);
584 return ret;
585}
586
587static int sev_guest_status(struct kvm *kvm, struct kvm_sev_cmd *argp)
588{
589 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
590 struct kvm_sev_guest_status params;
591 struct sev_data_guest_status *data;
592 int ret;
593
594 if (!sev_guest(kvm))
595 return -ENOTTY;
596
597 data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
598 if (!data)
599 return -ENOMEM;
600
601 data->handle = sev->handle;
602 ret = sev_issue_cmd(kvm, SEV_CMD_GUEST_STATUS, data, &argp->error);
603 if (ret)
604 goto e_free;
605
606 params.policy = data->policy;
607 params.state = data->state;
608 params.handle = data->handle;
609
610 if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params)))
611 ret = -EFAULT;
612e_free:
613 kfree(data);
614 return ret;
615}
616
617static int __sev_issue_dbg_cmd(struct kvm *kvm, unsigned long src,
618 unsigned long dst, int size,
619 int *error, bool enc)
620{
621 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
622 struct sev_data_dbg *data;
623 int ret;
624
625 data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
626 if (!data)
627 return -ENOMEM;
628
629 data->handle = sev->handle;
630 data->dst_addr = dst;
631 data->src_addr = src;
632 data->len = size;
633
634 ret = sev_issue_cmd(kvm,
635 enc ? SEV_CMD_DBG_ENCRYPT : SEV_CMD_DBG_DECRYPT,
636 data, error);
637 kfree(data);
638 return ret;
639}
640
641static int __sev_dbg_decrypt(struct kvm *kvm, unsigned long src_paddr,
642 unsigned long dst_paddr, int sz, int *err)
643{
644 int offset;
645
	/*
	 * It's safe to read more than was asked; the caller must ensure that
	 * the destination has enough space.
	 */
650 src_paddr = round_down(src_paddr, 16);
651 offset = src_paddr & 15;
652 sz = round_up(sz + offset, 16);
653
654 return __sev_issue_dbg_cmd(kvm, src_paddr, dst_paddr, sz, err, false);
655}
656
657static int __sev_dbg_decrypt_user(struct kvm *kvm, unsigned long paddr,
658 unsigned long __user dst_uaddr,
659 unsigned long dst_paddr,
660 int size, int *err)
661{
662 struct page *tpage = NULL;
663 int ret, offset;
664
	/* If the inputs are not 16-byte aligned then use an intermediate buffer */
666 if (!IS_ALIGNED(dst_paddr, 16) ||
667 !IS_ALIGNED(paddr, 16) ||
668 !IS_ALIGNED(size, 16)) {
669 tpage = (void *)alloc_page(GFP_KERNEL);
670 if (!tpage)
671 return -ENOMEM;
672
673 dst_paddr = __sme_page_pa(tpage);
674 }
675
676 ret = __sev_dbg_decrypt(kvm, paddr, dst_paddr, size, err);
677 if (ret)
678 goto e_free;
679
680 if (tpage) {
681 offset = paddr & 15;
682 if (copy_to_user((void __user *)(uintptr_t)dst_uaddr,
683 page_address(tpage) + offset, size))
684 ret = -EFAULT;
685 }
686
687e_free:
688 if (tpage)
689 __free_page(tpage);
690
691 return ret;
692}
693
694static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
695 unsigned long __user vaddr,
696 unsigned long dst_paddr,
697 unsigned long __user dst_vaddr,
698 int size, int *error)
699{
700 struct page *src_tpage = NULL;
701 struct page *dst_tpage = NULL;
702 int ret, len = size;
703
	/* If the source buffer is not 16-byte aligned then use an intermediate buffer */
705 if (!IS_ALIGNED(vaddr, 16)) {
706 src_tpage = alloc_page(GFP_KERNEL);
707 if (!src_tpage)
708 return -ENOMEM;
709
710 if (copy_from_user(page_address(src_tpage),
711 (void __user *)(uintptr_t)vaddr, size)) {
712 __free_page(src_tpage);
713 return -EFAULT;
714 }
715
716 paddr = __sme_page_pa(src_tpage);
717 }
718
719 /*
720 * If destination buffer or length is not aligned then do read-modify-write:
721 * - decrypt destination in an intermediate buffer
722 * - copy the source buffer in an intermediate buffer
723 * - use the intermediate buffer as source buffer
724 */
725 if (!IS_ALIGNED(dst_vaddr, 16) || !IS_ALIGNED(size, 16)) {
726 int dst_offset;
727
728 dst_tpage = alloc_page(GFP_KERNEL);
729 if (!dst_tpage) {
730 ret = -ENOMEM;
731 goto e_free;
732 }
733
734 ret = __sev_dbg_decrypt(kvm, dst_paddr,
735 __sme_page_pa(dst_tpage), size, error);
736 if (ret)
737 goto e_free;
738
		/*
		 * If the source is a kernel buffer then use memcpy(),
		 * otherwise use copy_from_user().
		 */
743 dst_offset = dst_paddr & 15;
744
745 if (src_tpage)
746 memcpy(page_address(dst_tpage) + dst_offset,
747 page_address(src_tpage), size);
748 else {
749 if (copy_from_user(page_address(dst_tpage) + dst_offset,
750 (void __user *)(uintptr_t)vaddr, size)) {
751 ret = -EFAULT;
752 goto e_free;
753 }
754 }
755
756 paddr = __sme_page_pa(dst_tpage);
757 dst_paddr = round_down(dst_paddr, 16);
758 len = round_up(size, 16);
759 }
760
761 ret = __sev_issue_dbg_cmd(kvm, paddr, dst_paddr, len, error, true);
762
763e_free:
764 if (src_tpage)
765 __free_page(src_tpage);
766 if (dst_tpage)
767 __free_page(dst_tpage);
768 return ret;
769}
770
771static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec)
772{
773 unsigned long vaddr, vaddr_end, next_vaddr;
774 unsigned long dst_vaddr;
775 struct page **src_p, **dst_p;
776 struct kvm_sev_dbg debug;
777 unsigned long n;
778 unsigned int size;
779 int ret;
780
781 if (!sev_guest(kvm))
782 return -ENOTTY;
783
784 if (copy_from_user(&debug, (void __user *)(uintptr_t)argp->data, sizeof(debug)))
785 return -EFAULT;
786
787 if (!debug.len || debug.src_uaddr + debug.len < debug.src_uaddr)
788 return -EINVAL;
789 if (!debug.dst_uaddr)
790 return -EINVAL;
791
792 vaddr = debug.src_uaddr;
793 size = debug.len;
794 vaddr_end = vaddr + size;
795 dst_vaddr = debug.dst_uaddr;
796
797 for (; vaddr < vaddr_end; vaddr = next_vaddr) {
798 int len, s_off, d_off;
799
800 /* lock userspace source and destination page */
801 src_p = sev_pin_memory(kvm, vaddr & PAGE_MASK, PAGE_SIZE, &n, 0);
Dan Carpenterff2bd9f2020-07-14 17:23:51 +0300802 if (IS_ERR(src_p))
803 return PTR_ERR(src_p);
Joerg Roedeleaf78262020-03-24 10:41:54 +0100804
805 dst_p = sev_pin_memory(kvm, dst_vaddr & PAGE_MASK, PAGE_SIZE, &n, 1);
Dan Carpenterff2bd9f2020-07-14 17:23:51 +0300806 if (IS_ERR(dst_p)) {
Joerg Roedeleaf78262020-03-24 10:41:54 +0100807 sev_unpin_memory(kvm, src_p, n);
Dan Carpenterff2bd9f2020-07-14 17:23:51 +0300808 return PTR_ERR(dst_p);
Joerg Roedeleaf78262020-03-24 10:41:54 +0100809 }
810
811 /*
Paolo Bonzini14e3dd82020-09-23 13:01:33 -0400812 * Flush (on non-coherent CPUs) before DBG_{DE,EN}CRYPT read or modify
813 * the pages; flush the destination too so that future accesses do not
814 * see stale data.
Joerg Roedeleaf78262020-03-24 10:41:54 +0100815 */
816 sev_clflush_pages(src_p, 1);
817 sev_clflush_pages(dst_p, 1);
818
		/*
		 * Since the user buffer may not be page-aligned, calculate the
		 * offset within the page.
		 */
823 s_off = vaddr & ~PAGE_MASK;
824 d_off = dst_vaddr & ~PAGE_MASK;
825 len = min_t(size_t, (PAGE_SIZE - s_off), size);
826
827 if (dec)
828 ret = __sev_dbg_decrypt_user(kvm,
829 __sme_page_pa(src_p[0]) + s_off,
830 dst_vaddr,
831 __sme_page_pa(dst_p[0]) + d_off,
832 len, &argp->error);
833 else
834 ret = __sev_dbg_encrypt_user(kvm,
835 __sme_page_pa(src_p[0]) + s_off,
836 vaddr,
837 __sme_page_pa(dst_p[0]) + d_off,
838 dst_vaddr,
839 len, &argp->error);
840
841 sev_unpin_memory(kvm, src_p, n);
842 sev_unpin_memory(kvm, dst_p, n);
843
844 if (ret)
845 goto err;
846
847 next_vaddr = vaddr + len;
848 dst_vaddr = dst_vaddr + len;
849 size -= len;
850 }
851err:
852 return ret;
853}
854
855static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
856{
857 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
858 struct sev_data_launch_secret *data;
859 struct kvm_sev_launch_secret params;
860 struct page **pages;
861 void *blob, *hdr;
Cfir Cohen50085be2020-08-07 17:37:46 -0700862 unsigned long n, i;
Joerg Roedeleaf78262020-03-24 10:41:54 +0100863 int ret, offset;
864
865 if (!sev_guest(kvm))
866 return -ENOTTY;
867
868 if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
869 return -EFAULT;
870
871 pages = sev_pin_memory(kvm, params.guest_uaddr, params.guest_len, &n, 1);
Paolo Bonzinia8d908b2020-06-23 05:12:24 -0400872 if (IS_ERR(pages))
873 return PTR_ERR(pages);
Joerg Roedeleaf78262020-03-24 10:41:54 +0100874
875 /*
Paolo Bonzini14e3dd82020-09-23 13:01:33 -0400876 * Flush (on non-coherent CPUs) before LAUNCH_SECRET encrypts pages in
877 * place; the cache may contain the data that was written unencrypted.
Cfir Cohen50085be2020-08-07 17:37:46 -0700878 */
879 sev_clflush_pages(pages, n);
880
	/*
	 * The secret must be copied into a contiguous memory region, so
	 * verify that the userspace memory pages are contiguous before
	 * issuing the command.
	 */
885 if (get_num_contig_pages(0, pages, n) != n) {
886 ret = -EINVAL;
887 goto e_unpin_memory;
888 }
889
890 ret = -ENOMEM;
891 data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
892 if (!data)
893 goto e_unpin_memory;
894
895 offset = params.guest_uaddr & (PAGE_SIZE - 1);
896 data->guest_address = __sme_page_pa(pages[0]) + offset;
897 data->guest_len = params.guest_len;
898
899 blob = psp_copy_user_blob(params.trans_uaddr, params.trans_len);
900 if (IS_ERR(blob)) {
901 ret = PTR_ERR(blob);
902 goto e_free;
903 }
904
905 data->trans_address = __psp_pa(blob);
906 data->trans_len = params.trans_len;
907
908 hdr = psp_copy_user_blob(params.hdr_uaddr, params.hdr_len);
909 if (IS_ERR(hdr)) {
910 ret = PTR_ERR(hdr);
911 goto e_free_blob;
912 }
913 data->hdr_address = __psp_pa(hdr);
914 data->hdr_len = params.hdr_len;
915
916 data->handle = sev->handle;
917 ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_SECRET, data, &argp->error);
918
919 kfree(hdr);
920
921e_free_blob:
922 kfree(blob);
923e_free:
924 kfree(data);
925e_unpin_memory:
Cfir Cohen50085be2020-08-07 17:37:46 -0700926 /* content of memory is updated, mark pages dirty */
927 for (i = 0; i < n; i++) {
928 set_page_dirty_lock(pages[i]);
929 mark_page_accessed(pages[i]);
930 }
Joerg Roedeleaf78262020-03-24 10:41:54 +0100931 sev_unpin_memory(kvm, pages, n);
932 return ret;
933}
934
935int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
936{
937 struct kvm_sev_cmd sev_cmd;
938 int r;
939
Tom Lendacky916391a2020-12-10 11:09:38 -0600940 if (!svm_sev_enabled() || !sev)
Joerg Roedeleaf78262020-03-24 10:41:54 +0100941 return -ENOTTY;
942
943 if (!argp)
944 return 0;
945
946 if (copy_from_user(&sev_cmd, argp, sizeof(struct kvm_sev_cmd)))
947 return -EFAULT;
948
949 mutex_lock(&kvm->lock);
950
951 switch (sev_cmd.id) {
952 case KVM_SEV_INIT:
953 r = sev_guest_init(kvm, &sev_cmd);
954 break;
955 case KVM_SEV_LAUNCH_START:
956 r = sev_launch_start(kvm, &sev_cmd);
957 break;
958 case KVM_SEV_LAUNCH_UPDATE_DATA:
959 r = sev_launch_update_data(kvm, &sev_cmd);
960 break;
961 case KVM_SEV_LAUNCH_MEASURE:
962 r = sev_launch_measure(kvm, &sev_cmd);
963 break;
964 case KVM_SEV_LAUNCH_FINISH:
965 r = sev_launch_finish(kvm, &sev_cmd);
966 break;
967 case KVM_SEV_GUEST_STATUS:
968 r = sev_guest_status(kvm, &sev_cmd);
969 break;
970 case KVM_SEV_DBG_DECRYPT:
971 r = sev_dbg_crypt(kvm, &sev_cmd, true);
972 break;
973 case KVM_SEV_DBG_ENCRYPT:
974 r = sev_dbg_crypt(kvm, &sev_cmd, false);
975 break;
976 case KVM_SEV_LAUNCH_SECRET:
977 r = sev_launch_secret(kvm, &sev_cmd);
978 break;
979 default:
980 r = -EINVAL;
981 goto out;
982 }
983
984 if (copy_to_user(argp, &sev_cmd, sizeof(struct kvm_sev_cmd)))
985 r = -EFAULT;
986
987out:
988 mutex_unlock(&kvm->lock);
989 return r;
990}
991
992int svm_register_enc_region(struct kvm *kvm,
993 struct kvm_enc_region *range)
994{
995 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
996 struct enc_region *region;
997 int ret = 0;
998
999 if (!sev_guest(kvm))
1000 return -ENOTTY;
1001
1002 if (range->addr > ULONG_MAX || range->size > ULONG_MAX)
1003 return -EINVAL;
1004
1005 region = kzalloc(sizeof(*region), GFP_KERNEL_ACCOUNT);
1006 if (!region)
1007 return -ENOMEM;
1008
1009 region->pages = sev_pin_memory(kvm, range->addr, range->size, &region->npages, 1);
Paolo Bonzinia8d908b2020-06-23 05:12:24 -04001010 if (IS_ERR(region->pages)) {
1011 ret = PTR_ERR(region->pages);
Joerg Roedeleaf78262020-03-24 10:41:54 +01001012 goto e_free;
1013 }
1014
	/*
	 * The guest may change the memory encryption attribute from C=0 -> C=1
	 * or vice versa for this memory range.  Make sure the caches are
	 * flushed so that guest data is written into memory with the correct
	 * C-bit.
	 */
1021 sev_clflush_pages(region->pages, region->npages);
1022
1023 region->uaddr = range->addr;
1024 region->size = range->size;
1025
1026 mutex_lock(&kvm->lock);
1027 list_add_tail(&region->list, &sev->regions_list);
1028 mutex_unlock(&kvm->lock);
1029
1030 return ret;
1031
1032e_free:
1033 kfree(region);
1034 return ret;
1035}
1036
1037static struct enc_region *
1038find_enc_region(struct kvm *kvm, struct kvm_enc_region *range)
1039{
1040 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
1041 struct list_head *head = &sev->regions_list;
1042 struct enc_region *i;
1043
1044 list_for_each_entry(i, head, list) {
1045 if (i->uaddr == range->addr &&
1046 i->size == range->size)
1047 return i;
1048 }
1049
1050 return NULL;
1051}
1052
1053static void __unregister_enc_region_locked(struct kvm *kvm,
1054 struct enc_region *region)
1055{
1056 sev_unpin_memory(kvm, region->pages, region->npages);
1057 list_del(&region->list);
1058 kfree(region);
1059}
1060
1061int svm_unregister_enc_region(struct kvm *kvm,
1062 struct kvm_enc_region *range)
1063{
1064 struct enc_region *region;
1065 int ret;
1066
1067 mutex_lock(&kvm->lock);
1068
1069 if (!sev_guest(kvm)) {
1070 ret = -ENOTTY;
1071 goto failed;
1072 }
1073
1074 region = find_enc_region(kvm, range);
1075 if (!region) {
1076 ret = -EINVAL;
1077 goto failed;
1078 }
1079
1080 /*
1081 * Ensure that all guest tagged cache entries are flushed before
1082 * releasing the pages back to the system for use. CLFLUSH will
1083 * not do this, so issue a WBINVD.
1084 */
1085 wbinvd_on_all_cpus();
1086
1087 __unregister_enc_region_locked(kvm, region);
1088
1089 mutex_unlock(&kvm->lock);
1090 return 0;
1091
1092failed:
1093 mutex_unlock(&kvm->lock);
1094 return ret;
1095}
1096
1097void sev_vm_destroy(struct kvm *kvm)
1098{
1099 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
1100 struct list_head *head = &sev->regions_list;
1101 struct list_head *pos, *q;
1102
1103 if (!sev_guest(kvm))
1104 return;
1105
1106 mutex_lock(&kvm->lock);
1107
1108 /*
1109 * Ensure that all guest tagged cache entries are flushed before
1110 * releasing the pages back to the system for use. CLFLUSH will
1111 * not do this, so issue a WBINVD.
1112 */
1113 wbinvd_on_all_cpus();
1114
	/*
	 * If userspace was terminated before unregistering the memory regions,
	 * unpin all of the registered memory.
	 */
1119 if (!list_empty(head)) {
1120 list_for_each_safe(pos, q, head) {
1121 __unregister_enc_region_locked(kvm,
1122 list_entry(pos, struct enc_region, list));
David Rientjes7be74942020-08-25 12:56:28 -07001123 cond_resched();
Joerg Roedeleaf78262020-03-24 10:41:54 +01001124 }
1125 }
1126
1127 mutex_unlock(&kvm->lock);
1128
1129 sev_unbind_asid(kvm, sev->handle);
1130 sev_asid_free(sev->asid);
1131}
1132
Tom Lendacky916391a2020-12-10 11:09:38 -06001133void __init sev_hardware_setup(void)
Joerg Roedeleaf78262020-03-24 10:41:54 +01001134{
Tom Lendacky916391a2020-12-10 11:09:38 -06001135 unsigned int eax, ebx, ecx, edx;
1136 bool sev_es_supported = false;
1137 bool sev_supported = false;
1138
1139 /* Does the CPU support SEV? */
1140 if (!boot_cpu_has(X86_FEATURE_SEV))
1141 goto out;
1142
1143 /* Retrieve SEV CPUID information */
1144 cpuid(0x8000001f, &eax, &ebx, &ecx, &edx);
1145
Tom Lendacky1edc1452020-12-10 11:09:49 -06001146 /* Set encryption bit location for SEV-ES guests */
1147 sev_enc_bit = ebx & 0x3f;
1148
Joerg Roedeleaf78262020-03-24 10:41:54 +01001149 /* Maximum number of encrypted guests supported simultaneously */
Tom Lendacky916391a2020-12-10 11:09:38 -06001150 max_sev_asid = ecx;
Joerg Roedeleaf78262020-03-24 10:41:54 +01001151
Paolo Bonzini9ef15302020-04-13 03:20:06 -04001152 if (!svm_sev_enabled())
Tom Lendacky916391a2020-12-10 11:09:38 -06001153 goto out;
Joerg Roedeleaf78262020-03-24 10:41:54 +01001154
1155 /* Minimum ASID value that should be used for SEV guest */
Tom Lendacky916391a2020-12-10 11:09:38 -06001156 min_sev_asid = edx;
Joerg Roedeleaf78262020-03-24 10:41:54 +01001157
1158 /* Initialize SEV ASID bitmaps */
1159 sev_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL);
1160 if (!sev_asid_bitmap)
Tom Lendacky916391a2020-12-10 11:09:38 -06001161 goto out;
Joerg Roedeleaf78262020-03-24 10:41:54 +01001162
1163 sev_reclaim_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL);
1164 if (!sev_reclaim_asid_bitmap)
Tom Lendacky916391a2020-12-10 11:09:38 -06001165 goto out;
Joerg Roedeleaf78262020-03-24 10:41:54 +01001166
Tom Lendacky916391a2020-12-10 11:09:38 -06001167 pr_info("SEV supported: %u ASIDs\n", max_sev_asid - min_sev_asid + 1);
1168 sev_supported = true;
Joerg Roedeleaf78262020-03-24 10:41:54 +01001169
Tom Lendacky916391a2020-12-10 11:09:38 -06001170 /* SEV-ES support requested? */
1171 if (!sev_es)
1172 goto out;
1173
1174 /* Does the CPU support SEV-ES? */
1175 if (!boot_cpu_has(X86_FEATURE_SEV_ES))
1176 goto out;
1177
	/*
	 * Has the system been allocated ASIDs for SEV-ES?  SEV-ES guests use
	 * ASIDs 1 to min_sev_asid - 1.
	 */
1179 if (min_sev_asid == 1)
1180 goto out;
1181
1182 pr_info("SEV-ES supported: %u ASIDs\n", min_sev_asid - 1);
1183 sev_es_supported = true;
1184
1185out:
1186 sev = sev_supported;
1187 sev_es = sev_es_supported;
Joerg Roedeleaf78262020-03-24 10:41:54 +01001188}
1189
1190void sev_hardware_teardown(void)
1191{
Paolo Bonzini9ef15302020-04-13 03:20:06 -04001192 if (!svm_sev_enabled())
1193 return;
1194
Joerg Roedeleaf78262020-03-24 10:41:54 +01001195 bitmap_free(sev_asid_bitmap);
1196 bitmap_free(sev_reclaim_asid_bitmap);
1197
1198 sev_flush_asids();
1199}
1200
Tom Lendackyadd5e2f2020-12-10 11:09:40 -06001201/*
1202 * Pages used by hardware to hold guest encrypted state must be flushed before
1203 * returning them to the system.
1204 */
1205static void sev_flush_guest_memory(struct vcpu_svm *svm, void *va,
1206 unsigned long len)
1207{
1208 /*
1209 * If hardware enforced cache coherency for encrypted mappings of the
1210 * same physical page is supported, nothing to do.
1211 */
1212 if (boot_cpu_has(X86_FEATURE_SME_COHERENT))
1213 return;
1214
1215 /*
1216 * If the VM Page Flush MSR is supported, use it to flush the page
1217 * (using the page virtual address and the guest ASID).
1218 */
1219 if (boot_cpu_has(X86_FEATURE_VM_PAGE_FLUSH)) {
1220 struct kvm_sev_info *sev;
1221 unsigned long va_start;
1222 u64 start, stop;
1223
1224 /* Align start and stop to page boundaries. */
1225 va_start = (unsigned long)va;
1226 start = (u64)va_start & PAGE_MASK;
1227 stop = PAGE_ALIGN((u64)va_start + len);
1228
1229 if (start < stop) {
1230 sev = &to_kvm_svm(svm->vcpu.kvm)->sev_info;
1231
1232 while (start < stop) {
1233 wrmsrl(MSR_AMD64_VM_PAGE_FLUSH,
1234 start | sev->asid);
1235
1236 start += PAGE_SIZE;
1237 }
1238
1239 return;
1240 }
1241
1242 WARN(1, "Address overflow, using WBINVD\n");
1243 }
1244
1245 /*
1246 * Hardware should always have one of the above features,
1247 * but if not, use WBINVD and issue a warning.
1248 */
1249 WARN_ONCE(1, "Using WBINVD to flush guest memory\n");
1250 wbinvd_on_all_cpus();
1251}
1252
1253void sev_free_vcpu(struct kvm_vcpu *vcpu)
1254{
1255 struct vcpu_svm *svm;
1256
1257 if (!sev_es_guest(vcpu->kvm))
1258 return;
1259
1260 svm = to_svm(vcpu);
1261
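	/*
	 * If the guest state was protected, the VMSA page contains encrypted,
	 * guest-tagged data and must be flushed before the page is freed.
	 */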
1262 if (vcpu->arch.guest_state_protected)
1263 sev_flush_guest_memory(svm, svm->vmsa, PAGE_SIZE);
1264 __free_page(virt_to_page(svm->vmsa));
Tom Lendacky8f423a82020-12-10 11:09:53 -06001265
1266 if (svm->ghcb_sa_free)
1267 kfree(svm->ghcb_sa);
Tom Lendackyadd5e2f2020-12-10 11:09:40 -06001268}
1269
Tom Lendacky291bd202020-12-10 11:09:47 -06001270static void dump_ghcb(struct vcpu_svm *svm)
1271{
1272 struct ghcb *ghcb = svm->ghcb;
1273 unsigned int nbits;
1274
1275 /* Re-use the dump_invalid_vmcb module parameter */
1276 if (!dump_invalid_vmcb) {
1277 pr_warn_ratelimited("set kvm_amd.dump_invalid_vmcb=1 to dump internal KVM state.\n");
1278 return;
1279 }
1280
1281 nbits = sizeof(ghcb->save.valid_bitmap) * 8;
1282
1283 pr_err("GHCB (GPA=%016llx):\n", svm->vmcb->control.ghcb_gpa);
1284 pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_code",
1285 ghcb->save.sw_exit_code, ghcb_sw_exit_code_is_valid(ghcb));
1286 pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_1",
1287 ghcb->save.sw_exit_info_1, ghcb_sw_exit_info_1_is_valid(ghcb));
1288 pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_2",
1289 ghcb->save.sw_exit_info_2, ghcb_sw_exit_info_2_is_valid(ghcb));
1290 pr_err("%-20s%016llx is_valid: %u\n", "sw_scratch",
1291 ghcb->save.sw_scratch, ghcb_sw_scratch_is_valid(ghcb));
1292 pr_err("%-20s%*pb\n", "valid_bitmap", nbits, ghcb->save.valid_bitmap);
1293}
1294
1295static void sev_es_sync_to_ghcb(struct vcpu_svm *svm)
1296{
1297 struct kvm_vcpu *vcpu = &svm->vcpu;
1298 struct ghcb *ghcb = svm->ghcb;
1299
1300 /*
1301 * The GHCB protocol so far allows for the following data
1302 * to be returned:
1303 * GPRs RAX, RBX, RCX, RDX
1304 *
1305 * Copy their values to the GHCB if they are dirty.
1306 */
1307 if (kvm_register_is_dirty(vcpu, VCPU_REGS_RAX))
1308 ghcb_set_rax(ghcb, vcpu->arch.regs[VCPU_REGS_RAX]);
1309 if (kvm_register_is_dirty(vcpu, VCPU_REGS_RBX))
1310 ghcb_set_rbx(ghcb, vcpu->arch.regs[VCPU_REGS_RBX]);
1311 if (kvm_register_is_dirty(vcpu, VCPU_REGS_RCX))
1312 ghcb_set_rcx(ghcb, vcpu->arch.regs[VCPU_REGS_RCX]);
1313 if (kvm_register_is_dirty(vcpu, VCPU_REGS_RDX))
1314 ghcb_set_rdx(ghcb, vcpu->arch.regs[VCPU_REGS_RDX]);
1315}
1316
1317static void sev_es_sync_from_ghcb(struct vcpu_svm *svm)
1318{
1319 struct vmcb_control_area *control = &svm->vmcb->control;
1320 struct kvm_vcpu *vcpu = &svm->vcpu;
1321 struct ghcb *ghcb = svm->ghcb;
1322 u64 exit_code;
1323
1324 /*
1325 * The GHCB protocol so far allows for the following data
1326 * to be supplied:
1327 * GPRs RAX, RBX, RCX, RDX
1328 * XCR0
1329 * CPL
1330 *
1331 * VMMCALL allows the guest to provide extra registers. KVM also
1332 * expects RSI for hypercalls, so include that, too.
1333 *
1334 * Copy their values to the appropriate location if supplied.
1335 */
1336 memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs));
1337
1338 vcpu->arch.regs[VCPU_REGS_RAX] = ghcb_get_rax_if_valid(ghcb);
1339 vcpu->arch.regs[VCPU_REGS_RBX] = ghcb_get_rbx_if_valid(ghcb);
1340 vcpu->arch.regs[VCPU_REGS_RCX] = ghcb_get_rcx_if_valid(ghcb);
1341 vcpu->arch.regs[VCPU_REGS_RDX] = ghcb_get_rdx_if_valid(ghcb);
1342 vcpu->arch.regs[VCPU_REGS_RSI] = ghcb_get_rsi_if_valid(ghcb);
1343
1344 svm->vmcb->save.cpl = ghcb_get_cpl_if_valid(ghcb);
1345
1346 if (ghcb_xcr0_is_valid(ghcb)) {
1347 vcpu->arch.xcr0 = ghcb_get_xcr0(ghcb);
1348 kvm_update_cpuid_runtime(vcpu);
1349 }
1350
1351 /* Copy the GHCB exit information into the VMCB fields */
1352 exit_code = ghcb_get_sw_exit_code(ghcb);
1353 control->exit_code = lower_32_bits(exit_code);
1354 control->exit_code_hi = upper_32_bits(exit_code);
1355 control->exit_info_1 = ghcb_get_sw_exit_info_1(ghcb);
1356 control->exit_info_2 = ghcb_get_sw_exit_info_2(ghcb);
1357
1358 /* Clear the valid entries fields */
1359 memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
1360}
1361
1362static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
1363{
1364 struct kvm_vcpu *vcpu;
1365 struct ghcb *ghcb;
1366 u64 exit_code = 0;
1367
1368 ghcb = svm->ghcb;
1369
1370 /* Only GHCB Usage code 0 is supported */
1371 if (ghcb->ghcb_usage)
1372 goto vmgexit_err;
1373
	/*
	 * Retrieve the exit code now even though it may not be marked valid,
	 * as it can help with debugging.
	 */
1378 exit_code = ghcb_get_sw_exit_code(ghcb);
1379
1380 if (!ghcb_sw_exit_code_is_valid(ghcb) ||
1381 !ghcb_sw_exit_info_1_is_valid(ghcb) ||
1382 !ghcb_sw_exit_info_2_is_valid(ghcb))
1383 goto vmgexit_err;
1384
1385 switch (ghcb_get_sw_exit_code(ghcb)) {
1386 case SVM_EXIT_READ_DR7:
1387 break;
1388 case SVM_EXIT_WRITE_DR7:
1389 if (!ghcb_rax_is_valid(ghcb))
1390 goto vmgexit_err;
1391 break;
1392 case SVM_EXIT_RDTSC:
1393 break;
1394 case SVM_EXIT_RDPMC:
1395 if (!ghcb_rcx_is_valid(ghcb))
1396 goto vmgexit_err;
1397 break;
1398 case SVM_EXIT_CPUID:
1399 if (!ghcb_rax_is_valid(ghcb) ||
1400 !ghcb_rcx_is_valid(ghcb))
1401 goto vmgexit_err;
1402 if (ghcb_get_rax(ghcb) == 0xd)
1403 if (!ghcb_xcr0_is_valid(ghcb))
1404 goto vmgexit_err;
1405 break;
1406 case SVM_EXIT_INVD:
1407 break;
1408 case SVM_EXIT_IOIO:
1409 if (!(ghcb_get_sw_exit_info_1(ghcb) & SVM_IOIO_TYPE_MASK))
1410 if (!ghcb_rax_is_valid(ghcb))
1411 goto vmgexit_err;
1412 break;
1413 case SVM_EXIT_MSR:
1414 if (!ghcb_rcx_is_valid(ghcb))
1415 goto vmgexit_err;
1416 if (ghcb_get_sw_exit_info_1(ghcb)) {
1417 if (!ghcb_rax_is_valid(ghcb) ||
1418 !ghcb_rdx_is_valid(ghcb))
1419 goto vmgexit_err;
1420 }
1421 break;
1422 case SVM_EXIT_VMMCALL:
1423 if (!ghcb_rax_is_valid(ghcb) ||
1424 !ghcb_cpl_is_valid(ghcb))
1425 goto vmgexit_err;
1426 break;
1427 case SVM_EXIT_RDTSCP:
1428 break;
1429 case SVM_EXIT_WBINVD:
1430 break;
1431 case SVM_EXIT_MONITOR:
1432 if (!ghcb_rax_is_valid(ghcb) ||
1433 !ghcb_rcx_is_valid(ghcb) ||
1434 !ghcb_rdx_is_valid(ghcb))
1435 goto vmgexit_err;
1436 break;
1437 case SVM_EXIT_MWAIT:
1438 if (!ghcb_rax_is_valid(ghcb) ||
1439 !ghcb_rcx_is_valid(ghcb))
1440 goto vmgexit_err;
1441 break;
Tom Lendacky8f423a82020-12-10 11:09:53 -06001442 case SVM_VMGEXIT_MMIO_READ:
1443 case SVM_VMGEXIT_MMIO_WRITE:
1444 if (!ghcb_sw_scratch_is_valid(ghcb))
1445 goto vmgexit_err;
1446 break;
Tom Lendacky291bd202020-12-10 11:09:47 -06001447 case SVM_VMGEXIT_UNSUPPORTED_EVENT:
1448 break;
1449 default:
1450 goto vmgexit_err;
1451 }
1452
1453 return 0;
1454
1455vmgexit_err:
1456 vcpu = &svm->vcpu;
1457
1458 if (ghcb->ghcb_usage) {
1459 vcpu_unimpl(vcpu, "vmgexit: ghcb usage %#x is not valid\n",
1460 ghcb->ghcb_usage);
1461 } else {
1462 vcpu_unimpl(vcpu, "vmgexit: exit reason %#llx is not valid\n",
1463 exit_code);
1464 dump_ghcb(svm);
1465 }
1466
1467 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
1468 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
1469 vcpu->run->internal.ndata = 2;
1470 vcpu->run->internal.data[0] = exit_code;
1471 vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu;
1472
1473 return -EINVAL;
1474}
1475
1476static void pre_sev_es_run(struct vcpu_svm *svm)
1477{
1478 if (!svm->ghcb)
1479 return;
1480
Tom Lendacky8f423a82020-12-10 11:09:53 -06001481 if (svm->ghcb_sa_free) {
1482 /*
1483 * The scratch area lives outside the GHCB, so there is a
1484 * buffer that, depending on the operation performed, may
1485 * need to be synced, then freed.
1486 */
1487 if (svm->ghcb_sa_sync) {
1488 kvm_write_guest(svm->vcpu.kvm,
1489 ghcb_get_sw_scratch(svm->ghcb),
1490 svm->ghcb_sa, svm->ghcb_sa_len);
1491 svm->ghcb_sa_sync = false;
1492 }
1493
1494 kfree(svm->ghcb_sa);
1495 svm->ghcb_sa = NULL;
1496 svm->ghcb_sa_free = false;
1497 }
1498
Tom Lendackyd523ab6b2020-12-10 11:09:48 -06001499 trace_kvm_vmgexit_exit(svm->vcpu.vcpu_id, svm->ghcb);
1500
Tom Lendacky291bd202020-12-10 11:09:47 -06001501 sev_es_sync_to_ghcb(svm);
1502
1503 kvm_vcpu_unmap(&svm->vcpu, &svm->ghcb_map, true);
1504 svm->ghcb = NULL;
1505}
1506
Joerg Roedeleaf78262020-03-24 10:41:54 +01001507void pre_sev_run(struct vcpu_svm *svm, int cpu)
1508{
1509 struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
1510 int asid = sev_get_asid(svm->vcpu.kvm);
1511
Tom Lendacky291bd202020-12-10 11:09:47 -06001512 /* Perform any SEV-ES pre-run actions */
1513 pre_sev_es_run(svm);
1514
Joerg Roedeleaf78262020-03-24 10:41:54 +01001515 /* Assign the asid allocated with this SEV guest */
Paolo Bonzinidee734a2020-11-30 09:39:59 -05001516 svm->asid = asid;
Joerg Roedeleaf78262020-03-24 10:41:54 +01001517
1518 /*
1519 * Flush guest TLB:
1520 *
1521 * 1) when different VMCB for the same ASID is to be run on the same host CPU.
1522 * 2) or this VMCB was executed on different host CPU in previous VMRUNs.
1523 */
1524 if (sd->sev_vmcbs[asid] == svm->vmcb &&
Jim Mattson8a14fe42020-06-03 16:56:22 -07001525 svm->vcpu.arch.last_vmentry_cpu == cpu)
Joerg Roedeleaf78262020-03-24 10:41:54 +01001526 return;
1527
Joerg Roedeleaf78262020-03-24 10:41:54 +01001528 sd->sev_vmcbs[asid] = svm->vmcb;
1529 svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID;
Joerg Roedel06e78522020-06-25 10:03:23 +02001530 vmcb_mark_dirty(svm->vmcb, VMCB_ASID);
Joerg Roedeleaf78262020-03-24 10:41:54 +01001531}
Tom Lendacky291bd202020-12-10 11:09:47 -06001532
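/*
 * Limit on how much guest memory is copied into a kernel buffer when the
 * scratch area lies outside of the GHCB itself.
 */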
Tom Lendacky8f423a82020-12-10 11:09:53 -06001533#define GHCB_SCRATCH_AREA_LIMIT (16ULL * PAGE_SIZE)
1534static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
1535{
1536 struct vmcb_control_area *control = &svm->vmcb->control;
1537 struct ghcb *ghcb = svm->ghcb;
1538 u64 ghcb_scratch_beg, ghcb_scratch_end;
1539 u64 scratch_gpa_beg, scratch_gpa_end;
1540 void *scratch_va;
1541
1542 scratch_gpa_beg = ghcb_get_sw_scratch(ghcb);
1543 if (!scratch_gpa_beg) {
1544 pr_err("vmgexit: scratch gpa not provided\n");
1545 return false;
1546 }
1547
1548 scratch_gpa_end = scratch_gpa_beg + len;
1549 if (scratch_gpa_end < scratch_gpa_beg) {
1550 pr_err("vmgexit: scratch length (%#llx) not valid for scratch address (%#llx)\n",
1551 len, scratch_gpa_beg);
1552 return false;
1553 }
1554
1555 if ((scratch_gpa_beg & PAGE_MASK) == control->ghcb_gpa) {
1556 /* Scratch area begins within GHCB */
1557 ghcb_scratch_beg = control->ghcb_gpa +
1558 offsetof(struct ghcb, shared_buffer);
1559 ghcb_scratch_end = control->ghcb_gpa +
1560 offsetof(struct ghcb, reserved_1);
1561
1562 /*
1563 * If the scratch area begins within the GHCB, it must be
1564 * completely contained in the GHCB shared buffer area.
1565 */
1566 if (scratch_gpa_beg < ghcb_scratch_beg ||
1567 scratch_gpa_end > ghcb_scratch_end) {
1568 pr_err("vmgexit: scratch area is outside of GHCB shared buffer area (%#llx - %#llx)\n",
1569 scratch_gpa_beg, scratch_gpa_end);
1570 return false;
1571 }
1572
1573 scratch_va = (void *)svm->ghcb;
1574 scratch_va += (scratch_gpa_beg - control->ghcb_gpa);
1575 } else {
		/*
		 * The guest memory must be read into a kernel buffer, so
		 * limit the size.
		 */
1580 if (len > GHCB_SCRATCH_AREA_LIMIT) {
1581 pr_err("vmgexit: scratch area exceeds KVM limits (%#llx requested, %#llx limit)\n",
1582 len, GHCB_SCRATCH_AREA_LIMIT);
1583 return false;
1584 }
1585 scratch_va = kzalloc(len, GFP_KERNEL);
1586 if (!scratch_va)
1587 return false;
1588
1589 if (kvm_read_guest(svm->vcpu.kvm, scratch_gpa_beg, scratch_va, len)) {
1590 /* Unable to copy scratch area from guest */
1591 pr_err("vmgexit: kvm_read_guest for scratch area failed\n");
1592
1593 kfree(scratch_va);
1594 return false;
1595 }
1596
1597 /*
1598 * The scratch area is outside the GHCB. The operation will
1599 * dictate whether the buffer needs to be synced before running
1600 * the vCPU next time (i.e. a read was requested so the data
1601 * must be written back to the guest memory).
1602 */
1603 svm->ghcb_sa_sync = sync;
1604 svm->ghcb_sa_free = true;
1605 }
1606
1607 svm->ghcb_sa = scratch_va;
1608 svm->ghcb_sa_len = len;
1609
1610 return true;
1611}
1612
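/*
 * GHCB MSR protocol helpers: the 64-bit GHCB MSR value is cached in the
 * VMCB's ghcb_gpa field, and request/response fields are packed into it
 * as (value & mask) << pos.
 */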
Tom Lendackyd3694662020-12-10 11:09:50 -06001613static void set_ghcb_msr_bits(struct vcpu_svm *svm, u64 value, u64 mask,
1614 unsigned int pos)
1615{
1616 svm->vmcb->control.ghcb_gpa &= ~(mask << pos);
1617 svm->vmcb->control.ghcb_gpa |= (value & mask) << pos;
1618}
1619
1620static u64 get_ghcb_msr_bits(struct vcpu_svm *svm, u64 mask, unsigned int pos)
1621{
1622 return (svm->vmcb->control.ghcb_gpa >> pos) & mask;
1623}
1624
Tom Lendacky1edc1452020-12-10 11:09:49 -06001625static void set_ghcb_msr(struct vcpu_svm *svm, u64 value)
1626{
1627 svm->vmcb->control.ghcb_gpa = value;
1628}
1629
Tom Lendacky291bd202020-12-10 11:09:47 -06001630static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
1631{
Tom Lendacky1edc1452020-12-10 11:09:49 -06001632 struct vmcb_control_area *control = &svm->vmcb->control;
Tom Lendackyd3694662020-12-10 11:09:50 -06001633 struct kvm_vcpu *vcpu = &svm->vcpu;
Tom Lendacky1edc1452020-12-10 11:09:49 -06001634 u64 ghcb_info;
Tom Lendackyd3694662020-12-10 11:09:50 -06001635 int ret = 1;
Tom Lendacky1edc1452020-12-10 11:09:49 -06001636
1637 ghcb_info = control->ghcb_gpa & GHCB_MSR_INFO_MASK;
1638
Tom Lendacky59e38b52020-12-10 11:09:52 -06001639 trace_kvm_vmgexit_msr_protocol_enter(svm->vcpu.vcpu_id,
1640 control->ghcb_gpa);
1641
Tom Lendacky1edc1452020-12-10 11:09:49 -06001642 switch (ghcb_info) {
1643 case GHCB_MSR_SEV_INFO_REQ:
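		/* Reply with the supported GHCB protocol range and the C-bit position. */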
1644 set_ghcb_msr(svm, GHCB_MSR_SEV_INFO(GHCB_VERSION_MAX,
1645 GHCB_VERSION_MIN,
1646 sev_enc_bit));
1647 break;
Tom Lendackyd3694662020-12-10 11:09:50 -06001648 case GHCB_MSR_CPUID_REQ: {
1649 u64 cpuid_fn, cpuid_reg, cpuid_value;
1650
1651 cpuid_fn = get_ghcb_msr_bits(svm,
1652 GHCB_MSR_CPUID_FUNC_MASK,
1653 GHCB_MSR_CPUID_FUNC_POS);
1654
1655 /* Initialize the registers needed by the CPUID intercept */
1656 vcpu->arch.regs[VCPU_REGS_RAX] = cpuid_fn;
1657 vcpu->arch.regs[VCPU_REGS_RCX] = 0;
1658
1659 ret = svm_invoke_exit_handler(svm, SVM_EXIT_CPUID);
1660 if (!ret) {
1661 ret = -EINVAL;
1662 break;
1663 }
1664
1665 cpuid_reg = get_ghcb_msr_bits(svm,
1666 GHCB_MSR_CPUID_REG_MASK,
1667 GHCB_MSR_CPUID_REG_POS);
1668 if (cpuid_reg == 0)
1669 cpuid_value = vcpu->arch.regs[VCPU_REGS_RAX];
1670 else if (cpuid_reg == 1)
1671 cpuid_value = vcpu->arch.regs[VCPU_REGS_RBX];
1672 else if (cpuid_reg == 2)
1673 cpuid_value = vcpu->arch.regs[VCPU_REGS_RCX];
1674 else
1675 cpuid_value = vcpu->arch.regs[VCPU_REGS_RDX];
1676
1677 set_ghcb_msr_bits(svm, cpuid_value,
1678 GHCB_MSR_CPUID_VALUE_MASK,
1679 GHCB_MSR_CPUID_VALUE_POS);
1680
1681 set_ghcb_msr_bits(svm, GHCB_MSR_CPUID_RESP,
1682 GHCB_MSR_INFO_MASK,
1683 GHCB_MSR_INFO_POS);
1684 break;
1685 }
Tom Lendackye1d71112020-12-10 11:09:51 -06001686 case GHCB_MSR_TERM_REQ: {
1687 u64 reason_set, reason_code;
1688
1689 reason_set = get_ghcb_msr_bits(svm,
1690 GHCB_MSR_TERM_REASON_SET_MASK,
1691 GHCB_MSR_TERM_REASON_SET_POS);
1692 reason_code = get_ghcb_msr_bits(svm,
1693 GHCB_MSR_TERM_REASON_MASK,
1694 GHCB_MSR_TERM_REASON_POS);
1695 pr_info("SEV-ES guest requested termination: %#llx:%#llx\n",
1696 reason_set, reason_code);
1697 fallthrough;
1698 }
Tom Lendacky1edc1452020-12-10 11:09:49 -06001699 default:
Tom Lendackyd3694662020-12-10 11:09:50 -06001700 ret = -EINVAL;
Tom Lendacky1edc1452020-12-10 11:09:49 -06001701 }
1702
Tom Lendacky59e38b52020-12-10 11:09:52 -06001703 trace_kvm_vmgexit_msr_protocol_exit(svm->vcpu.vcpu_id,
1704 control->ghcb_gpa, ret);
1705
Tom Lendackyd3694662020-12-10 11:09:50 -06001706 return ret;
Tom Lendacky291bd202020-12-10 11:09:47 -06001707}
1708
1709int sev_handle_vmgexit(struct vcpu_svm *svm)
1710{
1711 struct vmcb_control_area *control = &svm->vmcb->control;
1712 u64 ghcb_gpa, exit_code;
1713 struct ghcb *ghcb;
1714 int ret;
1715
1716 /* Validate the GHCB */
1717 ghcb_gpa = control->ghcb_gpa;
1718 if (ghcb_gpa & GHCB_MSR_INFO_MASK)
1719 return sev_handle_vmgexit_msr_protocol(svm);
1720
1721 if (!ghcb_gpa) {
1722 vcpu_unimpl(&svm->vcpu, "vmgexit: GHCB gpa is not set\n");
1723 return -EINVAL;
1724 }
1725
1726 if (kvm_vcpu_map(&svm->vcpu, ghcb_gpa >> PAGE_SHIFT, &svm->ghcb_map)) {
1727 /* Unable to map GHCB from guest */
1728 vcpu_unimpl(&svm->vcpu, "vmgexit: error mapping GHCB [%#llx] from guest\n",
1729 ghcb_gpa);
1730 return -EINVAL;
1731 }
1732
1733 svm->ghcb = svm->ghcb_map.hva;
1734 ghcb = svm->ghcb_map.hva;
1735
Tom Lendackyd523ab6b2020-12-10 11:09:48 -06001736 trace_kvm_vmgexit_enter(svm->vcpu.vcpu_id, ghcb);
1737
Tom Lendacky291bd202020-12-10 11:09:47 -06001738 exit_code = ghcb_get_sw_exit_code(ghcb);
1739
1740 ret = sev_es_validate_vmgexit(svm);
1741 if (ret)
1742 return ret;
1743
1744 sev_es_sync_from_ghcb(svm);
1745 ghcb_set_sw_exit_info_1(ghcb, 0);
1746 ghcb_set_sw_exit_info_2(ghcb, 0);
1747
1748 ret = -EINVAL;
1749 switch (exit_code) {
Tom Lendacky8f423a82020-12-10 11:09:53 -06001750 case SVM_VMGEXIT_MMIO_READ:
1751 if (!setup_vmgexit_scratch(svm, true, control->exit_info_2))
1752 break;
1753
1754 ret = kvm_sev_es_mmio_read(&svm->vcpu,
1755 control->exit_info_1,
1756 control->exit_info_2,
1757 svm->ghcb_sa);
1758 break;
1759 case SVM_VMGEXIT_MMIO_WRITE:
1760 if (!setup_vmgexit_scratch(svm, false, control->exit_info_2))
1761 break;
1762
1763 ret = kvm_sev_es_mmio_write(&svm->vcpu,
1764 control->exit_info_1,
1765 control->exit_info_2,
1766 svm->ghcb_sa);
1767 break;
Tom Lendacky291bd202020-12-10 11:09:47 -06001768 case SVM_VMGEXIT_UNSUPPORTED_EVENT:
1769 vcpu_unimpl(&svm->vcpu,
1770 "vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n",
1771 control->exit_info_1, control->exit_info_2);
1772 break;
1773 default:
1774 ret = svm_invoke_exit_handler(svm, exit_code);
1775 }
1776
1777 return ret;
1778}