/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * KVM dirty ring implementation
 *
 * Copyright 2019 Red Hat, Inc.
 */
#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/vmalloc.h>
#include <linux/kvm_dirty_ring.h>
#include <trace/events/kvm.h>
#include "kvm_mm.h"
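
/*
 * Overview for readers of this file: each vCPU owns one dirty ring that
 * userspace maps with mmap().  The kernel is the producer, publishing
 * one kvm_dirty_gfn per dirtied page via kvm_dirty_ring_push().
 * Userspace is the consumer: it marks collected entries with
 * KVM_DIRTY_GFN_F_RESET and invokes the KVM_RESET_DIRTY_RINGS ioctl,
 * which lands in kvm_dirty_ring_reset() below to reclaim ring space and
 * re-protect the pages.
 */
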
/*
 * Weak default; architectures that also harvest dirty logs from a
 * hardware buffer (e.g. x86 with PML) override this with the number of
 * extra entries a single vCPU exit may have to publish.
 */
int __weak kvm_cpu_dirty_log_size(void)
{
	return 0;
}

u32 kvm_dirty_ring_get_rsvd_entries(void)
{
	return KVM_DIRTY_RING_RSVD_ENTRIES + kvm_cpu_dirty_log_size();
}

/* Entries published by the kernel but not yet reclaimed by reset */
static u32 kvm_dirty_ring_used(struct kvm_dirty_ring *ring)
{
	return READ_ONCE(ring->dirty_index) - READ_ONCE(ring->reset_index);
}

/*
 * A soft-full ring still has room for the reserved entries; it is the
 * signal to exit to userspace so the ring can be drained before it
 * goes hard-full.
 */
bool kvm_dirty_ring_soft_full(struct kvm_dirty_ring *ring)
{
	return kvm_dirty_ring_used(ring) >= ring->soft_limit;
}

static bool kvm_dirty_ring_full(struct kvm_dirty_ring *ring)
{
	return kvm_dirty_ring_used(ring) >= ring->size;
}
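
/*
 * Worked example of the index arithmetic above: dirty_index and
 * reset_index are free-running u32 counters, never masked.  Unsigned
 * subtraction stays correct across wraparound, e.g. with
 * dirty_index == 0x00000003 and reset_index == 0xfffffffe the ring
 * holds 0x00000003 - 0xfffffffe == 5 used entries.  Only the array
 * accesses mask with (ring->size - 1), which is why the entry count
 * must be a power of two.
 */
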
static void kvm_reset_dirty_gfn(struct kvm *kvm, u32 slot, u64 offset, u64 mask)
{
	struct kvm_memory_slot *memslot;
	int as_id, id;

	as_id = slot >> 16;
	id = (u16)slot;

	if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS)
		return;

	memslot = id_to_memslot(__kvm_memslots(kvm, as_id), id);

	if (!memslot || (offset + __fls(mask)) >= memslot->npages)
		return;

	KVM_MMU_LOCK(kvm);
	kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot, offset, mask);
	KVM_MMU_UNLOCK(kvm);
}
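
/*
 * The "slot" argument above packs the address space id in the high 16
 * bits and the memslot id in the low 16 bits, mirroring how userspace
 * encodes slots for KVM_SET_USER_MEMORY_REGION.  For example, slot
 * 0x0001000a decodes to as_id 1, memslot id 10.
 */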
| 59 | |
int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring, int index, u32 size)
{
	ring->dirty_gfns = vzalloc(size);
	if (!ring->dirty_gfns)
		return -ENOMEM;

	ring->size = size / sizeof(struct kvm_dirty_gfn);
	ring->soft_limit = ring->size - kvm_dirty_ring_get_rsvd_entries();
	ring->dirty_index = 0;
	ring->reset_index = 0;
	ring->index = index;

	return 0;
}
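
/*
 * Note on units: "size" here is in bytes (it is what userspace will
 * mmap), while ring->size is in entries.  With the 16-byte
 * kvm_dirty_gfn, a 1 MiB allocation holds 65536 entries.  The index
 * masking in this file assumes the entry count is a power of two; the
 * KVM_CAP_DIRTY_LOG_RING enablement path is expected to enforce that.
 */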
| 74 | |
static inline void kvm_dirty_gfn_set_invalid(struct kvm_dirty_gfn *gfn)
{
	gfn->flags = 0;
}

static inline void kvm_dirty_gfn_set_dirtied(struct kvm_dirty_gfn *gfn)
{
	gfn->flags = KVM_DIRTY_GFN_F_DIRTY;
}

static inline bool kvm_dirty_gfn_harvested(struct kvm_dirty_gfn *gfn)
{
	/*
	 * The load-acquire pairs with the consumer's store-release of
	 * KVM_DIRTY_GFN_F_RESET, keeping the reads of slot and offset
	 * in the reset loop from being reordered before the flag check.
	 */
	return smp_load_acquire(&gfn->flags) & KVM_DIRTY_GFN_F_RESET;
}
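
/*
 * Entry lifecycle implied by the three helpers above: an entry starts
 * invalid (flags == 0), the kernel publishes it as dirtied
 * (KVM_DIRTY_GFN_F_DIRTY), userspace acknowledges collection by
 * setting KVM_DIRTY_GFN_F_RESET, and the reset path below moves it
 * back to invalid so the slot can be reused.
 */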
| 89 | |
int kvm_dirty_ring_reset(struct kvm *kvm, struct kvm_dirty_ring *ring)
{
	u32 cur_slot, next_slot;
	u64 cur_offset, next_offset;
	unsigned long mask;
	int count = 0;
	struct kvm_dirty_gfn *entry;
	bool first_round = true;

	/* This is only needed to make compilers happy */
	cur_slot = cur_offset = mask = 0;

	while (true) {
		entry = &ring->dirty_gfns[ring->reset_index & (ring->size - 1)];

		if (!kvm_dirty_gfn_harvested(entry))
			break;

		next_slot = READ_ONCE(entry->slot);
		next_offset = READ_ONCE(entry->offset);

		/* Update the flags to reflect that this GFN is reset */
		kvm_dirty_gfn_set_invalid(entry);

		ring->reset_index++;
		count++;
		/*
		 * Try to coalesce the reset operations when the guest is
		 * scanning pages in the same slot.
		 */
		if (!first_round && next_slot == cur_slot) {
			s64 delta = next_offset - cur_offset;

			if (delta >= 0 && delta < BITS_PER_LONG) {
				mask |= 1ull << delta;
				continue;
			}

			/* Backwards visit, careful about overflows! */
			if (delta > -BITS_PER_LONG && delta < 0 &&
			    (mask << -delta >> -delta) == mask) {
				cur_offset = next_offset;
				mask = (mask << -delta) | 1;
				continue;
			}
		}
		kvm_reset_dirty_gfn(kvm, cur_slot, cur_offset, mask);
		cur_slot = next_slot;
		cur_offset = next_offset;
		mask = 1;
		first_round = false;
	}

	kvm_reset_dirty_gfn(kvm, cur_slot, cur_offset, mask);

	trace_kvm_dirty_ring_reset(ring);

	return count;
}
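
/*
 * Worked example of the coalescing above, assuming a 64-bit mask:
 * harvesting offsets 5, 6, 7 in one slot accumulates cur_offset == 5,
 * mask == 0b111 (bit N stands for cur_offset + N).  A backwards visit
 * to offset 4 gives delta == -1; since (mask << 1 >> 1) == mask no
 * high bits would be lost, so the mask shifts to 0b1110, bit 0 is set
 * for the new base, and cur_offset becomes 4 with mask == 0b1111,
 * covering offsets 4-7 in a single kvm_reset_dirty_gfn() call.
 */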
| 149 | |
void kvm_dirty_ring_push(struct kvm_dirty_ring *ring, u32 slot, u64 offset)
{
	struct kvm_dirty_gfn *entry;

	/* It should never get full */
	WARN_ON_ONCE(kvm_dirty_ring_full(ring));

	entry = &ring->dirty_gfns[ring->dirty_index & (ring->size - 1)];

	entry->slot = slot;
	entry->offset = offset;
	/*
	 * Make sure the data is filled in before we publish this to
	 * the userspace program. There's no paired kernel-side reader.
	 */
	smp_wmb();
	kvm_dirty_gfn_set_dirtied(entry);
	ring->dirty_index++;
	trace_kvm_dirty_ring_push(ring, slot, offset);
}
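
/*
 * The smp_wmb() above is the producer half of the publish protocol;
 * the consumer half lives in userspace.  A minimal harvest-loop
 * sketch (flag and field names come from <linux/kvm.h>; the
 * fetch_head/collect() bookkeeping is hypothetical, and the barriers
 * stand for whatever primitives the consumer's toolchain provides):
 *
 *	struct kvm_dirty_gfn *e = &ring[fetch_head & (nr_entries - 1)];
 *
 *	if (READ_ONCE(e->flags) & KVM_DIRTY_GFN_F_DIRTY) {
 *		smp_rmb();	// pairs with the smp_wmb() above
 *		collect(e->slot, e->offset);
 *		// pairs with smp_load_acquire() in the reset path
 *		smp_store_release(&e->flags, KVM_DIRTY_GFN_F_RESET);
 *		fetch_head++;
 *	}
 */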
| 170 | |
struct page *kvm_dirty_ring_get_page(struct kvm_dirty_ring *ring, u32 offset)
{
	return vmalloc_to_page((void *)ring->dirty_gfns + offset * PAGE_SIZE);
}

void kvm_dirty_ring_free(struct kvm_dirty_ring *ring)
{
	vfree(ring->dirty_gfns);
	ring->dirty_gfns = NULL;
}
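
/*
 * kvm_dirty_ring_get_page() above backs the vCPU mmap fault path:
 * userspace reaches the ring by mapping the vcpu fd at page offset
 * KVM_DIRTY_LOG_PAGE_OFFSET, and each fault is translated back to one
 * of the vmalloc'ed pages holding dirty_gfns.
 */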