Peter Xu | fb04a1e | 2020-09-30 21:22:22 -0400 | [diff] [blame] | 1 | /* SPDX-License-Identifier: GPL-2.0-only */ |
| 2 | /* |
| 3 | * KVM dirty ring implementation |
| 4 | * |
| 5 | * Copyright 2019 Red Hat, Inc. |
| 6 | */ |
| 7 | #include <linux/kvm_host.h> |
| 8 | #include <linux/kvm.h> |
| 9 | #include <linux/vmalloc.h> |
| 10 | #include <linux/kvm_dirty_ring.h> |
| 11 | #include <trace/events/kvm.h> |
Ben Gardon | 531810c | 2021-02-02 10:57:24 -0800 | [diff] [blame] | 12 | #include "mmu_lock.h" |
Peter Xu | fb04a1e | 2020-09-30 21:22:22 -0400 | [diff] [blame] | 13 | |
/*
 * Number of extra dirty-ring entries the architecture may need per vcpu.
 *
 * Weak default returns 0; an architecture that wants extra slack in the
 * ring overrides this (the value is folded into the reserved-entry count
 * by kvm_dirty_ring_get_rsvd_entries() below).
 */
int __weak kvm_cpu_dirty_log_size(void)
{
	return 0;
}
| 18 | |
| 19 | u32 kvm_dirty_ring_get_rsvd_entries(void) |
| 20 | { |
| 21 | return KVM_DIRTY_RING_RSVD_ENTRIES + kvm_cpu_dirty_log_size(); |
| 22 | } |
| 23 | |
| 24 | static u32 kvm_dirty_ring_used(struct kvm_dirty_ring *ring) |
| 25 | { |
| 26 | return READ_ONCE(ring->dirty_index) - READ_ONCE(ring->reset_index); |
| 27 | } |
| 28 | |
| 29 | bool kvm_dirty_ring_soft_full(struct kvm_dirty_ring *ring) |
| 30 | { |
| 31 | return kvm_dirty_ring_used(ring) >= ring->soft_limit; |
| 32 | } |
| 33 | |
| 34 | static bool kvm_dirty_ring_full(struct kvm_dirty_ring *ring) |
| 35 | { |
| 36 | return kvm_dirty_ring_used(ring) >= ring->size; |
| 37 | } |
| 38 | |
/*
 * Return the dirty ring of the vcpu currently running on this CPU.
 *
 * NOTE(review): vcpu is dereferenced unconditionally, so this assumes it
 * is only ever called from a vcpu context where kvm_get_running_vcpu()
 * is non-NULL — confirm against callers.
 */
struct kvm_dirty_ring *kvm_dirty_ring_get(struct kvm *kvm)
{
	struct kvm_vcpu *vcpu = kvm_get_running_vcpu();

	/* Sanity check: the running vcpu must belong to the requested VM */
	WARN_ON_ONCE(vcpu->kvm != kvm);

	return &vcpu->dirty_ring;
}
| 47 | |
| 48 | static void kvm_reset_dirty_gfn(struct kvm *kvm, u32 slot, u64 offset, u64 mask) |
| 49 | { |
| 50 | struct kvm_memory_slot *memslot; |
| 51 | int as_id, id; |
| 52 | |
| 53 | as_id = slot >> 16; |
| 54 | id = (u16)slot; |
| 55 | |
| 56 | if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS) |
| 57 | return; |
| 58 | |
| 59 | memslot = id_to_memslot(__kvm_memslots(kvm, as_id), id); |
| 60 | |
| 61 | if (!memslot || (offset + __fls(mask)) >= memslot->npages) |
| 62 | return; |
| 63 | |
Ben Gardon | 531810c | 2021-02-02 10:57:24 -0800 | [diff] [blame] | 64 | KVM_MMU_LOCK(kvm); |
Peter Xu | fb04a1e | 2020-09-30 21:22:22 -0400 | [diff] [blame] | 65 | kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot, offset, mask); |
Ben Gardon | 531810c | 2021-02-02 10:57:24 -0800 | [diff] [blame] | 66 | KVM_MMU_UNLOCK(kvm); |
Peter Xu | fb04a1e | 2020-09-30 21:22:22 -0400 | [diff] [blame] | 67 | } |
| 68 | |
| 69 | int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring, int index, u32 size) |
| 70 | { |
Tian Tao | c910662 | 2021-01-25 11:57:25 +0800 | [diff] [blame] | 71 | ring->dirty_gfns = vzalloc(size); |
Peter Xu | fb04a1e | 2020-09-30 21:22:22 -0400 | [diff] [blame] | 72 | if (!ring->dirty_gfns) |
| 73 | return -ENOMEM; |
Peter Xu | fb04a1e | 2020-09-30 21:22:22 -0400 | [diff] [blame] | 74 | |
| 75 | ring->size = size / sizeof(struct kvm_dirty_gfn); |
| 76 | ring->soft_limit = ring->size - kvm_dirty_ring_get_rsvd_entries(); |
| 77 | ring->dirty_index = 0; |
| 78 | ring->reset_index = 0; |
| 79 | ring->index = index; |
| 80 | |
| 81 | return 0; |
| 82 | } |
| 83 | |
/*
 * Return an entry to the producer: clearing all flags drops both
 * KVM_DIRTY_GFN_F_DIRTY and the userspace-set KVM_DIRTY_GFN_F_RESET,
 * so the slot can be reused by kvm_dirty_ring_push().
 */
static inline void kvm_dirty_gfn_set_invalid(struct kvm_dirty_gfn *gfn)
{
	gfn->flags = 0;
}
| 88 | |
/*
 * Publish an entry to userspace by flagging it dirty.  The caller
 * (kvm_dirty_ring_push()) issues smp_wmb() first so slot/offset are
 * visible before the flag.
 */
static inline void kvm_dirty_gfn_set_dirtied(struct kvm_dirty_gfn *gfn)
{
	gfn->flags = KVM_DIRTY_GFN_F_DIRTY;
}
| 93 | |
Peter Xu | fb04a1e | 2020-09-30 21:22:22 -0400 | [diff] [blame] | 94 | static inline bool kvm_dirty_gfn_harvested(struct kvm_dirty_gfn *gfn) |
| 95 | { |
| 96 | return gfn->flags & KVM_DIRTY_GFN_F_RESET; |
| 97 | } |
| 98 | |
/*
 * Consume all entries that userspace has harvested (RESET flag set),
 * hand their GFNs back to the MMU via kvm_reset_dirty_gfn() so the
 * pages are tracked for dirty logging again, and free the ring slots.
 *
 * Consecutive entries targeting nearby offsets of the same memslot are
 * coalesced into a single (slot, offset, mask) flush to cut down on
 * MMU-lock acquisitions.
 *
 * Returns the number of entries reset.
 */
int kvm_dirty_ring_reset(struct kvm *kvm, struct kvm_dirty_ring *ring)
{
	u32 cur_slot, next_slot;
	u64 cur_offset, next_offset;
	unsigned long mask;
	int count = 0;
	struct kvm_dirty_gfn *entry;
	bool first_round = true;

	/* This is only needed to make compilers happy */
	cur_slot = cur_offset = mask = 0;

	while (true) {
		/* ring->size is a power of two, so the AND is a cheap modulo */
		entry = &ring->dirty_gfns[ring->reset_index & (ring->size - 1)];

		/* Stop at the first entry userspace has not harvested yet */
		if (!kvm_dirty_gfn_harvested(entry))
			break;

		next_slot = READ_ONCE(entry->slot);
		next_offset = READ_ONCE(entry->offset);

		/* Update the flags to reflect that this GFN is reset */
		kvm_dirty_gfn_set_invalid(entry);

		ring->reset_index++;
		count++;
		/*
		 * Try to coalesce the reset operations when the guest is
		 * scanning pages in the same slot.
		 */
		if (!first_round && next_slot == cur_slot) {
			s64 delta = next_offset - cur_offset;

			/* Forward visit inside the current 64-bit window */
			if (delta >= 0 && delta < BITS_PER_LONG) {
				mask |= 1ull << delta;
				continue;
			}

			/* Backwards visit, careful about overflows! */
			if (delta > -BITS_PER_LONG && delta < 0 &&
			    (mask << -delta >> -delta) == mask) {
				cur_offset = next_offset;
				mask = (mask << -delta) | 1;
				continue;
			}
		}
		/*
		 * NOTE(review): on the first harvested entry this flushes the
		 * dummy (0, 0, 0) triple, i.e. mask == 0; kvm_reset_dirty_gfn()
		 * is assumed to tolerate an empty mask (__fls(0) is undefined)
		 * — confirm.
		 */
		kvm_reset_dirty_gfn(kvm, cur_slot, cur_offset, mask);
		cur_slot = next_slot;
		cur_offset = next_offset;
		mask = 1;
		first_round = false;
	}

	/* Flush whatever is still accumulated in (cur_slot, cur_offset, mask) */
	kvm_reset_dirty_gfn(kvm, cur_slot, cur_offset, mask);

	trace_kvm_dirty_ring_reset(ring);

	return count;
}
| 158 | |
/*
 * Append one dirty GFN (memslot @slot, page @offset) to the ring and
 * publish it to userspace.
 *
 * NOTE(review): the WARN assumes callers throttle on
 * kvm_dirty_ring_soft_full() before the ring can actually fill —
 * confirm against the vcpu entry path.
 */
void kvm_dirty_ring_push(struct kvm_dirty_ring *ring, u32 slot, u64 offset)
{
	struct kvm_dirty_gfn *entry;

	/* It should never get full */
	WARN_ON_ONCE(kvm_dirty_ring_full(ring));

	entry = &ring->dirty_gfns[ring->dirty_index & (ring->size - 1)];

	entry->slot = slot;
	entry->offset = offset;
	/*
	 * Make sure the data is filled in before we publish this to
	 * the userspace program. There's no paired kernel-side reader.
	 */
	smp_wmb();
	kvm_dirty_gfn_set_dirtied(entry);
	ring->dirty_index++;
	trace_kvm_dirty_ring_push(ring, slot, offset);
}
| 179 | |
| 180 | struct page *kvm_dirty_ring_get_page(struct kvm_dirty_ring *ring, u32 offset) |
| 181 | { |
| 182 | return vmalloc_to_page((void *)ring->dirty_gfns + offset * PAGE_SIZE); |
| 183 | } |
| 184 | |
| 185 | void kvm_dirty_ring_free(struct kvm_dirty_ring *ring) |
| 186 | { |
| 187 | vfree(ring->dirty_gfns); |
| 188 | ring->dirty_gfns = NULL; |
| 189 | } |