Thomas Gleixner | c942fdd | 2019-05-27 08:55:06 +0200 | [diff] [blame] | 1 | /* SPDX-License-Identifier: GPL-2.0-or-later */ |
Jérôme Glisse | 133ff0e | 2017-09-08 16:11:23 -0700 | [diff] [blame] | 2 | /* |
| 3 | * Copyright 2013 Red Hat Inc. |
| 4 | * |
Jérôme Glisse | f813f21 | 2018-10-30 15:04:06 -0700 | [diff] [blame] | 5 | * Authors: Jérôme Glisse <jglisse@redhat.com> |
Jérôme Glisse | 133ff0e | 2017-09-08 16:11:23 -0700 | [diff] [blame] | 6 | */ |
| 7 | /* |
| 8 | * Heterogeneous Memory Management (HMM) |
| 9 | * |
 * See Documentation/vm/hmm.rst for the rationale and an overview of what HMM is
 * and what it is for. Here we focus on the HMM API description, with some
 * explanation of the underlying implementation.
| 13 | * |
| 14 | * Short description: HMM provides a set of helpers to share a virtual address |
| 15 | * space between CPU and a device, so that the device can access any valid |
| 16 | * address of the process (while still obeying memory protection). HMM also |
| 17 | * provides helpers to migrate process memory to device memory, and back. Each |
| 18 | * set of functionality (address space mirroring, and migration to and from |
| 19 | * device memory) can be used independently of the other. |
| 20 | * |
| 21 | * |
| 22 | * HMM address space mirroring API: |
| 23 | * |
Ralph Campbell | 085ea25 | 2019-05-06 16:29:39 -0700 | [diff] [blame] | 24 | * Use HMM address space mirroring if you want to mirror a range of the CPU |
| 25 | * page tables of a process into a device page table. Here, "mirror" means "keep |
Jérôme Glisse | 133ff0e | 2017-09-08 16:11:23 -0700 | [diff] [blame] | 26 | * synchronized". Prerequisites: the device must provide the ability to write- |
| 27 | * protect its page tables (at PAGE_SIZE granularity), and must be able to |
| 28 | * recover from the resulting potential page faults. |
| 29 | * |
| 30 | * HMM guarantees that at any point in time, a given virtual address points to |
| 31 | * either the same memory in both CPU and device page tables (that is: CPU and |
| 32 | * device page tables each point to the same pages), or that one page table (CPU |
| 33 | * or device) points to no entry, while the other still points to the old page |
| 34 | * for the address. The latter case happens when the CPU page table update |
| 35 | * happens first, and then the update is mirrored over to the device page table. |
| 36 | * This does not cause any issue, because the CPU page table cannot start |
| 37 | * pointing to a new page until the device page table is invalidated. |
| 38 | * |
| 39 | * HMM uses mmu_notifiers to monitor the CPU page tables, and forwards any |
| 40 | * updates to each device driver that has registered a mirror. It also provides |
| 41 | * some API calls to help with taking a snapshot of the CPU page table, and to |
| 42 | * synchronize with any updates that might happen concurrently. |
| 43 | * |
| 44 | * |
| 45 | * HMM migration to and from device memory: |
| 46 | * |
| 47 | * HMM provides a set of helpers to hotplug device memory as ZONE_DEVICE, with |
| 48 | * a new MEMORY_DEVICE_PRIVATE type. This provides a struct page for each page |
| 49 | * of the device memory, and allows the device driver to manage its memory |
| 50 | * using those struct pages. Having struct pages for device memory makes |
| 51 | * migration easier. Because that memory is not addressable by the CPU it must |
| 52 | * never be pinned to the device; in other words, any CPU page fault can always |
| 53 | * cause the device memory to be migrated (copied/moved) back to regular memory. |
| 54 | * |
| 55 | * A new migrate helper (migrate_vma()) has been added (see mm/migrate.c) that |
| 56 | * allows use of a device DMA engine to perform the copy operation between |
| 57 | * regular system memory and device memory. |
| 58 | */ |
| 59 | #ifndef LINUX_HMM_H |
| 60 | #define LINUX_HMM_H |
| 61 | |
| 62 | #include <linux/kconfig.h> |
Dan Williams | 063a7d1 | 2018-12-28 00:39:46 -0800 | [diff] [blame] | 63 | #include <asm/pgtable.h> |
Jérôme Glisse | 133ff0e | 2017-09-08 16:11:23 -0700 | [diff] [blame] | 64 | |
Christoph Hellwig | 43535b0 | 2019-06-26 14:27:23 +0200 | [diff] [blame] | 65 | #ifdef CONFIG_HMM_MIRROR |
Jérôme Glisse | 133ff0e | 2017-09-08 16:11:23 -0700 | [diff] [blame] | 66 | |
Jérôme Glisse | 858b54d | 2017-09-08 16:12:02 -0700 | [diff] [blame] | 67 | #include <linux/device.h> |
Jérôme Glisse | 4ef589d | 2017-09-08 16:11:58 -0700 | [diff] [blame] | 68 | #include <linux/migrate.h> |
| 69 | #include <linux/memremap.h> |
| 70 | #include <linux/completion.h> |
Jérôme Glisse | a3e0d41 | 2019-05-13 17:20:01 -0700 | [diff] [blame] | 71 | #include <linux/mmu_notifier.h> |
Jérôme Glisse | 4ef589d | 2017-09-08 16:11:58 -0700 | [diff] [blame] | 72 | |
Jérôme Glisse | a3e0d41 | 2019-05-13 17:20:01 -0700 | [diff] [blame] | 73 | |
/*
 * struct hmm - HMM per mm struct
 *
 * @mmu_notifier: mmu notifier to track updates to CPU page table
 * @ranges_lock: spinlock protecting the ranges list
 * @ranges: list of ranges being snapshotted
 * @mirrors: list of mirrors for this mm
 * @mirrors_sem: read/write semaphore protecting the mirrors list
 * @wq: wait queue for users waiting on a range invalidation
 * @notifiers: count of active mmu notifiers
 */
struct hmm {
	struct mmu_notifier mmu_notifier;
	spinlock_t ranges_lock;
	struct list_head ranges;
	struct list_head mirrors;
	struct rw_semaphore mirrors_sem;
	wait_queue_head_t wq;
	long notifiers;
};
Jérôme Glisse | 133ff0e | 2017-09-08 16:11:23 -0700 | [diff] [blame] | 95 | |
/*
 * enum hmm_pfn_flag_e - indices of the HMM pfn flags
 *
 * HMM_PFN_VALID: pfn is valid; it has, at least, read permission
 * HMM_PFN_WRITE: CPU page table has write permission set
 * HMM_PFN_DEVICE_PRIVATE: private device memory (ZONE_DEVICE)
 * HMM_PFN_FLAG_MAX: count of the flags listed above
 *
 * Each enumerator is an index into the flags array that the driver supplies
 * in hmm_range.flags, which maps page protections to device PTE bits. For
 * example, if the driver's valid bit for an entry is bit 3, i.e.
 * (entry & (1 << 3)), then the driver must set
 * hmm_range.flags[HMM_PFN_VALID] == 1 << 3. The same logic applies to every
 * flag. This is the same idea as vm_page_prot in a vma, except that the
 * mapping is per device driver rather than per architecture.
 */
enum hmm_pfn_flag_e {
	HMM_PFN_VALID = 0,
	HMM_PFN_WRITE = 1,
	HMM_PFN_DEVICE_PRIVATE = 2,
	HMM_PFN_FLAG_MAX = 3
};
| 117 | |
/*
 * enum hmm_pfn_value_e - indices of the HMM pfn special values
 *
 * HMM_PFN_ERROR: corresponding CPU page table entry points to poisoned memory
 * HMM_PFN_NONE: corresponding CPU page table entry is pte_none()
 * HMM_PFN_SPECIAL: corresponding CPU page table entry is special; i.e., the
 *      result of vmf_insert_pfn() or vm_insert_page(). Therefore, it should
 *      not be mirrored by a device, because the entry will never have
 *      HMM_PFN_VALID set and the pfn value is undefined.
 * HMM_PFN_VALUE_MAX: count of the special values listed above
 *
 * Each enumerator is an index into the driver-supplied hmm_range.values
 * array. The driver provides a value for the none entry, the error entry and
 * the special entry. It may alias (i.e., use the same value for) error and
 * special, but it should not alias none with error or special.
 *
 * An HMM pfn value reported for a CPU page table entry will be:
 *      hmm_range.values[HMM_PFN_ERROR] if the entry is poisonous,
 *      hmm_range.values[HMM_PFN_NONE] if there is no CPU page table entry,
 *      hmm_range.values[HMM_PFN_SPECIAL] if the entry is a special one
 *
 * NOTE(review): this text used to name hmm_vma_get_pfns() and hmm_vma_fault()
 * as the reporting functions; neither is declared in this header. Presumably
 * hmm_range_fault() is the current entry point - confirm.
 */
enum hmm_pfn_value_e {
	HMM_PFN_ERROR = 0,
	HMM_PFN_NONE = 1,
	HMM_PFN_SPECIAL = 2,
	HMM_PFN_VALUE_MAX = 3
};
| 144 | |
/*
 * struct hmm_range - track invalidation lock on virtual address range
 *
 * @notifier: an optional mmu_interval_notifier
 * @notifier_seq: when notifier is used this is the result of
 *                mmu_interval_read_begin()
 * @hmm: the core HMM structure this range is active against
 * @list: all range locks are on a list
 * @start: range virtual start address (inclusive)
 * @end: range virtual end address (exclusive)
 * @pfns: array of pfns (big enough for the range)
 * @flags: pfn flags to match device driver page table (indexed by
 *         enum hmm_pfn_flag_e)
 * @values: pfn value for some special case (none, special, error, ...)
 *          (indexed by enum hmm_pfn_value_e)
 * @default_flags: default flags for the range (write, read, ... see hmm doc)
 * @pfn_flags_mask: allows to mask pfn flags so that only default_flags matter
 * @pfn_shift: pfn shift value (should be <= PAGE_SHIFT)
 * @valid: pfns array did not change since it has been filled by an HMM
 *         function
 */
struct hmm_range {
	struct mmu_interval_notifier *notifier;
	unsigned long notifier_seq;
	struct hmm *hmm;
	struct list_head list;
	unsigned long start;
	unsigned long end;
	uint64_t *pfns;
	const uint64_t *flags;
	const uint64_t *values;
	uint64_t default_flags;
	uint64_t pfn_flags_mask;
	uint8_t pfn_shift;
	bool valid;
};
Jérôme Glisse | 133ff0e | 2017-09-08 16:11:23 -0700 | [diff] [blame] | 179 | |
| 180 | /* |
Jérôme Glisse | a3e0d41 | 2019-05-13 17:20:01 -0700 | [diff] [blame] | 181 | * hmm_range_wait_until_valid() - wait for range to be valid |
| 182 | * @range: range affected by invalidation to wait on |
| 183 | * @timeout: time out for wait in ms (ie abort wait after that period of time) |
Ralph Campbell | 085ea25 | 2019-05-06 16:29:39 -0700 | [diff] [blame] | 184 | * Return: true if the range is valid, false otherwise. |
Jérôme Glisse | a3e0d41 | 2019-05-13 17:20:01 -0700 | [diff] [blame] | 185 | */ |
| 186 | static inline bool hmm_range_wait_until_valid(struct hmm_range *range, |
| 187 | unsigned long timeout) |
| 188 | { |
Jason Gunthorpe | 378a604 | 2019-05-23 11:17:22 -0300 | [diff] [blame] | 189 | return wait_event_timeout(range->hmm->wq, range->valid, |
| 190 | msecs_to_jiffies(timeout)) != 0; |
Jérôme Glisse | a3e0d41 | 2019-05-13 17:20:01 -0700 | [diff] [blame] | 191 | } |
| 192 | |
| 193 | /* |
| 194 | * hmm_range_valid() - test if a range is valid or not |
| 195 | * @range: range |
Ralph Campbell | 085ea25 | 2019-05-06 16:29:39 -0700 | [diff] [blame] | 196 | * Return: true if the range is valid, false otherwise. |
Jérôme Glisse | a3e0d41 | 2019-05-13 17:20:01 -0700 | [diff] [blame] | 197 | */ |
| 198 | static inline bool hmm_range_valid(struct hmm_range *range) |
| 199 | { |
| 200 | return range->valid; |
| 201 | } |
| 202 | |
| 203 | /* |
Jérôme Glisse | 391aab1 | 2019-05-13 17:20:31 -0700 | [diff] [blame] | 204 | * hmm_device_entry_to_page() - return struct page pointed to by a device entry |
| 205 | * @range: range use to decode device entry value |
| 206 | * @entry: device entry value to get corresponding struct page from |
Ralph Campbell | 085ea25 | 2019-05-06 16:29:39 -0700 | [diff] [blame] | 207 | * Return: struct page pointer if entry is a valid, NULL otherwise |
Jérôme Glisse | 133ff0e | 2017-09-08 16:11:23 -0700 | [diff] [blame] | 208 | * |
Jérôme Glisse | 391aab1 | 2019-05-13 17:20:31 -0700 | [diff] [blame] | 209 | * If the device entry is valid (ie valid flag set) then return the struct page |
| 210 | * matching the entry value. Otherwise return NULL. |
Jérôme Glisse | 133ff0e | 2017-09-08 16:11:23 -0700 | [diff] [blame] | 211 | */ |
Jérôme Glisse | 391aab1 | 2019-05-13 17:20:31 -0700 | [diff] [blame] | 212 | static inline struct page *hmm_device_entry_to_page(const struct hmm_range *range, |
| 213 | uint64_t entry) |
Jérôme Glisse | 133ff0e | 2017-09-08 16:11:23 -0700 | [diff] [blame] | 214 | { |
Jérôme Glisse | 391aab1 | 2019-05-13 17:20:31 -0700 | [diff] [blame] | 215 | if (entry == range->values[HMM_PFN_NONE]) |
Jérôme Glisse | 133ff0e | 2017-09-08 16:11:23 -0700 | [diff] [blame] | 216 | return NULL; |
Jérôme Glisse | 391aab1 | 2019-05-13 17:20:31 -0700 | [diff] [blame] | 217 | if (entry == range->values[HMM_PFN_ERROR]) |
Jérôme Glisse | f88a1e9 | 2018-04-10 16:29:06 -0700 | [diff] [blame] | 218 | return NULL; |
Jérôme Glisse | 391aab1 | 2019-05-13 17:20:31 -0700 | [diff] [blame] | 219 | if (entry == range->values[HMM_PFN_SPECIAL]) |
Jérôme Glisse | f88a1e9 | 2018-04-10 16:29:06 -0700 | [diff] [blame] | 220 | return NULL; |
Jérôme Glisse | 391aab1 | 2019-05-13 17:20:31 -0700 | [diff] [blame] | 221 | if (!(entry & range->flags[HMM_PFN_VALID])) |
Jérôme Glisse | f88a1e9 | 2018-04-10 16:29:06 -0700 | [diff] [blame] | 222 | return NULL; |
Jérôme Glisse | 391aab1 | 2019-05-13 17:20:31 -0700 | [diff] [blame] | 223 | return pfn_to_page(entry >> range->pfn_shift); |
Jérôme Glisse | 133ff0e | 2017-09-08 16:11:23 -0700 | [diff] [blame] | 224 | } |
| 225 | |
| 226 | /* |
Jérôme Glisse | 391aab1 | 2019-05-13 17:20:31 -0700 | [diff] [blame] | 227 | * hmm_device_entry_to_pfn() - return pfn value store in a device entry |
| 228 | * @range: range use to decode device entry value |
| 229 | * @entry: device entry to extract pfn from |
Ralph Campbell | 085ea25 | 2019-05-06 16:29:39 -0700 | [diff] [blame] | 230 | * Return: pfn value if device entry is valid, -1UL otherwise |
Jérôme Glisse | 133ff0e | 2017-09-08 16:11:23 -0700 | [diff] [blame] | 231 | */ |
Jérôme Glisse | 391aab1 | 2019-05-13 17:20:31 -0700 | [diff] [blame] | 232 | static inline unsigned long |
| 233 | hmm_device_entry_to_pfn(const struct hmm_range *range, uint64_t pfn) |
Jérôme Glisse | 133ff0e | 2017-09-08 16:11:23 -0700 | [diff] [blame] | 234 | { |
Jérôme Glisse | f88a1e9 | 2018-04-10 16:29:06 -0700 | [diff] [blame] | 235 | if (pfn == range->values[HMM_PFN_NONE]) |
Jérôme Glisse | 133ff0e | 2017-09-08 16:11:23 -0700 | [diff] [blame] | 236 | return -1UL; |
Jérôme Glisse | f88a1e9 | 2018-04-10 16:29:06 -0700 | [diff] [blame] | 237 | if (pfn == range->values[HMM_PFN_ERROR]) |
| 238 | return -1UL; |
| 239 | if (pfn == range->values[HMM_PFN_SPECIAL]) |
| 240 | return -1UL; |
| 241 | if (!(pfn & range->flags[HMM_PFN_VALID])) |
| 242 | return -1UL; |
| 243 | return (pfn >> range->pfn_shift); |
Jérôme Glisse | 133ff0e | 2017-09-08 16:11:23 -0700 | [diff] [blame] | 244 | } |
| 245 | |
| 246 | /* |
Jérôme Glisse | 391aab1 | 2019-05-13 17:20:31 -0700 | [diff] [blame] | 247 | * hmm_device_entry_from_page() - create a valid device entry for a page |
Jérôme Glisse | f88a1e9 | 2018-04-10 16:29:06 -0700 | [diff] [blame] | 248 | * @range: range use to encode HMM pfn value |
Jérôme Glisse | 391aab1 | 2019-05-13 17:20:31 -0700 | [diff] [blame] | 249 | * @page: page for which to create the device entry |
Ralph Campbell | 085ea25 | 2019-05-06 16:29:39 -0700 | [diff] [blame] | 250 | * Return: valid device entry for the page |
Jérôme Glisse | 133ff0e | 2017-09-08 16:11:23 -0700 | [diff] [blame] | 251 | */ |
Jérôme Glisse | 391aab1 | 2019-05-13 17:20:31 -0700 | [diff] [blame] | 252 | static inline uint64_t hmm_device_entry_from_page(const struct hmm_range *range, |
| 253 | struct page *page) |
Jérôme Glisse | 133ff0e | 2017-09-08 16:11:23 -0700 | [diff] [blame] | 254 | { |
Jérôme Glisse | f88a1e9 | 2018-04-10 16:29:06 -0700 | [diff] [blame] | 255 | return (page_to_pfn(page) << range->pfn_shift) | |
| 256 | range->flags[HMM_PFN_VALID]; |
Jérôme Glisse | 133ff0e | 2017-09-08 16:11:23 -0700 | [diff] [blame] | 257 | } |
| 258 | |
| 259 | /* |
Jérôme Glisse | 391aab1 | 2019-05-13 17:20:31 -0700 | [diff] [blame] | 260 | * hmm_device_entry_from_pfn() - create a valid device entry value from pfn |
Jérôme Glisse | f88a1e9 | 2018-04-10 16:29:06 -0700 | [diff] [blame] | 261 | * @range: range use to encode HMM pfn value |
Jérôme Glisse | 391aab1 | 2019-05-13 17:20:31 -0700 | [diff] [blame] | 262 | * @pfn: pfn value for which to create the device entry |
Ralph Campbell | 085ea25 | 2019-05-06 16:29:39 -0700 | [diff] [blame] | 263 | * Return: valid device entry for the pfn |
Jérôme Glisse | 133ff0e | 2017-09-08 16:11:23 -0700 | [diff] [blame] | 264 | */ |
Jérôme Glisse | 391aab1 | 2019-05-13 17:20:31 -0700 | [diff] [blame] | 265 | static inline uint64_t hmm_device_entry_from_pfn(const struct hmm_range *range, |
| 266 | unsigned long pfn) |
Jérôme Glisse | 133ff0e | 2017-09-08 16:11:23 -0700 | [diff] [blame] | 267 | { |
Jérôme Glisse | f88a1e9 | 2018-04-10 16:29:06 -0700 | [diff] [blame] | 268 | return (pfn << range->pfn_shift) | |
| 269 | range->flags[HMM_PFN_VALID]; |
Jérôme Glisse | 133ff0e | 2017-09-08 16:11:23 -0700 | [diff] [blame] | 270 | } |
| 271 | |
Jérôme Glisse | 391aab1 | 2019-05-13 17:20:31 -0700 | [diff] [blame] | 272 | /* |
Jérôme Glisse | c0b1240 | 2017-09-08 16:11:27 -0700 | [diff] [blame] | 273 | * Mirroring: how to synchronize device page table with CPU page table. |
| 274 | * |
| 275 | * A device driver that is participating in HMM mirroring must always |
| 276 | * synchronize with CPU page table updates. For this, device drivers can either |
| 277 | * directly use mmu_notifier APIs or they can use the hmm_mirror API. Device |
| 278 | * drivers can decide to register one mirror per device per process, or just |
| 279 | * one mirror per process for a group of devices. The pattern is: |
| 280 | * |
| 281 | * int device_bind_address_space(..., struct mm_struct *mm, ...) |
| 282 | * { |
| 283 | * struct device_address_space *das; |
| 284 | * |
| 285 | * // Device driver specific initialization, and allocation of das |
| 286 | * // which contains an hmm_mirror struct as one of its fields. |
| 287 | * ... |
| 288 | * |
 *      das->mirror.ops = &device_mirror_ops;
 *      ret = hmm_mirror_register(&das->mirror, mm);
| 290 | * if (ret) { |
| 291 | * // Cleanup on error |
| 292 | * return ret; |
| 293 | * } |
| 294 | * |
| 295 | * // Other device driver specific initialization |
| 296 | * ... |
| 297 | * } |
| 298 | * |
| 299 | * Once an hmm_mirror is registered for an address space, the device driver |
| 300 | * will get callbacks through sync_cpu_device_pagetables() operation (see |
| 301 | * hmm_mirror_ops struct). |
| 302 | * |
| 303 | * Device driver must not free the struct containing the hmm_mirror struct |
| 304 | * before calling hmm_mirror_unregister(). The expected usage is to do that when |
| 305 | * the device driver is unbinding from an address space. |
| 306 | * |
| 307 | * |
| 308 | * void device_unbind_address_space(struct device_address_space *das) |
| 309 | * { |
| 310 | * // Device driver specific cleanup |
| 311 | * ... |
| 312 | * |
| 313 | * hmm_mirror_unregister(&das->mirror); |
| 314 | * |
| 315 | * // Other device driver specific cleanup, and now das can be freed |
| 316 | * ... |
| 317 | * } |
| 318 | */ |
| 319 | |
struct hmm_mirror;

/*
 * struct hmm_mirror_ops - HMM mirror device operations callback
 *
 * @release: called when the mm_struct is being released
 * @sync_cpu_device_pagetables: called to bring the device page table back in
 *                              sync after a CPU page table update
 */
struct hmm_mirror_ops {
	/* release() - release hmm_mirror
	 *
	 * @mirror: pointer to struct hmm_mirror
	 *
	 * This is called when the mm_struct is being released. The callback
	 * must ensure that all access to any pages obtained from this mirror
	 * is halted before the callback returns. All future access should
	 * fault.
	 */
	void (*release)(struct hmm_mirror *mirror);

	/* sync_cpu_device_pagetables() - synchronize page tables
	 *
	 * @mirror: pointer to struct hmm_mirror
	 * @update: update information (see struct mmu_notifier_range)
	 * Return: -EAGAIN if mmu_notifier_range_blockable(update) is false
	 * and callback needs to block, 0 otherwise.
	 *
	 * This callback ultimately originates from mmu_notifiers when the CPU
	 * page table is updated. The device driver must update its page table
	 * in response to this callback. The update argument tells what action
	 * to perform.
	 *
	 * The device driver must not return from this callback until the device
	 * page tables are completely updated (TLBs flushed, etc); this is a
	 * synchronous call.
	 */
	int (*sync_cpu_device_pagetables)(
		struct hmm_mirror *mirror,
		const struct mmu_notifier_range *update);
};
| 359 | |
/*
 * struct hmm_mirror - mirror struct for a device driver
 *
 * @hmm: pointer to struct hmm (which is unique per mm_struct)
 * @ops: device driver callbacks for HMM mirror operations (see
 *       struct hmm_mirror_ops)
 * @list: for list of mirrors of a given mm
 *
 * Each address space (mm_struct) being mirrored by a device must register one
 * instance of an hmm_mirror struct with HMM. HMM will track the list of all
 * mirrors for each mm_struct.
 */
struct hmm_mirror {
	struct hmm *hmm;
	const struct hmm_mirror_ops *ops;
	struct list_head list;
};
| 376 | |
/*
 * hmm_mirror_register() - register a mirror against an address space
 * @mirror: mirror to register
 * @mm: address space (mm_struct) to mirror
 *
 * NOTE(review): the return convention is not visible in this header; the
 * usage pattern documented in this file treats a non-zero result as failure.
 * Presumably mirror->ops must be set by the caller beforehand - confirm.
 */
int hmm_mirror_register(struct hmm_mirror *mirror, struct mm_struct *mm);

/*
 * hmm_mirror_unregister() - unregister a mirror
 * @mirror: mirror to unregister
 *
 * The structure containing the hmm_mirror must not be freed before this has
 * been called.
 */
void hmm_mirror_unregister(struct hmm_mirror *mirror);
Jérôme Glisse | da4c3c7 | 2017-09-08 16:11:31 -0700 | [diff] [blame] | 379 | |
/*
 * Please see Documentation/vm/hmm.rst for how to use the range API.
 *
 * hmm_range_register() takes the range to track together with the mirror it
 * is registered against; hmm_range_unregister() takes only the range.
 * NOTE(review): the return convention of hmm_range_register() is not visible
 * in this header - confirm against the implementation.
 */
int hmm_range_register(struct hmm_range *range, struct hmm_mirror *mirror);
void hmm_range_unregister(struct hmm_range *range);
Christoph Hellwig | 9a4903e | 2019-07-25 17:56:46 -0700 | [diff] [blame] | 385 | |
/*
 * Fault flag bits. Each flag occupies its own bit so they can be OR-ed
 * together.
 */

/* Retry fault if non-blocking, drop mmap_sem and return -EAGAIN in that case. */
#define HMM_FAULT_ALLOW_RETRY 0x01

/* Don't fault in missing PTEs, just snapshot the current state. */
#define HMM_FAULT_SNAPSHOT 0x02
| 393 | |
/*
 * hmm_range_fault() - operate on a registered range
 * @range: range to operate on
 * @flags: presumably a mask of the HMM_FAULT_* bits defined in this header
 *         (TODO confirm)
 *
 * NOTE(review): the meaning of the long return value is not visible in this
 * header - confirm against the implementation.
 */
long hmm_range_fault(struct hmm_range *range, unsigned int flags);

/*
 * DMA helpers for a range. Both take the range, the device performing the DMA
 * and an array of DMA addresses; hmm_range_dma_map() additionally takes a
 * flags word and hmm_range_dma_unmap() a dirty indicator.
 * NOTE(review): the expected size of @daddrs and the meaning of the long
 * return values are not visible in this header - confirm.
 */
long hmm_range_dma_map(struct hmm_range *range,
		       struct device *device,
		       dma_addr_t *daddrs,
		       unsigned int flags);
long hmm_range_dma_unmap(struct hmm_range *range,
			 struct device *device,
			 dma_addr_t *daddrs,
			 bool dirty);
Jérôme Glisse | 74eee18 | 2017-09-08 16:11:35 -0700 | [diff] [blame] | 404 | |
/*
 * HMM_RANGE_DEFAULT_TIMEOUT - default timeout (ms) when waiting for a range
 *
 * When waiting for mmu notifiers we need some kind of timeout, otherwise we
 * could potentially wait forever. 1000ms, i.e. 1s, already sounds like a long
 * time to wait.
 */
#define HMM_RANGE_DEFAULT_TIMEOUT 1000
| 413 | |
#endif /* CONFIG_HMM_MIRROR */
Jérôme Glisse | c0b1240 | 2017-09-08 16:11:27 -0700 | [diff] [blame] | 415 | |
Jérôme Glisse | 133ff0e | 2017-09-08 16:11:23 -0700 | [diff] [blame] | 416 | #endif /* LINUX_HMM_H */ |