Thomas Gleixner | c942fdd | 2019-05-27 08:55:06 +0200 | [diff] [blame] | 1 | /* SPDX-License-Identifier: GPL-2.0-or-later */ |
Jérôme Glisse | 133ff0e | 2017-09-08 16:11:23 -0700 | [diff] [blame] | 2 | /* |
| 3 | * Copyright 2013 Red Hat Inc. |
| 4 | * |
Jérôme Glisse | f813f21 | 2018-10-30 15:04:06 -0700 | [diff] [blame] | 5 | * Authors: Jérôme Glisse <jglisse@redhat.com> |
Jérôme Glisse | 133ff0e | 2017-09-08 16:11:23 -0700 | [diff] [blame] | 6 | * |
Jason Gunthorpe | f970b97 | 2020-03-27 17:00:15 -0300 | [diff] [blame^] | 7 | * See Documentation/vm/hmm.rst for reasons and overview of what HMM is. |
Jérôme Glisse | 133ff0e | 2017-09-08 16:11:23 -0700 | [diff] [blame] | 8 | */ |
| 9 | #ifndef LINUX_HMM_H |
| 10 | #define LINUX_HMM_H |
| 11 | |
| 12 | #include <linux/kconfig.h> |
Dan Williams | 063a7d1 | 2018-12-28 00:39:46 -0800 | [diff] [blame] | 13 | #include <asm/pgtable.h> |
Jérôme Glisse | 133ff0e | 2017-09-08 16:11:23 -0700 | [diff] [blame] | 14 | |
Jérôme Glisse | 858b54d | 2017-09-08 16:12:02 -0700 | [diff] [blame] | 15 | #include <linux/device.h> |
Jérôme Glisse | 4ef589d | 2017-09-08 16:11:58 -0700 | [diff] [blame] | 16 | #include <linux/migrate.h> |
| 17 | #include <linux/memremap.h> |
| 18 | #include <linux/completion.h> |
Jérôme Glisse | a3e0d41 | 2019-05-13 17:20:01 -0700 | [diff] [blame] | 19 | #include <linux/mmu_notifier.h> |
Jérôme Glisse | 4ef589d | 2017-09-08 16:11:58 -0700 | [diff] [blame] | 20 | |
/*
 * hmm_pfn_flag_e - indices of the per-entry flags understood by HMM
 *
 * Flags:
 * HMM_PFN_VALID: pfn is valid. It has, at least, read permission.
 * HMM_PFN_WRITE: CPU page table has write permission set
 *
 * These enumerators are array indices, not bit masks. The driver provides a
 * flags array (hmm_range.flags) that maps each index to the corresponding
 * bit(s) of its own device PTE format. For example, if the driver valid bit
 * for an entry is bit 3, i.e., (entry & (1 << 3)), then the driver must
 * provide hmm_range.flags[HMM_PFN_VALID] == 1 << 3. The same logic applies to
 * all flags. This is the same idea as vm_page_prot in a vma, except that it is
 * per device driver rather than per architecture.
 *
 * HMM_PFN_FLAG_MAX is not a flag: it is one past the last flag index, i.e.
 * the number of flags defined here.
 */
enum hmm_pfn_flag_e {
	HMM_PFN_VALID = 0,
	HMM_PFN_WRITE = 1,
	HMM_PFN_FLAG_MAX
};
| 40 | |
/*
 * hmm_pfn_value_e - indices of the HMM pfn special values
 *
 * Values:
 * HMM_PFN_ERROR: corresponding CPU page table entry points to poisoned memory
 * HMM_PFN_NONE: corresponding CPU page table entry is pte_none()
 * HMM_PFN_SPECIAL: corresponding CPU page table entry is special; i.e., the
 *      result of vmf_insert_pfn() or vm_insert_page(). Therefore, it should not
 *      be mirrored by a device, because the entry will never have HMM_PFN_VALID
 *      set and the pfn value is undefined.
 *
 * These enumerators are array indices into hmm_range.values. The driver
 * provides the values for the none entry, the error entry, and the special
 * entry. The driver can alias (i.e., use the same value for) error and
 * special, but it should not alias none with error or special.
 *
 * HMM pfn value returned by hmm_vma_get_pfns() or hmm_vma_fault() will be:
 * hmm_range.values[HMM_PFN_ERROR] if CPU page table entry is poisonous,
 * hmm_range.values[HMM_PFN_NONE] if there is no CPU page table entry,
 * hmm_range.values[HMM_PFN_SPECIAL] if CPU page table entry is a special one
 *
 * NOTE(review): hmm_vma_get_pfns() and hmm_vma_fault() are not declared in
 * this header; hmm_range_fault() is the only range function declared here.
 * Confirm which function name this paragraph should cite.
 *
 * HMM_PFN_VALUE_MAX is not a value index: it is one past the last index, i.e.
 * the number of special values defined here.
 */
enum hmm_pfn_value_e {
	HMM_PFN_ERROR = 0,
	HMM_PFN_NONE = 1,
	HMM_PFN_SPECIAL = 2,
	HMM_PFN_VALUE_MAX
};
| 67 | |
/*
 * struct hmm_range - track invalidation lock on virtual address range
 *
 * @notifier: a mmu_interval_notifier that includes the start/end
 * @notifier_seq: result of mmu_interval_read_begin()
 * @start: first virtual address of the range (inclusive)
 * @end: virtual address just past the range (exclusive)
 * @pfns: array of device entries, big enough to cover the range
 * @flags: driver table of pfn flag bits matching the device page table,
 *         indexed by enum hmm_pfn_flag_e
 * @values: driver table of entry values for the special cases (none,
 *          special, error, ...), indexed by enum hmm_pfn_value_e
 * @default_flags: default flags for the range (write, read, ... see hmm doc)
 * @pfn_flags_mask: allows to mask pfn flags so that only default_flags matter
 * @pfn_shift: pfn shift value (should be <= PAGE_SHIFT); a device entry is
 *             shifted right by this amount to obtain the pfn, see
 *             hmm_device_entry_to_page()
 * @dev_private_owner: owner of device private pages
 */
struct hmm_range {
	struct mmu_interval_notifier *notifier;
	unsigned long		notifier_seq;
	unsigned long		start;
	unsigned long		end;
	uint64_t		*pfns;
	const uint64_t		*flags;
	const uint64_t		*values;
	uint64_t		default_flags;
	uint64_t		pfn_flags_mask;
	uint8_t			pfn_shift;
	void			*dev_private_owner;
};
Jérôme Glisse | 133ff0e | 2017-09-08 16:11:23 -0700 | [diff] [blame] | 96 | |
| 97 | /* |
Jérôme Glisse | 391aab1 | 2019-05-13 17:20:31 -0700 | [diff] [blame] | 98 | * hmm_device_entry_to_page() - return struct page pointed to by a device entry |
| 99 | * @range: range use to decode device entry value |
| 100 | * @entry: device entry value to get corresponding struct page from |
Ralph Campbell | 085ea25 | 2019-05-06 16:29:39 -0700 | [diff] [blame] | 101 | * Return: struct page pointer if entry is a valid, NULL otherwise |
Jérôme Glisse | 133ff0e | 2017-09-08 16:11:23 -0700 | [diff] [blame] | 102 | * |
Jérôme Glisse | 391aab1 | 2019-05-13 17:20:31 -0700 | [diff] [blame] | 103 | * If the device entry is valid (ie valid flag set) then return the struct page |
| 104 | * matching the entry value. Otherwise return NULL. |
Jérôme Glisse | 133ff0e | 2017-09-08 16:11:23 -0700 | [diff] [blame] | 105 | */ |
Jérôme Glisse | 391aab1 | 2019-05-13 17:20:31 -0700 | [diff] [blame] | 106 | static inline struct page *hmm_device_entry_to_page(const struct hmm_range *range, |
| 107 | uint64_t entry) |
Jérôme Glisse | 133ff0e | 2017-09-08 16:11:23 -0700 | [diff] [blame] | 108 | { |
Jérôme Glisse | 391aab1 | 2019-05-13 17:20:31 -0700 | [diff] [blame] | 109 | if (entry == range->values[HMM_PFN_NONE]) |
Jérôme Glisse | 133ff0e | 2017-09-08 16:11:23 -0700 | [diff] [blame] | 110 | return NULL; |
Jérôme Glisse | 391aab1 | 2019-05-13 17:20:31 -0700 | [diff] [blame] | 111 | if (entry == range->values[HMM_PFN_ERROR]) |
Jérôme Glisse | f88a1e9 | 2018-04-10 16:29:06 -0700 | [diff] [blame] | 112 | return NULL; |
Jérôme Glisse | 391aab1 | 2019-05-13 17:20:31 -0700 | [diff] [blame] | 113 | if (entry == range->values[HMM_PFN_SPECIAL]) |
Jérôme Glisse | f88a1e9 | 2018-04-10 16:29:06 -0700 | [diff] [blame] | 114 | return NULL; |
Jérôme Glisse | 391aab1 | 2019-05-13 17:20:31 -0700 | [diff] [blame] | 115 | if (!(entry & range->flags[HMM_PFN_VALID])) |
Jérôme Glisse | f88a1e9 | 2018-04-10 16:29:06 -0700 | [diff] [blame] | 116 | return NULL; |
Jérôme Glisse | 391aab1 | 2019-05-13 17:20:31 -0700 | [diff] [blame] | 117 | return pfn_to_page(entry >> range->pfn_shift); |
Jérôme Glisse | 133ff0e | 2017-09-08 16:11:23 -0700 | [diff] [blame] | 118 | } |
| 119 | |
/*
 * Flag for the @flags argument of hmm_range_fault(): do not fault in missing
 * PTEs, just snapshot the current state.
 */
#define HMM_FAULT_SNAPSHOT		(1 << 1)

/*
 * Range API entry point. Please see Documentation/vm/hmm.rst for how to use
 * the range API.
 */
long hmm_range_fault(struct hmm_range *range, unsigned int flags);
Jérôme Glisse | 74eee18 | 2017-09-08 16:11:35 -0700 | [diff] [blame] | 127 | |
/*
 * HMM_RANGE_DEFAULT_TIMEOUT - default timeout (ms) when waiting for a range
 *
 * When waiting for mmu notifiers we need some kind of timeout, otherwise we
 * could potentially wait forever. 1000ms, i.e. 1s, already sounds like a long
 * time to wait.
 */
#define HMM_RANGE_DEFAULT_TIMEOUT	1000
| 136 | |
Jérôme Glisse | 133ff0e | 2017-09-08 16:11:23 -0700 | [diff] [blame] | 137 | #endif /* LINUX_HMM_H */ |