Jesper Dangaard Brouer | ff7d6b2 | 2018-04-17 16:46:17 +0200 | [diff] [blame] | 1 | /* SPDX-License-Identifier: GPL-2.0 |
| 2 | * |
| 3 | * page_pool.h |
| 4 | * Author: Jesper Dangaard Brouer <netoptimizer@brouer.com> |
| 5 | * Copyright (C) 2016 Red Hat, Inc. |
| 6 | */ |
| 7 | |
| 8 | /** |
| 9 | * DOC: page_pool allocator |
| 10 | * |
| 11 | * This page_pool allocator is optimized for the XDP mode that |
| 12 | * uses one-frame-per-page, but has fallbacks that act like the |
| 13 | * regular page allocator APIs. |
| 14 | * |
| 15 | * Basic use involves replacing alloc_pages() calls with the |
| 16 | * page_pool_alloc_pages() call. Drivers should likely use |
| 17 | * page_pool_dev_alloc_pages() replacing dev_alloc_pages(). |
| 18 | * |
Jesper Dangaard Brouer | 99c07c4 | 2019-06-18 15:05:47 +0200 | [diff] [blame] | 19 | * API keeps track of in-flight pages, in-order to let API user know |
| 20 | * when it is safe to deallocate the page_pool object. Thus, API users |
| 21 | * must make sure to call page_pool_release_page() when a page is |
| 22 | * "leaving" the page_pool, or call page_pool_put_page() where |
| 23 | * appropriate, in order to maintain correct accounting. |
Jesper Dangaard Brouer | ff7d6b2 | 2018-04-17 16:46:17 +0200 | [diff] [blame] | 24 | * |
Jesper Dangaard Brouer | 99c07c4 | 2019-06-18 15:05:47 +0200 | [diff] [blame] | 25 | * API user must only call page_pool_put_page() once on a page, as it |
| 26 | * will either recycle the page, or in case of elevated refcnt, it |
| 27 | * will release the DMA mapping and in-flight state accounting. We |
| 28 | * hope to lift this requirement in the future. |
Jesper Dangaard Brouer | ff7d6b2 | 2018-04-17 16:46:17 +0200 | [diff] [blame] | 29 | */ |
| 30 | #ifndef _NET_PAGE_POOL_H |
| 31 | #define _NET_PAGE_POOL_H |
| 32 | |
| 33 | #include <linux/mm.h> /* Needed by ptr_ring */ |
| 34 | #include <linux/ptr_ring.h> |
| 35 | #include <linux/dma-direction.h> |
| 36 | |
/* Set when page_pool should do the DMA map/unmap itself. */
#define PP_FLAG_DMA_MAP		BIT(0)

/*
 * If set, all pages that the driver gets from page_pool will be
 * DMA-synced-for-device according to the length provided by the device
 * driver. Please note that DMA-sync-for-CPU is still the device driver's
 * responsibility.
 */
#define PP_FLAG_DMA_SYNC_DEV	BIT(1)

/* Bitwise OR of the two flags defined above. */
#define PP_FLAG_ALL		(PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV)
Jesper Dangaard Brouer | ff7d6b2 | 2018-04-17 16:46:17 +0200 | [diff] [blame] | 49 | |
| 50 | /* |
| 51 | * Fast allocation side cache array/stack |
| 52 | * |
| 53 | * The cache size and refill watermark is related to the network |
| 54 | * use-case. The NAPI budget is 64 packets. After a NAPI poll the RX |
| 55 | * ring is usually refilled and the max consumed elements will be 64, |
| 56 | * thus a natural max size of objects needed in the cache. |
| 57 | * |
| 58 | * Keeping room for more objects, is due to XDP_DROP use-case. As |
| 59 | * XDP_DROP allows the opportunity to recycle objects directly into |
| 60 | * this array, as it shares the same softirq/NAPI protection. If |
| 61 | * cache is already full (or partly full) then the XDP_DROP recycles |
| 62 | * would have to take a slower code path. |
| 63 | */ |
| 64 | #define PP_ALLOC_CACHE_SIZE 128 |
| 65 | #define PP_ALLOC_CACHE_REFILL 64 |
| 66 | struct pp_alloc_cache { |
| 67 | u32 count; |
| 68 | void *cache[PP_ALLOC_CACHE_SIZE]; |
| 69 | }; |
| 70 | |
| 71 | struct page_pool_params { |
| 72 | unsigned int flags; |
| 73 | unsigned int order; |
| 74 | unsigned int pool_size; |
| 75 | int nid; /* Numa node id to allocate from pages from */ |
| 76 | struct device *dev; /* device, for DMA pre-mapping purposes */ |
| 77 | enum dma_data_direction dma_dir; /* DMA mapping direction */ |
Lorenzo Bianconi | e68bc75 | 2019-11-20 16:54:18 +0200 | [diff] [blame] | 78 | unsigned int max_len; /* max DMA sync memory size */ |
| 79 | unsigned int offset; /* DMA addr offset */ |
Jesper Dangaard Brouer | ff7d6b2 | 2018-04-17 16:46:17 +0200 | [diff] [blame] | 80 | }; |
| 81 | |
| 82 | struct page_pool { |
Jesper Dangaard Brouer | ff7d6b2 | 2018-04-17 16:46:17 +0200 | [diff] [blame] | 83 | struct page_pool_params p; |
| 84 | |
Jonathan Lemon | c3f812c | 2019-11-14 14:13:00 -0800 | [diff] [blame] | 85 | struct delayed_work release_dw; |
| 86 | void (*disconnect)(void *); |
| 87 | unsigned long defer_start; |
| 88 | unsigned long defer_warn; |
| 89 | |
| 90 | u32 pages_state_hold_cnt; |
Jesper Dangaard Brouer | 99c07c4 | 2019-06-18 15:05:47 +0200 | [diff] [blame] | 91 | |
Jesper Dangaard Brouer | ff7d6b2 | 2018-04-17 16:46:17 +0200 | [diff] [blame] | 92 | /* |
| 93 | * Data structure for allocation side |
| 94 | * |
| 95 | * Drivers allocation side usually already perform some kind |
| 96 | * of resource protection. Piggyback on this protection, and |
| 97 | * require driver to protect allocation side. |
| 98 | * |
| 99 | * For NIC drivers this means, allocate a page_pool per |
| 100 | * RX-queue. As the RX-queue is already protected by |
| 101 | * Softirq/BH scheduling and napi_schedule. NAPI schedule |
| 102 | * guarantee that a single napi_struct will only be scheduled |
| 103 | * on a single CPU (see napi_schedule). |
| 104 | */ |
| 105 | struct pp_alloc_cache alloc ____cacheline_aligned_in_smp; |
| 106 | |
| 107 | /* Data structure for storing recycled pages. |
| 108 | * |
| 109 | * Returning/freeing pages is more complicated synchronization |
| 110 | * wise, because free's can happen on remote CPUs, with no |
| 111 | * association with allocation resource. |
| 112 | * |
| 113 | * Use ptr_ring, as it separates consumer and producer |
| 114 | * effeciently, it a way that doesn't bounce cache-lines. |
| 115 | * |
| 116 | * TODO: Implement bulk return pages into this structure. |
| 117 | */ |
| 118 | struct ptr_ring ring; |
Jesper Dangaard Brouer | 99c07c4 | 2019-06-18 15:05:47 +0200 | [diff] [blame] | 119 | |
| 120 | atomic_t pages_state_release_cnt; |
Ivan Khoronzhuk | 1da4bbe | 2019-07-09 00:34:28 +0300 | [diff] [blame] | 121 | |
| 122 | /* A page_pool is strictly tied to a single RX-queue being |
| 123 | * protected by NAPI, due to above pp_alloc_cache. This |
| 124 | * refcnt serves purpose is to simplify drivers error handling. |
| 125 | */ |
| 126 | refcount_t user_cnt; |
Jesper Dangaard Brouer | 7c9e694 | 2019-11-16 12:22:43 +0100 | [diff] [blame] | 127 | |
| 128 | u64 destroy_cnt; |
Jesper Dangaard Brouer | ff7d6b2 | 2018-04-17 16:46:17 +0200 | [diff] [blame] | 129 | }; |
| 130 | |
| 131 | struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp); |
| 132 | |
| 133 | static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool) |
| 134 | { |
| 135 | gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN); |
| 136 | |
| 137 | return page_pool_alloc_pages(pool, gfp); |
| 138 | } |
| 139 | |
Ilias Apalodimas | bb005f2 | 2019-06-29 08:23:24 +0300 | [diff] [blame] | 140 | /* get the stored dma direction. A driver might decide to treat this locally and |
| 141 | * avoid the extra cache line from page_pool to determine the direction |
| 142 | */ |
| 143 | static |
| 144 | inline enum dma_data_direction page_pool_get_dma_dir(struct page_pool *pool) |
| 145 | { |
| 146 | return pool->p.dma_dir; |
| 147 | } |
| 148 | |
struct page_pool *page_pool_create(const struct page_pool_params *params);

/*
 * The three functions below are only declared as real (out-of-line)
 * functions when CONFIG_PAGE_POOL is set. Otherwise they are empty inline
 * stubs.
 */
#ifndef CONFIG_PAGE_POOL
static inline void page_pool_destroy(struct page_pool *pool)
{
	/* stub: nothing to do */
}

static inline void page_pool_use_xdp_mem(struct page_pool *pool,
					 void (*disconnect)(void *))
{
	/* stub: nothing to do */
}

static inline void page_pool_release_page(struct page_pool *pool,
					  struct page *page)
{
	/* stub: nothing to do */
}
#else
void page_pool_destroy(struct page_pool *pool);
void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *));
void page_pool_release_page(struct page_pool *pool, struct page *page);
#endif
| 169 | |
void page_pool_put_page(struct page_pool *pool, struct page *page,
			unsigned int dma_sync_size, bool allow_direct);

/*
 * Same as page_pool_put_page(), but will try to sync the entire area
 * (pool->p.max_len) instead of a caller-chosen length.
 *
 * When page_pool isn't compiled in, net/core/xdp.c doesn't allow
 * registering MEM_TYPE_PAGE_POOL; the #ifdef only shields the linker from
 * a reference to page_pool_put_page(), and this function is then a no-op.
 */
static inline void page_pool_put_full_page(struct page_pool *pool,
					   struct page *page, bool allow_direct)
{
#ifdef CONFIG_PAGE_POOL
	/* -1 is the dma_sync_size value used to request a full sync */
	const unsigned int sync_everything = -1;

	page_pool_put_page(pool, page, sync_everything, allow_direct);
#endif
}
Ilias Apalodimas | 458de8a | 2020-02-20 09:41:55 +0200 | [diff] [blame] | 184 | |
/*
 * Same as page_pool_put_full_page() with allow_direct set to true; the
 * caller must guarantee a safe context, e.g. NAPI.
 */
static inline void page_pool_recycle_direct(struct page_pool *pool,
					    struct page *page)
{
	const bool allow_direct = true;

	page_pool_put_full_page(pool, page, allow_direct);
}
| 191 | |
Ilias Apalodimas | 0afdeee | 2019-06-18 15:05:12 +0200 | [diff] [blame] | 192 | static inline dma_addr_t page_pool_get_dma_addr(struct page *page) |
| 193 | { |
Matthew Wilcox (Oracle) | cfddf6a | 2021-05-14 17:27:24 -0700 | [diff] [blame] | 194 | dma_addr_t ret = page->dma_addr[0]; |
| 195 | if (sizeof(dma_addr_t) > sizeof(unsigned long)) |
| 196 | ret |= (dma_addr_t)page->dma_addr[1] << 16 << 16; |
| 197 | return ret; |
| 198 | } |
| 199 | |
| 200 | static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr) |
| 201 | { |
| 202 | page->dma_addr[0] = addr; |
| 203 | if (sizeof(dma_addr_t) > sizeof(unsigned long)) |
| 204 | page->dma_addr[1] = upper_32_bits(addr); |
Ilias Apalodimas | 0afdeee | 2019-06-18 15:05:12 +0200 | [diff] [blame] | 205 | } |
| 206 | |
/* Returns true when CONFIG_PAGE_POOL is defined at build time, else false. */
static inline bool is_page_pool_compiled_in(void)
{
	bool compiled_in = false;

#ifdef CONFIG_PAGE_POOL
	compiled_in = true;
#endif
	return compiled_in;
}
| 215 | |
Ivan Khoronzhuk | 1da4bbe | 2019-07-09 00:34:28 +0300 | [diff] [blame] | 216 | static inline bool page_pool_put(struct page_pool *pool) |
| 217 | { |
| 218 | return refcount_dec_and_test(&pool->user_cnt); |
| 219 | } |
| 220 | |
Saeed Mahameed | bc83674 | 2019-11-20 00:15:17 +0000 | [diff] [blame] | 221 | /* Caller must provide appropriate safe context, e.g. NAPI. */ |
| 222 | void page_pool_update_nid(struct page_pool *pool, int new_nid); |
| 223 | static inline void page_pool_nid_changed(struct page_pool *pool, int new_nid) |
| 224 | { |
| 225 | if (unlikely(pool->p.nid != new_nid)) |
| 226 | page_pool_update_nid(pool, new_nid); |
| 227 | } |
Jesper Dangaard Brouer | ff7d6b2 | 2018-04-17 16:46:17 +0200 | [diff] [blame] | 228 | #endif /* _NET_PAGE_POOL_H */ |