Jesper Dangaard Brouer | ff7d6b2 | 2018-04-17 16:46:17 +0200 | [diff] [blame] | 1 | /* SPDX-License-Identifier: GPL-2.0 |
| 2 | * |
| 3 | * page_pool.h |
| 4 | * Author: Jesper Dangaard Brouer <netoptimizer@brouer.com> |
| 5 | * Copyright (C) 2016 Red Hat, Inc. |
| 6 | */ |
| 7 | |
| 8 | /** |
| 9 | * DOC: page_pool allocator |
| 10 | * |
| 11 | * This page_pool allocator is optimized for the XDP mode that |
| 12 | * uses one-frame-per-page, but has fallbacks that act like the |
| 13 | * regular page allocator APIs. |
| 14 | * |
| 15 | * Basic use involves replacing alloc_pages() calls with the |
| 16 | * page_pool_alloc_pages() call. Drivers should likely use |
| 17 | * page_pool_dev_alloc_pages() replacing dev_alloc_pages(). |
| 18 | * |
Jesper Dangaard Brouer | 99c07c4 | 2019-06-18 15:05:47 +0200 | [diff] [blame] | 19 | * API keeps track of in-flight pages, in order to let the API user know |
| 20 | * when it is safe to deallocate the page_pool object. Thus, API users |
| 21 | * must make sure to call page_pool_release_page() when a page is |
| 22 | * "leaving" the page_pool. Or call page_pool_put_page() where |
| 23 | * appropriate, to maintain correct accounting. |
Jesper Dangaard Brouer | ff7d6b2 | 2018-04-17 16:46:17 +0200 | [diff] [blame] | 24 | * |
Jesper Dangaard Brouer | 99c07c4 | 2019-06-18 15:05:47 +0200 | [diff] [blame] | 25 | * API user must only call page_pool_put_page() once on a page, as it |
| 26 | * will either recycle the page, or in case of elevated refcnt, it |
| 27 | * will release the DMA mapping and in-flight state accounting. We |
| 28 | * hope to lift this requirement in the future. |
Jesper Dangaard Brouer | ff7d6b2 | 2018-04-17 16:46:17 +0200 | [diff] [blame] | 29 | */ |
| 30 | #ifndef _NET_PAGE_POOL_H |
| 31 | #define _NET_PAGE_POOL_H |
| 32 | |
| 33 | #include <linux/mm.h> /* Needed by ptr_ring */ |
| 34 | #include <linux/ptr_ring.h> |
| 35 | #include <linux/dma-direction.h> |
| 36 | |
/* page_pool flag bits */
#define PP_FLAG_DMA_MAP	1	/* Should page_pool do the DMA map/unmap */
#define PP_FLAG_ALL	(PP_FLAG_DMA_MAP)	/* every flag bit defined here */
| 39 | |
| 40 | /* |
| 41 | * Fast allocation side cache array/stack |
| 42 | * |
| 43 | * The cache size and refill watermark are related to the network |
| 44 | * use-case. The NAPI budget is 64 packets. After a NAPI poll the RX |
| 45 | * ring is usually refilled and the max consumed elements will be 64, |
| 46 | * thus a natural max size of objects needed in the cache. |
| 47 | * |
| 48 | * Room for more objects is kept for the XDP_DROP use-case, since |
| 49 | * XDP_DROP allows the opportunity to recycle objects directly into |
| 50 | * this array, as it shares the same softirq/NAPI protection. If |
| 51 | * cache is already full (or partly full) then the XDP_DROP recycles |
| 52 | * would have to take a slower code path. |
| 53 | */ |
| 54 | #define PP_ALLOC_CACHE_SIZE 128 |
| 55 | #define PP_ALLOC_CACHE_REFILL 64 |
| 56 | struct pp_alloc_cache { |
| 57 | u32 count; |
| 58 | void *cache[PP_ALLOC_CACHE_SIZE]; |
| 59 | }; |
| 60 | |
| 61 | struct page_pool_params { |
| 62 | unsigned int flags; |
| 63 | unsigned int order; |
| 64 | unsigned int pool_size; |
| 65 | int nid; /* Numa node id to allocate from pages from */ |
| 66 | struct device *dev; /* device, for DMA pre-mapping purposes */ |
| 67 | enum dma_data_direction dma_dir; /* DMA mapping direction */ |
| 68 | }; |
| 69 | |
| 70 | struct page_pool { |
Jesper Dangaard Brouer | ff7d6b2 | 2018-04-17 16:46:17 +0200 | [diff] [blame] | 71 | struct page_pool_params p; |
| 72 | |
Jonathan Lemon | c3f812c | 2019-11-14 14:13:00 -0800 | [diff] [blame] | 73 | struct delayed_work release_dw; |
| 74 | void (*disconnect)(void *); |
| 75 | unsigned long defer_start; |
| 76 | unsigned long defer_warn; |
| 77 | |
| 78 | u32 pages_state_hold_cnt; |
Jesper Dangaard Brouer | 99c07c4 | 2019-06-18 15:05:47 +0200 | [diff] [blame] | 79 | |
Jesper Dangaard Brouer | ff7d6b2 | 2018-04-17 16:46:17 +0200 | [diff] [blame] | 80 | /* |
| 81 | * Data structure for allocation side |
| 82 | * |
| 83 | * Drivers allocation side usually already perform some kind |
| 84 | * of resource protection. Piggyback on this protection, and |
| 85 | * require driver to protect allocation side. |
| 86 | * |
| 87 | * For NIC drivers this means, allocate a page_pool per |
| 88 | * RX-queue. As the RX-queue is already protected by |
| 89 | * Softirq/BH scheduling and napi_schedule. NAPI schedule |
| 90 | * guarantee that a single napi_struct will only be scheduled |
| 91 | * on a single CPU (see napi_schedule). |
| 92 | */ |
| 93 | struct pp_alloc_cache alloc ____cacheline_aligned_in_smp; |
| 94 | |
| 95 | /* Data structure for storing recycled pages. |
| 96 | * |
| 97 | * Returning/freeing pages is more complicated synchronization |
| 98 | * wise, because free's can happen on remote CPUs, with no |
| 99 | * association with allocation resource. |
| 100 | * |
| 101 | * Use ptr_ring, as it separates consumer and producer |
| 102 | * effeciently, it a way that doesn't bounce cache-lines. |
| 103 | * |
| 104 | * TODO: Implement bulk return pages into this structure. |
| 105 | */ |
| 106 | struct ptr_ring ring; |
Jesper Dangaard Brouer | 99c07c4 | 2019-06-18 15:05:47 +0200 | [diff] [blame] | 107 | |
| 108 | atomic_t pages_state_release_cnt; |
Ivan Khoronzhuk | 1da4bbe | 2019-07-09 00:34:28 +0300 | [diff] [blame] | 109 | |
| 110 | /* A page_pool is strictly tied to a single RX-queue being |
| 111 | * protected by NAPI, due to above pp_alloc_cache. This |
| 112 | * refcnt serves purpose is to simplify drivers error handling. |
| 113 | */ |
| 114 | refcount_t user_cnt; |
Jesper Dangaard Brouer | 7c9e694 | 2019-11-16 12:22:43 +0100 | [diff] [blame] | 115 | |
| 116 | u64 destroy_cnt; |
Jesper Dangaard Brouer | ff7d6b2 | 2018-04-17 16:46:17 +0200 | [diff] [blame] | 117 | }; |
| 118 | |
| 119 | struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp); |
| 120 | |
| 121 | static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool) |
| 122 | { |
| 123 | gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN); |
| 124 | |
| 125 | return page_pool_alloc_pages(pool, gfp); |
| 126 | } |
| 127 | |
Ilias Apalodimas | bb005f2 | 2019-06-29 08:23:24 +0300 | [diff] [blame] | 128 | /* get the stored dma direction. A driver might decide to treat this locally and |
| 129 | * avoid the extra cache line from page_pool to determine the direction |
| 130 | */ |
| 131 | static |
| 132 | inline enum dma_data_direction page_pool_get_dma_dir(struct page_pool *pool) |
| 133 | { |
| 134 | return pool->p.dma_dir; |
| 135 | } |
| 136 | |
/* Create a page_pool from @params; declared regardless of CONFIG_PAGE_POOL. */
struct page_pool *page_pool_create(const struct page_pool_params *params);

/*
 * page_pool_destroy() and page_pool_use_xdp_mem() are out-of-line functions
 * only when CONFIG_PAGE_POOL is set. Without it they are empty inline stubs,
 * so callers still compile and link.
 */
#ifndef CONFIG_PAGE_POOL
static inline void page_pool_destroy(struct page_pool *pool) { }

static inline void page_pool_use_xdp_mem(struct page_pool *pool,
					 void (*disconnect)(void *)) { }
#else
void page_pool_destroy(struct page_pool *pool);
void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *));
#endif
| 152 | |
/* Never call this directly, use helpers below */
void __page_pool_put_page(struct page_pool *pool,
			  struct page *page, bool allow_direct);

/*
 * Hand @page back to @pool by forwarding all three arguments to
 * __page_pool_put_page().
 *
 * Without CONFIG_PAGE_POOL this is an empty function: net/core/xdp.c
 * doesn't allow registering MEM_TYPE_PAGE_POOL in that case, but the
 * linker still has to be shielded from the missing symbol.
 */
#ifdef CONFIG_PAGE_POOL
static inline void page_pool_put_page(struct page_pool *pool,
				      struct page *page, bool allow_direct)
{
	__page_pool_put_page(pool, page, allow_direct);
}
#else
static inline void page_pool_put_page(struct page_pool *pool,
				      struct page *page, bool allow_direct)
{
}
#endif
/*
 * Return @page to @pool via __page_pool_put_page() with allow_direct set
 * to true. Very limited use-cases allow recycle direct.
 *
 * NOTE(review): unlike page_pool_put_page(), the call here is not guarded
 * by CONFIG_PAGE_POOL.
 */
static inline void page_pool_recycle_direct(struct page_pool *pool,
					    struct page *page)
{
	const bool allow_direct = true;

	__page_pool_put_page(pool, page, allow_direct);
}
| 173 | |
/*
 * Disconnect a page from a page_pool. API users can need this in order to
 * use the page as a regular page (one that will eventually be returned to
 * the normal page-allocator via put_page).
 */
void page_pool_unmap_page(struct page_pool *pool, struct page *page);

/*
 * Forwards to page_pool_unmap_page() when CONFIG_PAGE_POOL is set;
 * otherwise compiles to an empty function.
 */
#ifdef CONFIG_PAGE_POOL
static inline void page_pool_release_page(struct page_pool *pool,
					  struct page *page)
{
	page_pool_unmap_page(pool, page);
}
#else
static inline void page_pool_release_page(struct page_pool *pool,
					  struct page *page)
{
}
#endif
| 187 | |
Ilias Apalodimas | 0afdeee | 2019-06-18 15:05:12 +0200 | [diff] [blame] | 188 | static inline dma_addr_t page_pool_get_dma_addr(struct page *page) |
| 189 | { |
| 190 | return page->dma_addr; |
| 191 | } |
| 192 | |
/* Report whether this build has CONFIG_PAGE_POOL defined. */
static inline bool is_page_pool_compiled_in(void)
{
	bool compiled_in = false;

#ifdef CONFIG_PAGE_POOL
	compiled_in = true;
#endif
	return compiled_in;
}
| 201 | |
Ivan Khoronzhuk | 1da4bbe | 2019-07-09 00:34:28 +0300 | [diff] [blame] | 202 | static inline bool page_pool_put(struct page_pool *pool) |
| 203 | { |
| 204 | return refcount_dec_and_test(&pool->user_cnt); |
| 205 | } |
| 206 | |
Saeed Mahameed | bc83674 | 2019-11-20 00:15:17 +0000 | [diff] [blame] | 207 | /* Caller must provide appropriate safe context, e.g. NAPI. */ |
| 208 | void page_pool_update_nid(struct page_pool *pool, int new_nid); |
| 209 | static inline void page_pool_nid_changed(struct page_pool *pool, int new_nid) |
| 210 | { |
| 211 | if (unlikely(pool->p.nid != new_nid)) |
| 212 | page_pool_update_nid(pool, new_nid); |
| 213 | } |
Jesper Dangaard Brouer | ff7d6b2 | 2018-04-17 16:46:17 +0200 | [diff] [blame] | 214 | #endif /* _NET_PAGE_POOL_H */ |