.. SPDX-License-Identifier: GPL-2.0

=============
Page Pool API
=============

The page_pool allocator is optimized for the XDP mode that uses one frame
per-page, but it can fall back on the regular page allocator APIs.

Basic use involves replacing alloc_pages() calls with the
page_pool_alloc_pages() call. Drivers should use page_pool_dev_alloc_pages()
in place of dev_alloc_pages().
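
For example, an Rx buffer refill path that used dev_alloc_pages() can be
switched over with a one line change. A minimal sketch, where rxq and its
page_pool member are hypothetical driver-side names:

.. code-block:: c

    /* Before: page = dev_alloc_pages(0); */
    page = page_pool_dev_alloc_pages(rxq->page_pool);
    if (!page)
            return -ENOMEM;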

The API keeps track of in-flight pages in order to let API users know
when it is safe to free a page_pool object. Thus, API users
must call page_pool_release_page() when a page leaves the page_pool, or
call page_pool_put_page() where appropriate, in order to maintain correct
accounting.

An API user must call page_pool_put_page() once on a page: it will
either recycle the page, or, if the refcnt is > 1, release the DMA
mapping and the in-flight state accounting.
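
As a minimal sketch of these rules on an Rx completion path (rx_error(),
build_rx_skb(), pool and page are hypothetical driver-side names, and the
allow_direct argument assumes NAPI context):

.. code-block:: c

    if (rx_error(desc)) {
            /* The page stays under page_pool control: recycle it into
             * the pool caches (or unmap and free it if refcnt > 1).
             */
            page_pool_put_full_page(pool, page, true);
    } else {
            /* The page leaves the page_pool as part of an skb, so drop
             * it from the pool's in-flight accounting and DMA mapping.
             */
            page_pool_release_page(pool, page);
            skb = build_rx_skb(page);
    }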

Architecture overview
=====================

.. code-block:: none

    +------------------+
    |      Driver      |
    +------------------+
             ^
             |
             |
             |
             v
    +--------------------------------------------+
    |               request memory               |
    +--------------------------------------------+
        ^                                ^
        |                                |
        | Pool empty                     | Pool has entries
        |                                |
        v                                v
    +-----------------------+    +------------------------+
    | alloc (and map) pages |    |  get page from cache   |
    +-----------------------+    +------------------------+
                                     ^                  ^
                                     |                  |
                                     | cache available  | No entries, refill
                                     |                  | from ptr-ring
                                     |                  |
                                     v                  v
                            +-----------------+  +------------------+
                            |   Fast cache    |  |  ptr-ring cache  |
                            +-----------------+  +------------------+

API interface
=============
The number of pools created **must** match the number of hardware queues
unless hardware restrictions make that impossible. Doing otherwise would
defeat the purpose of page pool, which is to allocate pages quickly from a
cache without locking. This lockless guarantee naturally comes from running
under a NAPI softirq. The protection doesn't strictly have to be NAPI; any
guarantee that allocating a page will cause no race conditions is enough.

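A minimal sketch of this one-pool-per-queue rule, assuming hypothetical
driver fields priv->num_rx_queues and priv->rxq[i] (error handling is
abbreviated):

.. code-block:: c

    int i;

    /* One page_pool instance per Rx queue, so each NAPI context
     * allocates and recycles through its own lockless cache.
     */
    for (i = 0; i < priv->num_rx_queues; i++) {
            struct page_pool_params pp_params = { 0 };

            pp_params.order = 0;
            pp_params.flags = PP_FLAG_DMA_MAP;
            pp_params.pool_size = DESC_NUM;
            pp_params.nid = NUMA_NO_NODE;
            pp_params.dev = priv->dev;
            pp_params.dma_dir = DMA_FROM_DEVICE;

            priv->rxq[i].page_pool = page_pool_create(&pp_params);
            if (IS_ERR(priv->rxq[i].page_pool))
                    return PTR_ERR(priv->rxq[i].page_pool);
    }
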
* page_pool_create(): Create a pool.
    * flags: PP_FLAG_DMA_MAP, PP_FLAG_DMA_SYNC_DEV
    * order: 2^order pages on allocation
    * pool_size: size of the ptr_ring
    * nid: preferred NUMA node for allocation
    * dev: struct device. Used on DMA operations
    * dma_dir: DMA direction
    * max_len: max DMA sync memory size
    * offset: DMA address offset

* page_pool_put_page(): The outcome of this depends on the page refcnt. If the
  driver bumps the refcnt > 1 this will unmap the page. If the page refcnt is 1
  the allocator owns the page and will try to recycle it in one of the pool
  caches. If PP_FLAG_DMA_SYNC_DEV is set, the page will be synced for_device
  using dma_sync_single_range_for_device().

* page_pool_put_full_page(): Similar to page_pool_put_page(), but will DMA sync
  the entire memory area configured in pool->max_len.

* page_pool_recycle_direct(): Similar to page_pool_put_full_page(), but the
  caller must guarantee a safe context (e.g. NAPI), since it will recycle the
  page directly into the pool fast cache.

* page_pool_release_page(): Unmap the page (if mapped) and account for it in
  the in-flight counters.

* page_pool_dev_alloc_pages(): Get a page from the page allocator or page_pool
  caches.

* page_pool_get_dma_addr(): Retrieve the stored DMA address, as shown in the
  sketch below.

* page_pool_get_dma_dir(): Retrieve the stored DMA direction.
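
The DMA helpers are typically used when refilling Rx descriptors. A minimal
sketch, where rx_desc, dring and RX_BUF_SIZE are hypothetical driver-side
names:

.. code-block:: c

    /* Refill one Rx descriptor: get a (pre-mapped, when PP_FLAG_DMA_MAP
     * is set) page from the pool and hand its DMA address to hardware.
     */
    struct page *page;
    dma_addr_t dma;

    page = page_pool_dev_alloc_pages(dring->page_pool);
    if (!page)
            return -ENOMEM;

    dma = page_pool_get_dma_addr(page);
    rx_desc->addr = cpu_to_le64(dma);
    rx_desc->len = cpu_to_le16(RX_BUF_SIZE);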

Coding examples
===============

Registration
------------

.. code-block:: c

    /* Page pool registration */
    struct page_pool_params pp_params = { 0 };
    struct xdp_rxq_info xdp_rxq;
    int err;

    pp_params.order = 0;
    /* internal DMA mapping in page_pool */
    pp_params.flags = PP_FLAG_DMA_MAP;
    pp_params.pool_size = DESC_NUM;
    pp_params.nid = NUMA_NO_NODE;
    pp_params.dev = priv->dev;
    pp_params.dma_dir = xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;

    /* page_pool_create() returns an ERR_PTR() on failure */
    page_pool = page_pool_create(&pp_params);
    if (IS_ERR(page_pool)) {
            err = PTR_ERR(page_pool);
            goto err_out;
    }

    err = xdp_rxq_info_reg(&xdp_rxq, ndev, 0);
    if (err)
            goto err_out;

    err = xdp_rxq_info_reg_mem_model(&xdp_rxq, MEM_TYPE_PAGE_POOL, page_pool);
    if (err)
            goto err_out;

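Registering the pool as the rxq's memory model (MEM_TYPE_PAGE_POOL) is what
lets pages freed through the XDP return path find their way back into this
pool instead of the regular page allocator.
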
NAPI poller
-----------


.. code-block:: c

    /* NAPI Rx poller */
    enum dma_data_direction dma_dir;

    dma_dir = page_pool_get_dma_dir(dring->page_pool);
    while (done < budget) {
        if (some error)
            page_pool_recycle_direct(page_pool, page);
        if (packet_is_xdp) {
            if XDP_DROP:
                page_pool_recycle_direct(page_pool, page);
        } else if (packet_is_skb) {
            page_pool_release_page(page_pool, page);
            new_page = page_pool_dev_alloc_pages(page_pool);
        }
    }
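
Before the CPU reads packet data out of a recycled page, the driver is still
responsible for syncing the buffer for the CPU; PP_FLAG_DMA_SYNC_DEV only
covers the device-side sync when pages are returned to the pool. A minimal
sketch, where dring->dev, rx_offset and pkt_len are hypothetical driver-side
names:

.. code-block:: c

    /* Make the freshly received bytes visible to the CPU before
     * parsing headers or running an XDP program on them.
     */
    dma_addr_t dma = page_pool_get_dma_addr(page);

    dma_sync_single_for_cpu(dring->dev, dma + rx_offset, pkt_len, dma_dir);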

Driver unload
-------------

.. code-block:: c

    /* Driver unload */
    page_pool_put_full_page(page_pool, page, false);
    xdp_rxq_info_unreg(&xdp_rxq);
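
If the pool was not registered as an xdp_rxq_info memory model as in the
registration example above, the driver has to shut it down explicitly once
all pages have been returned. A minimal sketch of that case:

.. code-block:: c

    /* Release the standalone pool itself; only do this after every page
     * obtained from it has been returned or released.
     */
    page_pool_destroy(page_pool);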