Logan Gunthorpe | 5291698 | 2018-10-04 15:27:35 -0600 | [diff] [blame] | 1 | // SPDX-License-Identifier: GPL-2.0 |
| 2 | /* |
| 3 | * PCI Peer 2 Peer DMA support. |
| 4 | * |
| 5 | * Copyright (c) 2016-2018, Logan Gunthorpe |
| 6 | * Copyright (c) 2016-2017, Microsemi Corporation |
| 7 | * Copyright (c) 2017, Christoph Hellwig |
| 8 | * Copyright (c) 2018, Eideticom Inc. |
| 9 | */ |
| 10 | |
Logan Gunthorpe | 2d7bc01 | 2018-10-04 15:27:38 -0600 | [diff] [blame] | 11 | #define pr_fmt(fmt) "pci-p2pdma: " fmt |
| 12 | #include <linux/ctype.h> |
Logan Gunthorpe | 5291698 | 2018-10-04 15:27:35 -0600 | [diff] [blame] | 13 | #include <linux/pci-p2pdma.h> |
| 14 | #include <linux/module.h> |
| 15 | #include <linux/slab.h> |
| 16 | #include <linux/genalloc.h> |
| 17 | #include <linux/memremap.h> |
| 18 | #include <linux/percpu-refcount.h> |
| 19 | #include <linux/random.h> |
| 20 | #include <linux/seq_buf.h> |
| 21 | |
| 22 | struct pci_p2pdma { |
| 23 | struct percpu_ref devmap_ref; |
| 24 | struct completion devmap_ref_done; |
| 25 | struct gen_pool *pool; |
| 26 | bool p2pmem_published; |
| 27 | }; |
| 28 | |
Logan Gunthorpe | cbb8ca6 | 2018-10-04 15:27:36 -0600 | [diff] [blame] | 29 | static ssize_t size_show(struct device *dev, struct device_attribute *attr, |
| 30 | char *buf) |
| 31 | { |
| 32 | struct pci_dev *pdev = to_pci_dev(dev); |
| 33 | size_t size = 0; |
| 34 | |
| 35 | if (pdev->p2pdma->pool) |
| 36 | size = gen_pool_size(pdev->p2pdma->pool); |
| 37 | |
| 38 | return snprintf(buf, PAGE_SIZE, "%zd\n", size); |
| 39 | } |
| 40 | static DEVICE_ATTR_RO(size); |
| 41 | |
| 42 | static ssize_t available_show(struct device *dev, struct device_attribute *attr, |
| 43 | char *buf) |
| 44 | { |
| 45 | struct pci_dev *pdev = to_pci_dev(dev); |
| 46 | size_t avail = 0; |
| 47 | |
| 48 | if (pdev->p2pdma->pool) |
| 49 | avail = gen_pool_avail(pdev->p2pdma->pool); |
| 50 | |
| 51 | return snprintf(buf, PAGE_SIZE, "%zd\n", avail); |
| 52 | } |
| 53 | static DEVICE_ATTR_RO(available); |
| 54 | |
| 55 | static ssize_t published_show(struct device *dev, struct device_attribute *attr, |
| 56 | char *buf) |
| 57 | { |
| 58 | struct pci_dev *pdev = to_pci_dev(dev); |
| 59 | |
| 60 | return snprintf(buf, PAGE_SIZE, "%d\n", |
| 61 | pdev->p2pdma->p2pmem_published); |
| 62 | } |
| 63 | static DEVICE_ATTR_RO(published); |
| 64 | |
| 65 | static struct attribute *p2pmem_attrs[] = { |
| 66 | &dev_attr_size.attr, |
| 67 | &dev_attr_available.attr, |
| 68 | &dev_attr_published.attr, |
| 69 | NULL, |
| 70 | }; |
| 71 | |
| 72 | static const struct attribute_group p2pmem_group = { |
| 73 | .attrs = p2pmem_attrs, |
| 74 | .name = "p2pmem", |
| 75 | }; |
| 76 | |
Logan Gunthorpe | 5291698 | 2018-10-04 15:27:35 -0600 | [diff] [blame] | 77 | static void pci_p2pdma_percpu_release(struct percpu_ref *ref) |
| 78 | { |
| 79 | struct pci_p2pdma *p2p = |
| 80 | container_of(ref, struct pci_p2pdma, devmap_ref); |
| 81 | |
| 82 | complete_all(&p2p->devmap_ref_done); |
| 83 | } |
| 84 | |
/*
 * Kill callback installed on every pagemap by pci_p2pdma_add_resource().
 */
static void pci_p2pdma_percpu_kill(struct percpu_ref *ref)
{
	/*
	 * A driver may call pci_p2pdma_add_resource() several times, so
	 * this callback may be registered, and therefore invoked, more
	 * than once for the same reference. Only the first invocation is
	 * allowed to kill it.
	 */
	if (!percpu_ref_is_dying(ref))
		percpu_ref_kill(ref);
}
| 98 | |
| 99 | static void pci_p2pdma_release(void *data) |
| 100 | { |
| 101 | struct pci_dev *pdev = data; |
| 102 | |
| 103 | if (!pdev->p2pdma) |
| 104 | return; |
| 105 | |
| 106 | wait_for_completion(&pdev->p2pdma->devmap_ref_done); |
| 107 | percpu_ref_exit(&pdev->p2pdma->devmap_ref); |
| 108 | |
| 109 | gen_pool_destroy(pdev->p2pdma->pool); |
Logan Gunthorpe | cbb8ca6 | 2018-10-04 15:27:36 -0600 | [diff] [blame] | 110 | sysfs_remove_group(&pdev->dev.kobj, &p2pmem_group); |
Logan Gunthorpe | 5291698 | 2018-10-04 15:27:35 -0600 | [diff] [blame] | 111 | pdev->p2pdma = NULL; |
| 112 | } |
| 113 | |
| 114 | static int pci_p2pdma_setup(struct pci_dev *pdev) |
| 115 | { |
| 116 | int error = -ENOMEM; |
| 117 | struct pci_p2pdma *p2p; |
| 118 | |
| 119 | p2p = devm_kzalloc(&pdev->dev, sizeof(*p2p), GFP_KERNEL); |
| 120 | if (!p2p) |
| 121 | return -ENOMEM; |
| 122 | |
| 123 | p2p->pool = gen_pool_create(PAGE_SHIFT, dev_to_node(&pdev->dev)); |
| 124 | if (!p2p->pool) |
| 125 | goto out; |
| 126 | |
| 127 | init_completion(&p2p->devmap_ref_done); |
| 128 | error = percpu_ref_init(&p2p->devmap_ref, |
| 129 | pci_p2pdma_percpu_release, 0, GFP_KERNEL); |
| 130 | if (error) |
| 131 | goto out_pool_destroy; |
| 132 | |
| 133 | error = devm_add_action_or_reset(&pdev->dev, pci_p2pdma_release, pdev); |
| 134 | if (error) |
| 135 | goto out_pool_destroy; |
| 136 | |
| 137 | pdev->p2pdma = p2p; |
| 138 | |
Logan Gunthorpe | cbb8ca6 | 2018-10-04 15:27:36 -0600 | [diff] [blame] | 139 | error = sysfs_create_group(&pdev->dev.kobj, &p2pmem_group); |
| 140 | if (error) |
| 141 | goto out_pool_destroy; |
| 142 | |
Logan Gunthorpe | 5291698 | 2018-10-04 15:27:35 -0600 | [diff] [blame] | 143 | return 0; |
| 144 | |
| 145 | out_pool_destroy: |
Logan Gunthorpe | cbb8ca6 | 2018-10-04 15:27:36 -0600 | [diff] [blame] | 146 | pdev->p2pdma = NULL; |
Logan Gunthorpe | 5291698 | 2018-10-04 15:27:35 -0600 | [diff] [blame] | 147 | gen_pool_destroy(p2p->pool); |
| 148 | out: |
| 149 | devm_kfree(&pdev->dev, p2p); |
| 150 | return error; |
| 151 | } |
| 152 | |
| 153 | /** |
| 154 | * pci_p2pdma_add_resource - add memory for use as p2p memory |
| 155 | * @pdev: the device to add the memory to |
| 156 | * @bar: PCI BAR to add |
| 157 | * @size: size of the memory to add, may be zero to use the whole BAR |
| 158 | * @offset: offset into the PCI BAR |
| 159 | * |
| 160 | * The memory will be given ZONE_DEVICE struct pages so that it may |
| 161 | * be used with any DMA request. |
| 162 | */ |
| 163 | int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size, |
| 164 | u64 offset) |
| 165 | { |
| 166 | struct dev_pagemap *pgmap; |
| 167 | void *addr; |
| 168 | int error; |
| 169 | |
| 170 | if (!(pci_resource_flags(pdev, bar) & IORESOURCE_MEM)) |
| 171 | return -EINVAL; |
| 172 | |
| 173 | if (offset >= pci_resource_len(pdev, bar)) |
| 174 | return -EINVAL; |
| 175 | |
| 176 | if (!size) |
| 177 | size = pci_resource_len(pdev, bar) - offset; |
| 178 | |
| 179 | if (size + offset > pci_resource_len(pdev, bar)) |
| 180 | return -EINVAL; |
| 181 | |
| 182 | if (!pdev->p2pdma) { |
| 183 | error = pci_p2pdma_setup(pdev); |
| 184 | if (error) |
| 185 | return error; |
| 186 | } |
| 187 | |
| 188 | pgmap = devm_kzalloc(&pdev->dev, sizeof(*pgmap), GFP_KERNEL); |
| 189 | if (!pgmap) |
| 190 | return -ENOMEM; |
| 191 | |
| 192 | pgmap->res.start = pci_resource_start(pdev, bar) + offset; |
| 193 | pgmap->res.end = pgmap->res.start + size - 1; |
| 194 | pgmap->res.flags = pci_resource_flags(pdev, bar); |
| 195 | pgmap->ref = &pdev->p2pdma->devmap_ref; |
| 196 | pgmap->type = MEMORY_DEVICE_PCI_P2PDMA; |
Logan Gunthorpe | 977196b | 2018-10-04 15:27:37 -0600 | [diff] [blame] | 197 | pgmap->pci_p2pdma_bus_offset = pci_bus_address(pdev, bar) - |
| 198 | pci_resource_start(pdev, bar); |
Dan Williams | 02917e9 | 2018-12-28 00:35:15 -0800 | [diff] [blame] | 199 | pgmap->kill = pci_p2pdma_percpu_kill; |
Logan Gunthorpe | 5291698 | 2018-10-04 15:27:35 -0600 | [diff] [blame] | 200 | |
| 201 | addr = devm_memremap_pages(&pdev->dev, pgmap); |
| 202 | if (IS_ERR(addr)) { |
| 203 | error = PTR_ERR(addr); |
| 204 | goto pgmap_free; |
| 205 | } |
| 206 | |
| 207 | error = gen_pool_add_virt(pdev->p2pdma->pool, (unsigned long)addr, |
| 208 | pci_bus_address(pdev, bar) + offset, |
| 209 | resource_size(&pgmap->res), dev_to_node(&pdev->dev)); |
| 210 | if (error) |
| 211 | goto pgmap_free; |
| 212 | |
Logan Gunthorpe | 5291698 | 2018-10-04 15:27:35 -0600 | [diff] [blame] | 213 | pci_info(pdev, "added peer-to-peer DMA memory %pR\n", |
| 214 | &pgmap->res); |
| 215 | |
| 216 | return 0; |
| 217 | |
| 218 | pgmap_free: |
| 219 | devm_kfree(&pdev->dev, pgmap); |
| 220 | return error; |
| 221 | } |
| 222 | EXPORT_SYMBOL_GPL(pci_p2pdma_add_resource); |
| 223 | |
| 224 | /* |
| 225 | * Note this function returns the parent PCI device with a |
| 226 | * reference taken. It is the caller's responsibily to drop |
| 227 | * the reference. |
| 228 | */ |
| 229 | static struct pci_dev *find_parent_pci_dev(struct device *dev) |
| 230 | { |
| 231 | struct device *parent; |
| 232 | |
| 233 | dev = get_device(dev); |
| 234 | |
| 235 | while (dev) { |
| 236 | if (dev_is_pci(dev)) |
| 237 | return to_pci_dev(dev); |
| 238 | |
| 239 | parent = get_device(dev->parent); |
| 240 | put_device(dev); |
| 241 | dev = parent; |
| 242 | } |
| 243 | |
| 244 | return NULL; |
| 245 | } |
| 246 | |
| 247 | /* |
| 248 | * Check if a PCI bridge has its ACS redirection bits set to redirect P2P |
| 249 | * TLPs upstream via ACS. Returns 1 if the packets will be redirected |
| 250 | * upstream, 0 otherwise. |
| 251 | */ |
| 252 | static int pci_bridge_has_acs_redir(struct pci_dev *pdev) |
| 253 | { |
| 254 | int pos; |
| 255 | u16 ctrl; |
| 256 | |
| 257 | pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ACS); |
| 258 | if (!pos) |
| 259 | return 0; |
| 260 | |
| 261 | pci_read_config_word(pdev, pos + PCI_ACS_CTRL, &ctrl); |
| 262 | |
| 263 | if (ctrl & (PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_EC)) |
| 264 | return 1; |
| 265 | |
| 266 | return 0; |
| 267 | } |
| 268 | |
/*
 * Append the PCI name of @pdev followed by ';' to @buf. A NULL @buf is
 * accepted and makes this a no-op.
 */
static void seq_buf_print_bus_devfn(struct seq_buf *buf, struct pci_dev *pdev)
{
	if (buf)
		seq_buf_printf(buf, "%s;", pci_name(pdev));
}
| 276 | |
/*
 * Compute the distance between two PCI devices, measured through their
 * nearest common upstream bridge.
 *
 * Return values:
 *
 *   0   @a and @b are the same device.
 *
 *   2   e.g. two virtual functions of one device behind the same bridge
 *       port (one step to the PCIe switch, one step back to the device).
 *
 *   4   two devices attached to the same PCIe switch, as in this tree:
 *
 *         -+  Root Port
 *          \+ Switch Upstream Port
 *           +-+ Switch Downstream Port
 *           + \- Device A
 *           \-+ Switch Downstream Port
 *             \- Device B
 *
 *       The path runs from Device A through its downstream port to the
 *       common upstream port, then through the second downstream port
 *       to Device B.
 *
 *   >4  devices behind different switches that still share an upstream
 *       bridge further up (several switches used for fan-out from one
 *       root port). The value depends on how many switches separate the
 *       two devices.
 *
 *   -1  no common upstream bridge exists. Devices on separate PCIe root
 *       ports are rejected this way: every root port defines its own
 *       hierarchy domain, and there is no way to tell whether the root
 *       complex forwards between them.
 *
 *   -2  a bridge with any ACS redirection bit set was seen on the path,
 *       meaning TLPs would be forwarded up into the root complex. The
 *       addresses of all such bridges are appended to @acs_list (when it
 *       is non-NULL) for printk purposes.
 */
static int upstream_bridge_distance(struct pci_dev *a,
				    struct pci_dev *b,
				    struct seq_buf *acs_list)
{
	struct pci_dev *up_a, *up_b;
	int steps_a, steps_b = 0;
	int redirs = 0;

	/*
	 * No references are taken on what pci_upstream_bridge() returns:
	 * the caller holds a reference on the child device, which in turn
	 * already holds one on its upstream bridge.
	 */

	/* Climb from @a until some ancestor is also on @b's path to the root. */
	for (up_a = a, steps_a = 0; up_a;
	     up_a = pci_upstream_bridge(up_a), steps_a++) {
		if (pci_bridge_has_acs_redir(up_a)) {
			seq_buf_print_bus_devfn(acs_list, up_a);
			redirs++;
		}

		steps_b = 0;
		for (up_b = b; up_b && up_b != up_a;
		     up_b = pci_upstream_bridge(up_b))
			steps_b++;

		/* Non-NULL here means the inner walk stopped on up_a. */
		if (up_b)
			break;
	}

	if (!up_a)
		return -1;

	/* Check @b's side of the path, excluding the common bridge itself. */
	for (up_b = b; up_b && up_b != up_a;
	     up_b = pci_upstream_bridge(up_b)) {
		if (pci_bridge_has_acs_redir(up_b)) {
			seq_buf_print_bus_devfn(acs_list, up_b);
			redirs++;
		}
	}

	if (redirs)
		return -2;

	return steps_a + steps_b;
}
| 379 | |
| 380 | static int upstream_bridge_distance_warn(struct pci_dev *provider, |
| 381 | struct pci_dev *client) |
| 382 | { |
| 383 | struct seq_buf acs_list; |
| 384 | int ret; |
| 385 | |
| 386 | seq_buf_init(&acs_list, kmalloc(PAGE_SIZE, GFP_KERNEL), PAGE_SIZE); |
| 387 | if (!acs_list.buffer) |
| 388 | return -ENOMEM; |
| 389 | |
| 390 | ret = upstream_bridge_distance(provider, client, &acs_list); |
| 391 | if (ret == -2) { |
| 392 | pci_warn(client, "cannot be used for peer-to-peer DMA as ACS redirect is set between the client and provider (%s)\n", |
| 393 | pci_name(provider)); |
| 394 | /* Drop final semicolon */ |
| 395 | acs_list.buffer[acs_list.len-1] = 0; |
| 396 | pci_warn(client, "to disable ACS redirect for this path, add the kernel parameter: pci=disable_acs_redir=%s\n", |
| 397 | acs_list.buffer); |
| 398 | |
| 399 | } else if (ret < 0) { |
| 400 | pci_warn(client, "cannot be used for peer-to-peer DMA as the client and provider (%s) do not share an upstream bridge\n", |
| 401 | pci_name(provider)); |
| 402 | } |
| 403 | |
| 404 | kfree(acs_list.buffer); |
| 405 | |
| 406 | return ret; |
| 407 | } |
| 408 | |
| 409 | /** |
| 410 | * pci_p2pdma_distance_many - Determive the cumulative distance between |
| 411 | * a p2pdma provider and the clients in use. |
| 412 | * @provider: p2pdma provider to check against the client list |
| 413 | * @clients: array of devices to check (NULL-terminated) |
| 414 | * @num_clients: number of clients in the array |
| 415 | * @verbose: if true, print warnings for devices when we return -1 |
| 416 | * |
| 417 | * Returns -1 if any of the clients are not compatible (behind the same |
| 418 | * root port as the provider), otherwise returns a positive number where |
Randy Dunlap | fcf9ab3 | 2018-12-01 09:31:34 -0800 | [diff] [blame] | 419 | * a lower number is the preferable choice. (If there's one client |
Logan Gunthorpe | 5291698 | 2018-10-04 15:27:35 -0600 | [diff] [blame] | 420 | * that's the same as the provider it will return 0, which is best choice). |
| 421 | * |
| 422 | * For now, "compatible" means the provider and the clients are all behind |
| 423 | * the same PCI root port. This cuts out cases that may work but is safest |
| 424 | * for the user. Future work can expand this to white-list root complexes that |
| 425 | * can safely forward between each ports. |
| 426 | */ |
| 427 | int pci_p2pdma_distance_many(struct pci_dev *provider, struct device **clients, |
| 428 | int num_clients, bool verbose) |
| 429 | { |
| 430 | bool not_supported = false; |
| 431 | struct pci_dev *pci_client; |
| 432 | int distance = 0; |
| 433 | int i, ret; |
| 434 | |
| 435 | if (num_clients == 0) |
| 436 | return -1; |
| 437 | |
| 438 | for (i = 0; i < num_clients; i++) { |
| 439 | pci_client = find_parent_pci_dev(clients[i]); |
| 440 | if (!pci_client) { |
| 441 | if (verbose) |
| 442 | dev_warn(clients[i], |
| 443 | "cannot be used for peer-to-peer DMA as it is not a PCI device\n"); |
| 444 | return -1; |
| 445 | } |
| 446 | |
| 447 | if (verbose) |
| 448 | ret = upstream_bridge_distance_warn(provider, |
| 449 | pci_client); |
| 450 | else |
| 451 | ret = upstream_bridge_distance(provider, pci_client, |
| 452 | NULL); |
| 453 | |
| 454 | pci_dev_put(pci_client); |
| 455 | |
| 456 | if (ret < 0) |
| 457 | not_supported = true; |
| 458 | |
| 459 | if (not_supported && !verbose) |
| 460 | break; |
| 461 | |
| 462 | distance += ret; |
| 463 | } |
| 464 | |
| 465 | if (not_supported) |
| 466 | return -1; |
| 467 | |
| 468 | return distance; |
| 469 | } |
| 470 | EXPORT_SYMBOL_GPL(pci_p2pdma_distance_many); |
| 471 | |
| 472 | /** |
| 473 | * pci_has_p2pmem - check if a given PCI device has published any p2pmem |
| 474 | * @pdev: PCI device to check |
| 475 | */ |
| 476 | bool pci_has_p2pmem(struct pci_dev *pdev) |
| 477 | { |
| 478 | return pdev->p2pdma && pdev->p2pdma->p2pmem_published; |
| 479 | } |
| 480 | EXPORT_SYMBOL_GPL(pci_has_p2pmem); |
| 481 | |
| 482 | /** |
| 483 | * pci_p2pmem_find - find a peer-to-peer DMA memory device compatible with |
| 484 | * the specified list of clients and shortest distance (as determined |
| 485 | * by pci_p2pmem_dma()) |
| 486 | * @clients: array of devices to check (NULL-terminated) |
| 487 | * @num_clients: number of client devices in the list |
| 488 | * |
| 489 | * If multiple devices are behind the same switch, the one "closest" to the |
Randy Dunlap | fcf9ab3 | 2018-12-01 09:31:34 -0800 | [diff] [blame] | 490 | * client devices in use will be chosen first. (So if one of the providers is |
Logan Gunthorpe | 5291698 | 2018-10-04 15:27:35 -0600 | [diff] [blame] | 491 | * the same as one of the clients, that provider will be used ahead of any |
| 492 | * other providers that are unrelated). If multiple providers are an equal |
| 493 | * distance away, one will be chosen at random. |
| 494 | * |
| 495 | * Returns a pointer to the PCI device with a reference taken (use pci_dev_put |
| 496 | * to return the reference) or NULL if no compatible device is found. The |
| 497 | * found provider will also be assigned to the client list. |
| 498 | */ |
| 499 | struct pci_dev *pci_p2pmem_find_many(struct device **clients, int num_clients) |
| 500 | { |
| 501 | struct pci_dev *pdev = NULL; |
| 502 | int distance; |
| 503 | int closest_distance = INT_MAX; |
| 504 | struct pci_dev **closest_pdevs; |
| 505 | int dev_cnt = 0; |
| 506 | const int max_devs = PAGE_SIZE / sizeof(*closest_pdevs); |
| 507 | int i; |
| 508 | |
| 509 | closest_pdevs = kmalloc(PAGE_SIZE, GFP_KERNEL); |
| 510 | if (!closest_pdevs) |
| 511 | return NULL; |
| 512 | |
| 513 | while ((pdev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, pdev))) { |
| 514 | if (!pci_has_p2pmem(pdev)) |
| 515 | continue; |
| 516 | |
| 517 | distance = pci_p2pdma_distance_many(pdev, clients, |
| 518 | num_clients, false); |
| 519 | if (distance < 0 || distance > closest_distance) |
| 520 | continue; |
| 521 | |
| 522 | if (distance == closest_distance && dev_cnt >= max_devs) |
| 523 | continue; |
| 524 | |
| 525 | if (distance < closest_distance) { |
| 526 | for (i = 0; i < dev_cnt; i++) |
| 527 | pci_dev_put(closest_pdevs[i]); |
| 528 | |
| 529 | dev_cnt = 0; |
| 530 | closest_distance = distance; |
| 531 | } |
| 532 | |
| 533 | closest_pdevs[dev_cnt++] = pci_dev_get(pdev); |
| 534 | } |
| 535 | |
| 536 | if (dev_cnt) |
| 537 | pdev = pci_dev_get(closest_pdevs[prandom_u32_max(dev_cnt)]); |
| 538 | |
| 539 | for (i = 0; i < dev_cnt; i++) |
| 540 | pci_dev_put(closest_pdevs[i]); |
| 541 | |
| 542 | kfree(closest_pdevs); |
| 543 | return pdev; |
| 544 | } |
| 545 | EXPORT_SYMBOL_GPL(pci_p2pmem_find_many); |
| 546 | |
| 547 | /** |
| 548 | * pci_alloc_p2p_mem - allocate peer-to-peer DMA memory |
| 549 | * @pdev: the device to allocate memory from |
| 550 | * @size: number of bytes to allocate |
| 551 | * |
| 552 | * Returns the allocated memory or NULL on error. |
| 553 | */ |
| 554 | void *pci_alloc_p2pmem(struct pci_dev *pdev, size_t size) |
| 555 | { |
| 556 | void *ret; |
| 557 | |
| 558 | if (unlikely(!pdev->p2pdma)) |
| 559 | return NULL; |
| 560 | |
| 561 | if (unlikely(!percpu_ref_tryget_live(&pdev->p2pdma->devmap_ref))) |
| 562 | return NULL; |
| 563 | |
| 564 | ret = (void *)gen_pool_alloc(pdev->p2pdma->pool, size); |
| 565 | |
| 566 | if (unlikely(!ret)) |
| 567 | percpu_ref_put(&pdev->p2pdma->devmap_ref); |
| 568 | |
| 569 | return ret; |
| 570 | } |
| 571 | EXPORT_SYMBOL_GPL(pci_alloc_p2pmem); |
| 572 | |
| 573 | /** |
| 574 | * pci_free_p2pmem - free peer-to-peer DMA memory |
| 575 | * @pdev: the device the memory was allocated from |
| 576 | * @addr: address of the memory that was allocated |
Randy Dunlap | fcf9ab3 | 2018-12-01 09:31:34 -0800 | [diff] [blame] | 577 | * @size: number of bytes that were allocated |
Logan Gunthorpe | 5291698 | 2018-10-04 15:27:35 -0600 | [diff] [blame] | 578 | */ |
| 579 | void pci_free_p2pmem(struct pci_dev *pdev, void *addr, size_t size) |
| 580 | { |
| 581 | gen_pool_free(pdev->p2pdma->pool, (uintptr_t)addr, size); |
| 582 | percpu_ref_put(&pdev->p2pdma->devmap_ref); |
| 583 | } |
| 584 | EXPORT_SYMBOL_GPL(pci_free_p2pmem); |
| 585 | |
| 586 | /** |
| 587 | * pci_virt_to_bus - return the PCI bus address for a given virtual |
| 588 | * address obtained with pci_alloc_p2pmem() |
| 589 | * @pdev: the device the memory was allocated from |
| 590 | * @addr: address of the memory that was allocated |
| 591 | */ |
| 592 | pci_bus_addr_t pci_p2pmem_virt_to_bus(struct pci_dev *pdev, void *addr) |
| 593 | { |
| 594 | if (!addr) |
| 595 | return 0; |
| 596 | if (!pdev->p2pdma) |
| 597 | return 0; |
| 598 | |
| 599 | /* |
| 600 | * Note: when we added the memory to the pool we used the PCI |
| 601 | * bus address as the physical address. So gen_pool_virt_to_phys() |
| 602 | * actually returns the bus address despite the misleading name. |
| 603 | */ |
| 604 | return gen_pool_virt_to_phys(pdev->p2pdma->pool, (unsigned long)addr); |
| 605 | } |
| 606 | EXPORT_SYMBOL_GPL(pci_p2pmem_virt_to_bus); |
| 607 | |
| 608 | /** |
| 609 | * pci_p2pmem_alloc_sgl - allocate peer-to-peer DMA memory in a scatterlist |
| 610 | * @pdev: the device to allocate memory from |
| 611 | * @nents: the number of SG entries in the list |
| 612 | * @length: number of bytes to allocate |
| 613 | * |
Randy Dunlap | fcf9ab3 | 2018-12-01 09:31:34 -0800 | [diff] [blame] | 614 | * Return: %NULL on error or &struct scatterlist pointer and @nents on success |
Logan Gunthorpe | 5291698 | 2018-10-04 15:27:35 -0600 | [diff] [blame] | 615 | */ |
| 616 | struct scatterlist *pci_p2pmem_alloc_sgl(struct pci_dev *pdev, |
| 617 | unsigned int *nents, u32 length) |
| 618 | { |
| 619 | struct scatterlist *sg; |
| 620 | void *addr; |
| 621 | |
| 622 | sg = kzalloc(sizeof(*sg), GFP_KERNEL); |
| 623 | if (!sg) |
| 624 | return NULL; |
| 625 | |
| 626 | sg_init_table(sg, 1); |
| 627 | |
| 628 | addr = pci_alloc_p2pmem(pdev, length); |
| 629 | if (!addr) |
| 630 | goto out_free_sg; |
| 631 | |
| 632 | sg_set_buf(sg, addr, length); |
| 633 | *nents = 1; |
| 634 | return sg; |
| 635 | |
| 636 | out_free_sg: |
| 637 | kfree(sg); |
| 638 | return NULL; |
| 639 | } |
| 640 | EXPORT_SYMBOL_GPL(pci_p2pmem_alloc_sgl); |
| 641 | |
| 642 | /** |
| 643 | * pci_p2pmem_free_sgl - free a scatterlist allocated by pci_p2pmem_alloc_sgl() |
| 644 | * @pdev: the device to allocate memory from |
| 645 | * @sgl: the allocated scatterlist |
| 646 | */ |
| 647 | void pci_p2pmem_free_sgl(struct pci_dev *pdev, struct scatterlist *sgl) |
| 648 | { |
| 649 | struct scatterlist *sg; |
| 650 | int count; |
| 651 | |
| 652 | for_each_sg(sgl, sg, INT_MAX, count) { |
| 653 | if (!sg) |
| 654 | break; |
| 655 | |
| 656 | pci_free_p2pmem(pdev, sg_virt(sg), sg->length); |
| 657 | } |
| 658 | kfree(sgl); |
| 659 | } |
| 660 | EXPORT_SYMBOL_GPL(pci_p2pmem_free_sgl); |
| 661 | |
| 662 | /** |
| 663 | * pci_p2pmem_publish - publish the peer-to-peer DMA memory for use by |
| 664 | * other devices with pci_p2pmem_find() |
| 665 | * @pdev: the device with peer-to-peer DMA memory to publish |
| 666 | * @publish: set to true to publish the memory, false to unpublish it |
| 667 | * |
| 668 | * Published memory can be used by other PCI device drivers for |
| 669 | * peer-2-peer DMA operations. Non-published memory is reserved for |
Randy Dunlap | fcf9ab3 | 2018-12-01 09:31:34 -0800 | [diff] [blame] | 670 | * exclusive use of the device driver that registers the peer-to-peer |
Logan Gunthorpe | 5291698 | 2018-10-04 15:27:35 -0600 | [diff] [blame] | 671 | * memory. |
| 672 | */ |
| 673 | void pci_p2pmem_publish(struct pci_dev *pdev, bool publish) |
| 674 | { |
| 675 | if (pdev->p2pdma) |
| 676 | pdev->p2pdma->p2pmem_published = publish; |
| 677 | } |
| 678 | EXPORT_SYMBOL_GPL(pci_p2pmem_publish); |
Logan Gunthorpe | 977196b | 2018-10-04 15:27:37 -0600 | [diff] [blame] | 679 | |
| 680 | /** |
| 681 | * pci_p2pdma_map_sg - map a PCI peer-to-peer scatterlist for DMA |
| 682 | * @dev: device doing the DMA request |
| 683 | * @sg: scatter list to map |
| 684 | * @nents: elements in the scatterlist |
| 685 | * @dir: DMA direction |
| 686 | * |
| 687 | * Scatterlists mapped with this function should not be unmapped in any way. |
| 688 | * |
| 689 | * Returns the number of SG entries mapped or 0 on error. |
| 690 | */ |
| 691 | int pci_p2pdma_map_sg(struct device *dev, struct scatterlist *sg, int nents, |
| 692 | enum dma_data_direction dir) |
| 693 | { |
| 694 | struct dev_pagemap *pgmap; |
| 695 | struct scatterlist *s; |
| 696 | phys_addr_t paddr; |
| 697 | int i; |
| 698 | |
| 699 | /* |
| 700 | * p2pdma mappings are not compatible with devices that use |
| 701 | * dma_virt_ops. If the upper layers do the right thing |
| 702 | * this should never happen because it will be prevented |
| 703 | * by the check in pci_p2pdma_add_client() |
| 704 | */ |
| 705 | if (WARN_ON_ONCE(IS_ENABLED(CONFIG_DMA_VIRT_OPS) && |
| 706 | dev->dma_ops == &dma_virt_ops)) |
| 707 | return 0; |
| 708 | |
| 709 | for_each_sg(sg, s, nents, i) { |
| 710 | pgmap = sg_page(s)->pgmap; |
| 711 | paddr = sg_phys(s); |
| 712 | |
| 713 | s->dma_address = paddr - pgmap->pci_p2pdma_bus_offset; |
| 714 | sg_dma_len(s) = s->length; |
| 715 | } |
| 716 | |
| 717 | return nents; |
| 718 | } |
| 719 | EXPORT_SYMBOL_GPL(pci_p2pdma_map_sg); |
Logan Gunthorpe | 2d7bc01 | 2018-10-04 15:27:38 -0600 | [diff] [blame] | 720 | |
| 721 | /** |
| 722 | * pci_p2pdma_enable_store - parse a configfs/sysfs attribute store |
| 723 | * to enable p2pdma |
| 724 | * @page: contents of the value to be stored |
| 725 | * @p2p_dev: returns the PCI device that was selected to be used |
| 726 | * (if one was specified in the stored value) |
| 727 | * @use_p2pdma: returns whether to enable p2pdma or not |
| 728 | * |
| 729 | * Parses an attribute value to decide whether to enable p2pdma. |
Randy Dunlap | fcf9ab3 | 2018-12-01 09:31:34 -0800 | [diff] [blame] | 730 | * The value can select a PCI device (using its full BDF device |
Logan Gunthorpe | 2d7bc01 | 2018-10-04 15:27:38 -0600 | [diff] [blame] | 731 | * name) or a boolean (in any format strtobool() accepts). A false |
| 732 | * value disables p2pdma, a true value expects the caller |
| 733 | * to automatically find a compatible device and specifying a PCI device |
| 734 | * expects the caller to use the specific provider. |
| 735 | * |
| 736 | * pci_p2pdma_enable_show() should be used as the show operation for |
| 737 | * the attribute. |
| 738 | * |
| 739 | * Returns 0 on success |
| 740 | */ |
| 741 | int pci_p2pdma_enable_store(const char *page, struct pci_dev **p2p_dev, |
| 742 | bool *use_p2pdma) |
| 743 | { |
| 744 | struct device *dev; |
| 745 | |
| 746 | dev = bus_find_device_by_name(&pci_bus_type, NULL, page); |
| 747 | if (dev) { |
| 748 | *use_p2pdma = true; |
| 749 | *p2p_dev = to_pci_dev(dev); |
| 750 | |
| 751 | if (!pci_has_p2pmem(*p2p_dev)) { |
| 752 | pci_err(*p2p_dev, |
| 753 | "PCI device has no peer-to-peer memory: %s\n", |
| 754 | page); |
| 755 | pci_dev_put(*p2p_dev); |
| 756 | return -ENODEV; |
| 757 | } |
| 758 | |
| 759 | return 0; |
| 760 | } else if ((page[0] == '0' || page[0] == '1') && !iscntrl(page[1])) { |
| 761 | /* |
| 762 | * If the user enters a PCI device that doesn't exist |
| 763 | * like "0000:01:00.1", we don't want strtobool to think |
| 764 | * it's a '0' when it's clearly not what the user wanted. |
| 765 | * So we require 0's and 1's to be exactly one character. |
| 766 | */ |
| 767 | } else if (!strtobool(page, use_p2pdma)) { |
| 768 | return 0; |
| 769 | } |
| 770 | |
| 771 | pr_err("No such PCI device: %.*s\n", (int)strcspn(page, "\n"), page); |
| 772 | return -ENODEV; |
| 773 | } |
| 774 | EXPORT_SYMBOL_GPL(pci_p2pdma_enable_store); |
| 775 | |
| 776 | /** |
| 777 | * pci_p2pdma_enable_show - show a configfs/sysfs attribute indicating |
| 778 | * whether p2pdma is enabled |
| 779 | * @page: contents of the stored value |
| 780 | * @p2p_dev: the selected p2p device (NULL if no device is selected) |
Randy Dunlap | fcf9ab3 | 2018-12-01 09:31:34 -0800 | [diff] [blame] | 781 | * @use_p2pdma: whether p2pdma has been enabled |
Logan Gunthorpe | 2d7bc01 | 2018-10-04 15:27:38 -0600 | [diff] [blame] | 782 | * |
| 783 | * Attributes that use pci_p2pdma_enable_store() should use this function |
| 784 | * to show the value of the attribute. |
| 785 | * |
| 786 | * Returns 0 on success |
| 787 | */ |
| 788 | ssize_t pci_p2pdma_enable_show(char *page, struct pci_dev *p2p_dev, |
| 789 | bool use_p2pdma) |
| 790 | { |
| 791 | if (!use_p2pdma) |
| 792 | return sprintf(page, "0\n"); |
| 793 | |
| 794 | if (!p2p_dev) |
| 795 | return sprintf(page, "1\n"); |
| 796 | |
| 797 | return sprintf(page, "%s\n", pci_name(p2p_dev)); |
| 798 | } |
| 799 | EXPORT_SYMBOL_GPL(pci_p2pdma_enable_show); |