Oza Pawandeep | 2e28bc8 | 2018-05-17 16:44:15 -0500 | [diff] [blame] | 1 | // SPDX-License-Identifier: GPL-2.0 |
| 2 | /* |
| 3 | * This file implements the error recovery as a core part of PCIe error |
| 4 | * reporting. When a PCIe error is delivered, an error message will be |
| 5 | * collected and printed to console, then, an error recovery procedure |
| 6 | * will be executed by following the PCI error recovery rules. |
| 7 | * |
| 8 | * Copyright (C) 2006 Intel Corp. |
| 9 | * Tom Long Nguyen (tom.l.nguyen@intel.com) |
| 10 | * Zhang Yanmin (yanmin.zhang@intel.com) |
| 11 | */ |
| 12 | |
Bjorn Helgaas | 8d077c3 | 2019-12-13 16:46:05 -0600 | [diff] [blame] | 13 | #define dev_fmt(fmt) "AER: " fmt |
| 14 | |
Oza Pawandeep | 2e28bc8 | 2018-05-17 16:44:15 -0500 | [diff] [blame] | 15 | #include <linux/pci.h> |
| 16 | #include <linux/module.h> |
Oza Pawandeep | 2e28bc8 | 2018-05-17 16:44:15 -0500 | [diff] [blame] | 17 | #include <linux/kernel.h> |
| 18 | #include <linux/errno.h> |
| 19 | #include <linux/aer.h> |
| 20 | #include "portdrv.h" |
| 21 | #include "../pci.h" |
| 22 | |
Oza Pawandeep | 2e28bc8 | 2018-05-17 16:44:15 -0500 | [diff] [blame] | 23 | static pci_ers_result_t merge_result(enum pci_ers_result orig, |
| 24 | enum pci_ers_result new) |
| 25 | { |
| 26 | if (new == PCI_ERS_RESULT_NO_AER_DRIVER) |
| 27 | return PCI_ERS_RESULT_NO_AER_DRIVER; |
| 28 | |
| 29 | if (new == PCI_ERS_RESULT_NONE) |
| 30 | return orig; |
| 31 | |
| 32 | switch (orig) { |
| 33 | case PCI_ERS_RESULT_CAN_RECOVER: |
| 34 | case PCI_ERS_RESULT_RECOVERED: |
| 35 | orig = new; |
| 36 | break; |
| 37 | case PCI_ERS_RESULT_DISCONNECT: |
| 38 | if (new == PCI_ERS_RESULT_NEED_RESET) |
| 39 | orig = PCI_ERS_RESULT_NEED_RESET; |
| 40 | break; |
| 41 | default: |
| 42 | break; |
| 43 | } |
| 44 | |
| 45 | return orig; |
| 46 | } |
| 47 | |
Keith Busch | 542aeb9 | 2018-09-20 10:27:14 -0600 | [diff] [blame] | 48 | static int report_error_detected(struct pci_dev *dev, |
Luc Van Oostenryck | 16d79cd | 2020-07-02 18:26:49 +0200 | [diff] [blame] | 49 | pci_channel_state_t state, |
Keith Busch | 542aeb9 | 2018-09-20 10:27:14 -0600 | [diff] [blame] | 50 | enum pci_ers_result *result) |
Oza Pawandeep | 2e28bc8 | 2018-05-17 16:44:15 -0500 | [diff] [blame] | 51 | { |
| 52 | pci_ers_result_t vote; |
| 53 | const struct pci_error_handlers *err_handler; |
Oza Pawandeep | 2e28bc8 | 2018-05-17 16:44:15 -0500 | [diff] [blame] | 54 | |
| 55 | device_lock(&dev->dev); |
Keith Busch | a6bd101 | 2018-09-20 10:27:16 -0600 | [diff] [blame] | 56 | if (!pci_dev_set_io_state(dev, state) || |
| 57 | !dev->driver || |
Oza Pawandeep | 2e28bc8 | 2018-05-17 16:44:15 -0500 | [diff] [blame] | 58 | !dev->driver->err_handler || |
| 59 | !dev->driver->err_handler->error_detected) { |
Oza Pawandeep | 2e28bc8 | 2018-05-17 16:44:15 -0500 | [diff] [blame] | 60 | /* |
Keith Busch | bfcb79fc | 2018-09-20 10:27:13 -0600 | [diff] [blame] | 61 | * If any device in the subtree does not have an error_detected |
| 62 | * callback, PCI_ERS_RESULT_NO_AER_DRIVER prevents subsequent |
| 63 | * error callbacks of "any" device in the subtree, and will |
| 64 | * exit in the disconnected error state. |
Oza Pawandeep | 2e28bc8 | 2018-05-17 16:44:15 -0500 | [diff] [blame] | 65 | */ |
Yicong Yang | 01daacf | 2019-12-13 19:44:34 +0800 | [diff] [blame] | 66 | if (dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) { |
Oza Pawandeep | 2e28bc8 | 2018-05-17 16:44:15 -0500 | [diff] [blame] | 67 | vote = PCI_ERS_RESULT_NO_AER_DRIVER; |
Bjorn Helgaas | 8d077c3 | 2019-12-13 16:46:05 -0600 | [diff] [blame] | 68 | pci_info(dev, "can't recover (no error_detected callback)\n"); |
Yicong Yang | 01daacf | 2019-12-13 19:44:34 +0800 | [diff] [blame] | 69 | } else { |
Oza Pawandeep | 2e28bc8 | 2018-05-17 16:44:15 -0500 | [diff] [blame] | 70 | vote = PCI_ERS_RESULT_NONE; |
Yicong Yang | 01daacf | 2019-12-13 19:44:34 +0800 | [diff] [blame] | 71 | } |
Oza Pawandeep | 2e28bc8 | 2018-05-17 16:44:15 -0500 | [diff] [blame] | 72 | } else { |
| 73 | err_handler = dev->driver->err_handler; |
Keith Busch | 542aeb9 | 2018-09-20 10:27:14 -0600 | [diff] [blame] | 74 | vote = err_handler->error_detected(dev, state); |
Oza Pawandeep | 2e28bc8 | 2018-05-17 16:44:15 -0500 | [diff] [blame] | 75 | } |
Keith Busch | 7b42d97 | 2018-09-20 10:27:15 -0600 | [diff] [blame] | 76 | pci_uevent_ers(dev, vote); |
Keith Busch | 542aeb9 | 2018-09-20 10:27:14 -0600 | [diff] [blame] | 77 | *result = merge_result(*result, vote); |
Oza Pawandeep | 2e28bc8 | 2018-05-17 16:44:15 -0500 | [diff] [blame] | 78 | device_unlock(&dev->dev); |
| 79 | return 0; |
| 80 | } |
| 81 | |
Keith Busch | 542aeb9 | 2018-09-20 10:27:14 -0600 | [diff] [blame] | 82 | static int report_frozen_detected(struct pci_dev *dev, void *data) |
| 83 | { |
| 84 | return report_error_detected(dev, pci_channel_io_frozen, data); |
| 85 | } |
| 86 | |
| 87 | static int report_normal_detected(struct pci_dev *dev, void *data) |
| 88 | { |
| 89 | return report_error_detected(dev, pci_channel_io_normal, data); |
| 90 | } |
| 91 | |
Oza Pawandeep | 2e28bc8 | 2018-05-17 16:44:15 -0500 | [diff] [blame] | 92 | static int report_mmio_enabled(struct pci_dev *dev, void *data) |
| 93 | { |
Keith Busch | 542aeb9 | 2018-09-20 10:27:14 -0600 | [diff] [blame] | 94 | pci_ers_result_t vote, *result = data; |
Oza Pawandeep | 2e28bc8 | 2018-05-17 16:44:15 -0500 | [diff] [blame] | 95 | const struct pci_error_handlers *err_handler; |
Oza Pawandeep | 2e28bc8 | 2018-05-17 16:44:15 -0500 | [diff] [blame] | 96 | |
| 97 | device_lock(&dev->dev); |
| 98 | if (!dev->driver || |
| 99 | !dev->driver->err_handler || |
| 100 | !dev->driver->err_handler->mmio_enabled) |
| 101 | goto out; |
| 102 | |
| 103 | err_handler = dev->driver->err_handler; |
| 104 | vote = err_handler->mmio_enabled(dev); |
Keith Busch | 542aeb9 | 2018-09-20 10:27:14 -0600 | [diff] [blame] | 105 | *result = merge_result(*result, vote); |
Oza Pawandeep | 2e28bc8 | 2018-05-17 16:44:15 -0500 | [diff] [blame] | 106 | out: |
| 107 | device_unlock(&dev->dev); |
| 108 | return 0; |
| 109 | } |
| 110 | |
| 111 | static int report_slot_reset(struct pci_dev *dev, void *data) |
| 112 | { |
Keith Busch | 542aeb9 | 2018-09-20 10:27:14 -0600 | [diff] [blame] | 113 | pci_ers_result_t vote, *result = data; |
Oza Pawandeep | 2e28bc8 | 2018-05-17 16:44:15 -0500 | [diff] [blame] | 114 | const struct pci_error_handlers *err_handler; |
Oza Pawandeep | 2e28bc8 | 2018-05-17 16:44:15 -0500 | [diff] [blame] | 115 | |
| 116 | device_lock(&dev->dev); |
| 117 | if (!dev->driver || |
| 118 | !dev->driver->err_handler || |
| 119 | !dev->driver->err_handler->slot_reset) |
| 120 | goto out; |
| 121 | |
| 122 | err_handler = dev->driver->err_handler; |
| 123 | vote = err_handler->slot_reset(dev); |
Keith Busch | 542aeb9 | 2018-09-20 10:27:14 -0600 | [diff] [blame] | 124 | *result = merge_result(*result, vote); |
Oza Pawandeep | 2e28bc8 | 2018-05-17 16:44:15 -0500 | [diff] [blame] | 125 | out: |
| 126 | device_unlock(&dev->dev); |
| 127 | return 0; |
| 128 | } |
| 129 | |
| 130 | static int report_resume(struct pci_dev *dev, void *data) |
| 131 | { |
| 132 | const struct pci_error_handlers *err_handler; |
| 133 | |
| 134 | device_lock(&dev->dev); |
Keith Busch | a6bd101 | 2018-09-20 10:27:16 -0600 | [diff] [blame] | 135 | if (!pci_dev_set_io_state(dev, pci_channel_io_normal) || |
| 136 | !dev->driver || |
Oza Pawandeep | 2e28bc8 | 2018-05-17 16:44:15 -0500 | [diff] [blame] | 137 | !dev->driver->err_handler || |
| 138 | !dev->driver->err_handler->resume) |
| 139 | goto out; |
| 140 | |
| 141 | err_handler = dev->driver->err_handler; |
| 142 | err_handler->resume(dev); |
Oza Pawandeep | 2e28bc8 | 2018-05-17 16:44:15 -0500 | [diff] [blame] | 143 | out: |
Keith Busch | 7b42d97 | 2018-09-20 10:27:15 -0600 | [diff] [blame] | 144 | pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED); |
Oza Pawandeep | 2e28bc8 | 2018-05-17 16:44:15 -0500 | [diff] [blame] | 145 | device_unlock(&dev->dev); |
| 146 | return 0; |
| 147 | } |
| 148 | |
Sean V Kelley | 05e9ae1 | 2020-11-20 16:10:30 -0800 | [diff] [blame] | 149 | /** |
| 150 | * pci_walk_bridge - walk bridges potentially AER affected |
Qiuxu Zhuo | 57908622 | 2020-11-20 16:10:33 -0800 | [diff] [blame] | 151 | * @bridge: bridge which may be a Port, an RCEC, or an RCiEP |
Sean V Kelley | 05e9ae1 | 2020-11-20 16:10:30 -0800 | [diff] [blame] | 152 | * @cb: callback to be called for each device found |
| 153 | * @userdata: arbitrary pointer to be passed to callback |
| 154 | * |
| 155 | * If the device provided is a bridge, walk the subordinate bus, including |
| 156 | * any bridged devices on buses under this bus. Call the provided callback |
| 157 | * on each device found. |
Sean V Kelley | a175102 | 2020-12-02 11:26:29 -0600 | [diff] [blame] | 158 | * |
Qiuxu Zhuo | 57908622 | 2020-11-20 16:10:33 -0800 | [diff] [blame] | 159 | * If the device provided has no subordinate bus, e.g., an RCEC or RCiEP, |
| 160 | * call the callback on the device itself. |
Sean V Kelley | 05e9ae1 | 2020-11-20 16:10:30 -0800 | [diff] [blame] | 161 | */ |
| 162 | static void pci_walk_bridge(struct pci_dev *bridge, |
| 163 | int (*cb)(struct pci_dev *, void *), |
| 164 | void *userdata) |
| 165 | { |
| 166 | if (bridge->subordinate) |
| 167 | pci_walk_bus(bridge->subordinate, cb, userdata); |
Sean V Kelley | a175102 | 2020-12-02 11:26:29 -0600 | [diff] [blame] | 168 | else |
| 169 | cb(bridge, userdata); |
Sean V Kelley | 05e9ae1 | 2020-11-20 16:10:30 -0800 | [diff] [blame] | 170 | } |
| 171 | |
Kuppuswamy Sathyanarayanan | e8e5ff2 | 2020-03-23 17:26:03 -0700 | [diff] [blame] | 172 | pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, |
Sean V Kelley | 8f1bbfb | 2020-11-20 16:10:25 -0800 | [diff] [blame] | 173 | pci_channel_state_t state, |
| 174 | pci_ers_result_t (*reset_subordinates)(struct pci_dev *pdev)) |
Oza Pawandeep | 2e28bc8 | 2018-05-17 16:44:15 -0500 | [diff] [blame] | 175 | { |
Sean V Kelley | 480ef7c | 2020-11-20 16:10:27 -0800 | [diff] [blame] | 176 | int type = pci_pcie_type(dev); |
Sean V Kelley | 0791721 | 2020-11-20 16:10:28 -0800 | [diff] [blame] | 177 | struct pci_dev *bridge; |
Sean V Kelley | 0791721 | 2020-11-20 16:10:28 -0800 | [diff] [blame] | 178 | pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER; |
Sean V Kelley | aa344bc | 2020-11-24 10:55:30 -0600 | [diff] [blame] | 179 | struct pci_host_bridge *host = pci_find_host_bridge(dev->bus); |
Oza Pawandeep | 2e28bc8 | 2018-05-17 16:44:15 -0500 | [diff] [blame] | 180 | |
Keith Busch | bfcb79fc | 2018-09-20 10:27:13 -0600 | [diff] [blame] | 181 | /* |
Qiuxu Zhuo | 57908622 | 2020-11-20 16:10:33 -0800 | [diff] [blame] | 182 | * If the error was detected by a Root Port, Downstream Port, RCEC, |
| 183 | * or RCiEP, recovery runs on the device itself. For Ports, that |
| 184 | * also includes any subordinate devices. |
Sean V Kelley | a175102 | 2020-12-02 11:26:29 -0600 | [diff] [blame] | 185 | * |
| 186 | * If it was detected by another device (Endpoint, etc), recovery |
| 187 | * runs on the device and anything else under the same Port, i.e., |
| 188 | * everything under "bridge". |
Keith Busch | bfcb79fc | 2018-09-20 10:27:13 -0600 | [diff] [blame] | 189 | */ |
Sean V Kelley | 3d7d8fc | 2020-11-20 16:10:29 -0800 | [diff] [blame] | 190 | if (type == PCI_EXP_TYPE_ROOT_PORT || |
Sean V Kelley | a175102 | 2020-12-02 11:26:29 -0600 | [diff] [blame] | 191 | type == PCI_EXP_TYPE_DOWNSTREAM || |
Qiuxu Zhuo | 57908622 | 2020-11-20 16:10:33 -0800 | [diff] [blame] | 192 | type == PCI_EXP_TYPE_RC_EC || |
| 193 | type == PCI_EXP_TYPE_RC_END) |
Sean V Kelley | 0791721 | 2020-11-20 16:10:28 -0800 | [diff] [blame] | 194 | bridge = dev; |
Sean V Kelley | 3d7d8fc | 2020-11-20 16:10:29 -0800 | [diff] [blame] | 195 | else |
| 196 | bridge = pci_upstream_bridge(dev); |
Keith Busch | bfcb79fc | 2018-09-20 10:27:13 -0600 | [diff] [blame] | 197 | |
Sean V Kelley | 0791721 | 2020-11-20 16:10:28 -0800 | [diff] [blame] | 198 | pci_dbg(bridge, "broadcast error_detected message\n"); |
Kuppuswamy Sathyanarayanan | b5dfbea | 2020-03-27 17:33:24 -0500 | [diff] [blame] | 199 | if (state == pci_channel_io_frozen) { |
Sean V Kelley | 05e9ae1 | 2020-11-20 16:10:30 -0800 | [diff] [blame] | 200 | pci_walk_bridge(bridge, report_frozen_detected, &status); |
Keith Busch | 387c72c | 2021-01-04 15:02:58 -0800 | [diff] [blame] | 201 | if (reset_subordinates(bridge) != PCI_ERS_RESULT_RECOVERED) { |
Sean V Kelley | 0791721 | 2020-11-20 16:10:28 -0800 | [diff] [blame] | 202 | pci_warn(bridge, "subordinate device reset failed\n"); |
Kuppuswamy Sathyanarayanan | b5dfbea | 2020-03-27 17:33:24 -0500 | [diff] [blame] | 203 | goto failed; |
Kuppuswamy Sathyanarayanan | b6cf1a4 | 2020-03-23 17:26:02 -0700 | [diff] [blame] | 204 | } |
Kuppuswamy Sathyanarayanan | b5dfbea | 2020-03-27 17:33:24 -0500 | [diff] [blame] | 205 | } else { |
Sean V Kelley | 05e9ae1 | 2020-11-20 16:10:30 -0800 | [diff] [blame] | 206 | pci_walk_bridge(bridge, report_normal_detected, &status); |
Kuppuswamy Sathyanarayanan | b5dfbea | 2020-03-27 17:33:24 -0500 | [diff] [blame] | 207 | } |
Keith Busch | bdb5ac85 | 2018-09-20 10:27:12 -0600 | [diff] [blame] | 208 | |
Keith Busch | 542aeb9 | 2018-09-20 10:27:14 -0600 | [diff] [blame] | 209 | if (status == PCI_ERS_RESULT_CAN_RECOVER) { |
| 210 | status = PCI_ERS_RESULT_RECOVERED; |
Sean V Kelley | 0791721 | 2020-11-20 16:10:28 -0800 | [diff] [blame] | 211 | pci_dbg(bridge, "broadcast mmio_enabled message\n"); |
Sean V Kelley | 05e9ae1 | 2020-11-20 16:10:30 -0800 | [diff] [blame] | 212 | pci_walk_bridge(bridge, report_mmio_enabled, &status); |
Keith Busch | 542aeb9 | 2018-09-20 10:27:14 -0600 | [diff] [blame] | 213 | } |
Oza Pawandeep | 2e28bc8 | 2018-05-17 16:44:15 -0500 | [diff] [blame] | 214 | |
| 215 | if (status == PCI_ERS_RESULT_NEED_RESET) { |
| 216 | /* |
| 217 | * TODO: Should call platform-specific |
| 218 | * functions to reset slot before calling |
| 219 | * drivers' slot_reset callbacks? |
| 220 | */ |
Keith Busch | 542aeb9 | 2018-09-20 10:27:14 -0600 | [diff] [blame] | 221 | status = PCI_ERS_RESULT_RECOVERED; |
Sean V Kelley | 0791721 | 2020-11-20 16:10:28 -0800 | [diff] [blame] | 222 | pci_dbg(bridge, "broadcast slot_reset message\n"); |
Sean V Kelley | 05e9ae1 | 2020-11-20 16:10:30 -0800 | [diff] [blame] | 223 | pci_walk_bridge(bridge, report_slot_reset, &status); |
Oza Pawandeep | 2e28bc8 | 2018-05-17 16:44:15 -0500 | [diff] [blame] | 224 | } |
| 225 | |
| 226 | if (status != PCI_ERS_RESULT_RECOVERED) |
| 227 | goto failed; |
| 228 | |
Sean V Kelley | 0791721 | 2020-11-20 16:10:28 -0800 | [diff] [blame] | 229 | pci_dbg(bridge, "broadcast resume message\n"); |
Sean V Kelley | 05e9ae1 | 2020-11-20 16:10:30 -0800 | [diff] [blame] | 230 | pci_walk_bridge(bridge, report_resume, &status); |
Oza Pawandeep | 2e28bc8 | 2018-05-17 16:44:15 -0500 | [diff] [blame] | 231 | |
Sean V Kelley | aa344bc | 2020-11-24 10:55:30 -0600 | [diff] [blame] | 232 | /* |
Keith Busch | 7d7cbea | 2021-01-04 15:02:56 -0800 | [diff] [blame] | 233 | * If we have native control of AER, clear error status in the device |
| 234 | * that detected the error. If the platform retained control of AER, |
| 235 | * it is responsible for clearing this status. In that case, the |
| 236 | * signaling device may not even be visible to the OS. |
Sean V Kelley | aa344bc | 2020-11-24 10:55:30 -0600 | [diff] [blame] | 237 | */ |
| 238 | if (host->native_aer || pcie_ports_native) { |
Keith Busch | 7d7cbea | 2021-01-04 15:02:56 -0800 | [diff] [blame] | 239 | pcie_clear_device_status(dev); |
| 240 | pci_aer_clear_nonfatal_status(dev); |
Sean V Kelley | aa344bc | 2020-11-24 10:55:30 -0600 | [diff] [blame] | 241 | } |
Sean V Kelley | 0791721 | 2020-11-20 16:10:28 -0800 | [diff] [blame] | 242 | pci_info(bridge, "device recovery successful\n"); |
Kuppuswamy Sathyanarayanan | e8e5ff2 | 2020-03-23 17:26:03 -0700 | [diff] [blame] | 243 | return status; |
Oza Pawandeep | 2e28bc8 | 2018-05-17 16:44:15 -0500 | [diff] [blame] | 244 | |
| 245 | failed: |
Sean V Kelley | 0791721 | 2020-11-20 16:10:28 -0800 | [diff] [blame] | 246 | pci_uevent_ers(bridge, PCI_ERS_RESULT_DISCONNECT); |
Oza Pawandeep | 2e28bc8 | 2018-05-17 16:44:15 -0500 | [diff] [blame] | 247 | |
| 248 | /* TODO: Should kernel panic here? */ |
Sean V Kelley | 0791721 | 2020-11-20 16:10:28 -0800 | [diff] [blame] | 249 | pci_info(bridge, "device recovery failed\n"); |
Kuppuswamy Sathyanarayanan | e8e5ff2 | 2020-03-23 17:26:03 -0700 | [diff] [blame] | 250 | |
| 251 | return status; |
Oza Pawandeep | 2e28bc8 | 2018-05-17 16:44:15 -0500 | [diff] [blame] | 252 | } |