Oza Pawandeep | 2e28bc8 | 2018-05-17 16:44:15 -0500 | [diff] [blame] | 1 | // SPDX-License-Identifier: GPL-2.0 |
| 2 | /* |
| 3 | * This file implements the error recovery as a core part of PCIe error |
| 4 | * reporting. When a PCIe error is delivered, an error message will be |
| 5 | * collected and printed to console, then, an error recovery procedure |
| 6 | * will be executed by following the PCI error recovery rules. |
| 7 | * |
| 8 | * Copyright (C) 2006 Intel Corp. |
| 9 | * Tom Long Nguyen (tom.l.nguyen@intel.com) |
| 10 | * Zhang Yanmin (yanmin.zhang@intel.com) |
| 11 | */ |
| 12 | |
| 13 | #include <linux/pci.h> |
| 14 | #include <linux/module.h> |
| 15 | #include <linux/pci.h> |
| 16 | #include <linux/kernel.h> |
| 17 | #include <linux/errno.h> |
| 18 | #include <linux/aer.h> |
| 19 | #include "portdrv.h" |
| 20 | #include "../pci.h" |
| 21 | |
| 22 | struct aer_broadcast_data { |
| 23 | enum pci_channel_state state; |
| 24 | enum pci_ers_result result; |
| 25 | }; |
| 26 | |
| 27 | static pci_ers_result_t merge_result(enum pci_ers_result orig, |
| 28 | enum pci_ers_result new) |
| 29 | { |
| 30 | if (new == PCI_ERS_RESULT_NO_AER_DRIVER) |
| 31 | return PCI_ERS_RESULT_NO_AER_DRIVER; |
| 32 | |
| 33 | if (new == PCI_ERS_RESULT_NONE) |
| 34 | return orig; |
| 35 | |
| 36 | switch (orig) { |
| 37 | case PCI_ERS_RESULT_CAN_RECOVER: |
| 38 | case PCI_ERS_RESULT_RECOVERED: |
| 39 | orig = new; |
| 40 | break; |
| 41 | case PCI_ERS_RESULT_DISCONNECT: |
| 42 | if (new == PCI_ERS_RESULT_NEED_RESET) |
| 43 | orig = PCI_ERS_RESULT_NEED_RESET; |
| 44 | break; |
| 45 | default: |
| 46 | break; |
| 47 | } |
| 48 | |
| 49 | return orig; |
| 50 | } |
| 51 | |
| 52 | static int report_error_detected(struct pci_dev *dev, void *data) |
| 53 | { |
| 54 | pci_ers_result_t vote; |
| 55 | const struct pci_error_handlers *err_handler; |
| 56 | struct aer_broadcast_data *result_data; |
| 57 | |
| 58 | result_data = (struct aer_broadcast_data *) data; |
| 59 | |
| 60 | device_lock(&dev->dev); |
| 61 | dev->error_state = result_data->state; |
| 62 | |
| 63 | if (!dev->driver || |
| 64 | !dev->driver->err_handler || |
| 65 | !dev->driver->err_handler->error_detected) { |
| 66 | if (result_data->state == pci_channel_io_frozen && |
| 67 | dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) { |
| 68 | /* |
| 69 | * In case of fatal recovery, if one of down- |
| 70 | * stream device has no driver. We might be |
| 71 | * unable to recover because a later insmod |
| 72 | * of a driver for this device is unaware of |
| 73 | * its hw state. |
| 74 | */ |
| 75 | pci_printk(KERN_DEBUG, dev, "device has %s\n", |
| 76 | dev->driver ? |
| 77 | "no AER-aware driver" : "no driver"); |
| 78 | } |
| 79 | |
| 80 | /* |
| 81 | * If there's any device in the subtree that does not |
| 82 | * have an error_detected callback, returning |
| 83 | * PCI_ERS_RESULT_NO_AER_DRIVER prevents calling of |
| 84 | * the subsequent mmio_enabled/slot_reset/resume |
| 85 | * callbacks of "any" device in the subtree. All the |
| 86 | * devices in the subtree are left in the error state |
| 87 | * without recovery. |
| 88 | */ |
| 89 | |
| 90 | if (dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) |
| 91 | vote = PCI_ERS_RESULT_NO_AER_DRIVER; |
| 92 | else |
| 93 | vote = PCI_ERS_RESULT_NONE; |
| 94 | } else { |
| 95 | err_handler = dev->driver->err_handler; |
| 96 | vote = err_handler->error_detected(dev, result_data->state); |
| 97 | pci_uevent_ers(dev, PCI_ERS_RESULT_NONE); |
| 98 | } |
| 99 | |
| 100 | result_data->result = merge_result(result_data->result, vote); |
| 101 | device_unlock(&dev->dev); |
| 102 | return 0; |
| 103 | } |
| 104 | |
| 105 | static int report_mmio_enabled(struct pci_dev *dev, void *data) |
| 106 | { |
| 107 | pci_ers_result_t vote; |
| 108 | const struct pci_error_handlers *err_handler; |
| 109 | struct aer_broadcast_data *result_data; |
| 110 | |
| 111 | result_data = (struct aer_broadcast_data *) data; |
| 112 | |
| 113 | device_lock(&dev->dev); |
| 114 | if (!dev->driver || |
| 115 | !dev->driver->err_handler || |
| 116 | !dev->driver->err_handler->mmio_enabled) |
| 117 | goto out; |
| 118 | |
| 119 | err_handler = dev->driver->err_handler; |
| 120 | vote = err_handler->mmio_enabled(dev); |
| 121 | result_data->result = merge_result(result_data->result, vote); |
| 122 | out: |
| 123 | device_unlock(&dev->dev); |
| 124 | return 0; |
| 125 | } |
| 126 | |
| 127 | static int report_slot_reset(struct pci_dev *dev, void *data) |
| 128 | { |
| 129 | pci_ers_result_t vote; |
| 130 | const struct pci_error_handlers *err_handler; |
| 131 | struct aer_broadcast_data *result_data; |
| 132 | |
| 133 | result_data = (struct aer_broadcast_data *) data; |
| 134 | |
| 135 | device_lock(&dev->dev); |
| 136 | if (!dev->driver || |
| 137 | !dev->driver->err_handler || |
| 138 | !dev->driver->err_handler->slot_reset) |
| 139 | goto out; |
| 140 | |
| 141 | err_handler = dev->driver->err_handler; |
| 142 | vote = err_handler->slot_reset(dev); |
| 143 | result_data->result = merge_result(result_data->result, vote); |
| 144 | out: |
| 145 | device_unlock(&dev->dev); |
| 146 | return 0; |
| 147 | } |
| 148 | |
| 149 | static int report_resume(struct pci_dev *dev, void *data) |
| 150 | { |
| 151 | const struct pci_error_handlers *err_handler; |
| 152 | |
| 153 | device_lock(&dev->dev); |
| 154 | dev->error_state = pci_channel_io_normal; |
| 155 | |
| 156 | if (!dev->driver || |
| 157 | !dev->driver->err_handler || |
| 158 | !dev->driver->err_handler->resume) |
| 159 | goto out; |
| 160 | |
| 161 | err_handler = dev->driver->err_handler; |
| 162 | err_handler->resume(dev); |
| 163 | pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED); |
| 164 | out: |
| 165 | device_unlock(&dev->dev); |
| 166 | return 0; |
| 167 | } |
| 168 | |
| 169 | /** |
| 170 | * default_reset_link - default reset function |
| 171 | * @dev: pointer to pci_dev data structure |
| 172 | * |
| 173 | * Invoked when performing link reset on a Downstream Port or a |
| 174 | * Root Port with no aer driver. |
| 175 | */ |
| 176 | static pci_ers_result_t default_reset_link(struct pci_dev *dev) |
| 177 | { |
Sinan Kaya | 1842623 | 2018-07-19 18:04:09 -0500 | [diff] [blame] | 178 | int rc; |
| 179 | |
Keith Busch | c4eed62 | 2018-09-20 10:27:11 -0600 | [diff] [blame] | 180 | rc = pci_bus_error_reset(dev); |
Oza Pawandeep | 2e28bc8 | 2018-05-17 16:44:15 -0500 | [diff] [blame] | 181 | pci_printk(KERN_DEBUG, dev, "downstream link has been reset\n"); |
Sinan Kaya | 1842623 | 2018-07-19 18:04:09 -0500 | [diff] [blame] | 182 | return rc ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED; |
Oza Pawandeep | 2e28bc8 | 2018-05-17 16:44:15 -0500 | [diff] [blame] | 183 | } |
| 184 | |
Oza Pawandeep | 0b91439 | 2018-05-17 16:44:19 -0500 | [diff] [blame] | 185 | static pci_ers_result_t reset_link(struct pci_dev *dev, u32 service) |
Oza Pawandeep | 2e28bc8 | 2018-05-17 16:44:15 -0500 | [diff] [blame] | 186 | { |
| 187 | struct pci_dev *udev; |
| 188 | pci_ers_result_t status; |
| 189 | struct pcie_port_service_driver *driver = NULL; |
| 190 | |
| 191 | if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { |
| 192 | /* Reset this port for all subordinates */ |
| 193 | udev = dev; |
| 194 | } else { |
| 195 | /* Reset the upstream component (likely downstream port) */ |
| 196 | udev = dev->bus->self; |
| 197 | } |
| 198 | |
Oza Pawandeep | 2e28bc8 | 2018-05-17 16:44:15 -0500 | [diff] [blame] | 199 | /* Use the aer driver of the component firstly */ |
Oza Pawandeep | 0b91439 | 2018-05-17 16:44:19 -0500 | [diff] [blame] | 200 | driver = pcie_port_find_service(udev, service); |
Oza Pawandeep | 2e28bc8 | 2018-05-17 16:44:15 -0500 | [diff] [blame] | 201 | |
| 202 | if (driver && driver->reset_link) { |
| 203 | status = driver->reset_link(udev); |
| 204 | } else if (udev->has_secondary_link) { |
| 205 | status = default_reset_link(udev); |
| 206 | } else { |
| 207 | pci_printk(KERN_DEBUG, dev, "no link-reset support at upstream device %s\n", |
| 208 | pci_name(udev)); |
| 209 | return PCI_ERS_RESULT_DISCONNECT; |
| 210 | } |
| 211 | |
| 212 | if (status != PCI_ERS_RESULT_RECOVERED) { |
| 213 | pci_printk(KERN_DEBUG, dev, "link reset at upstream device %s failed\n", |
| 214 | pci_name(udev)); |
| 215 | return PCI_ERS_RESULT_DISCONNECT; |
| 216 | } |
| 217 | |
| 218 | return status; |
| 219 | } |
| 220 | |
| 221 | /** |
| 222 | * broadcast_error_message - handle message broadcast to downstream drivers |
| 223 | * @dev: pointer to from where in a hierarchy message is broadcasted down |
| 224 | * @state: error state |
| 225 | * @error_mesg: message to print |
| 226 | * @cb: callback to be broadcasted |
| 227 | * |
| 228 | * Invoked during error recovery process. Once being invoked, the content |
| 229 | * of error severity will be broadcasted to all downstream drivers in a |
| 230 | * hierarchy in question. |
| 231 | */ |
| 232 | static pci_ers_result_t broadcast_error_message(struct pci_dev *dev, |
| 233 | enum pci_channel_state state, |
| 234 | char *error_mesg, |
| 235 | int (*cb)(struct pci_dev *, void *)) |
| 236 | { |
| 237 | struct aer_broadcast_data result_data; |
| 238 | |
| 239 | pci_printk(KERN_DEBUG, dev, "broadcast %s message\n", error_mesg); |
| 240 | result_data.state = state; |
| 241 | if (cb == report_error_detected) |
| 242 | result_data.result = PCI_ERS_RESULT_CAN_RECOVER; |
| 243 | else |
| 244 | result_data.result = PCI_ERS_RESULT_RECOVERED; |
| 245 | |
| 246 | if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { |
| 247 | /* |
| 248 | * If the error is reported by a bridge, we think this error |
| 249 | * is related to the downstream link of the bridge, so we |
| 250 | * do error recovery on all subordinates of the bridge instead |
| 251 | * of the bridge and clear the error status of the bridge. |
| 252 | */ |
| 253 | if (cb == report_error_detected) |
| 254 | dev->error_state = state; |
| 255 | pci_walk_bus(dev->subordinate, cb, &result_data); |
| 256 | if (cb == report_resume) { |
Oza Pawandeep | ec752f5 | 2018-07-19 17:58:09 -0500 | [diff] [blame] | 257 | pci_aer_clear_device_status(dev); |
Oza Pawandeep | 2e28bc8 | 2018-05-17 16:44:15 -0500 | [diff] [blame] | 258 | pci_cleanup_aer_uncorrect_error_status(dev); |
| 259 | dev->error_state = pci_channel_io_normal; |
| 260 | } |
| 261 | } else { |
| 262 | /* |
| 263 | * If the error is reported by an end point, we think this |
| 264 | * error is related to the upstream link of the end point. |
Oza Pawandeep | 43ec03a | 2018-07-19 17:58:07 -0500 | [diff] [blame] | 265 | * The error is non fatal so the bus is ok; just invoke |
| 266 | * the callback for the function that logged the error. |
Oza Pawandeep | 2e28bc8 | 2018-05-17 16:44:15 -0500 | [diff] [blame] | 267 | */ |
Oza Pawandeep | 43ec03a | 2018-07-19 17:58:07 -0500 | [diff] [blame] | 268 | cb(dev, &result_data); |
Oza Pawandeep | 2e28bc8 | 2018-05-17 16:44:15 -0500 | [diff] [blame] | 269 | } |
| 270 | |
| 271 | return result_data.result; |
| 272 | } |
| 273 | |
Keith Busch | bdb5ac85 | 2018-09-20 10:27:12 -0600 | [diff] [blame^] | 274 | void pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state state, |
| 275 | u32 service) |
Oza Pawandeep | 2e28bc8 | 2018-05-17 16:44:15 -0500 | [diff] [blame] | 276 | { |
| 277 | pci_ers_result_t status; |
Oza Pawandeep | 2e28bc8 | 2018-05-17 16:44:15 -0500 | [diff] [blame] | 278 | |
| 279 | status = broadcast_error_message(dev, |
| 280 | state, |
| 281 | "error_detected", |
| 282 | report_error_detected); |
| 283 | |
Keith Busch | bdb5ac85 | 2018-09-20 10:27:12 -0600 | [diff] [blame^] | 284 | if (state == pci_channel_io_frozen && |
| 285 | reset_link(dev, service) != PCI_ERS_RESULT_RECOVERED) |
| 286 | goto failed; |
| 287 | |
Oza Pawandeep | 2e28bc8 | 2018-05-17 16:44:15 -0500 | [diff] [blame] | 288 | if (status == PCI_ERS_RESULT_CAN_RECOVER) |
| 289 | status = broadcast_error_message(dev, |
| 290 | state, |
| 291 | "mmio_enabled", |
| 292 | report_mmio_enabled); |
| 293 | |
| 294 | if (status == PCI_ERS_RESULT_NEED_RESET) { |
| 295 | /* |
| 296 | * TODO: Should call platform-specific |
| 297 | * functions to reset slot before calling |
| 298 | * drivers' slot_reset callbacks? |
| 299 | */ |
| 300 | status = broadcast_error_message(dev, |
| 301 | state, |
| 302 | "slot_reset", |
| 303 | report_slot_reset); |
| 304 | } |
| 305 | |
| 306 | if (status != PCI_ERS_RESULT_RECOVERED) |
| 307 | goto failed; |
| 308 | |
| 309 | broadcast_error_message(dev, |
| 310 | state, |
| 311 | "resume", |
| 312 | report_resume); |
| 313 | |
| 314 | pci_info(dev, "AER: Device recovery successful\n"); |
| 315 | return; |
| 316 | |
| 317 | failed: |
| 318 | pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT); |
| 319 | |
| 320 | /* TODO: Should kernel panic here? */ |
| 321 | pci_info(dev, "AER: Device recovery failed\n"); |
| 322 | } |