blob: 644f3f725ef058d187604a1ce3006f5cf8927947 [file] [log] [blame]
Oza Pawandeep2e28bc82018-05-17 16:44:15 -05001// SPDX-License-Identifier: GPL-2.0
2/*
3 * This file implements the error recovery as a core part of PCIe error
4 * reporting. When a PCIe error is delivered, an error message will be
5 * collected and printed to console, then, an error recovery procedure
6 * will be executed by following the PCI error recovery rules.
7 *
8 * Copyright (C) 2006 Intel Corp.
9 * Tom Long Nguyen (tom.l.nguyen@intel.com)
10 * Zhang Yanmin (yanmin.zhang@intel.com)
11 */
12
13#include <linux/pci.h>
14#include <linux/module.h>
15#include <linux/pci.h>
16#include <linux/kernel.h>
17#include <linux/errno.h>
18#include <linux/aer.h>
19#include "portdrv.h"
20#include "../pci.h"
21
22struct aer_broadcast_data {
23 enum pci_channel_state state;
24 enum pci_ers_result result;
25};
26
27static pci_ers_result_t merge_result(enum pci_ers_result orig,
28 enum pci_ers_result new)
29{
30 if (new == PCI_ERS_RESULT_NO_AER_DRIVER)
31 return PCI_ERS_RESULT_NO_AER_DRIVER;
32
33 if (new == PCI_ERS_RESULT_NONE)
34 return orig;
35
36 switch (orig) {
37 case PCI_ERS_RESULT_CAN_RECOVER:
38 case PCI_ERS_RESULT_RECOVERED:
39 orig = new;
40 break;
41 case PCI_ERS_RESULT_DISCONNECT:
42 if (new == PCI_ERS_RESULT_NEED_RESET)
43 orig = PCI_ERS_RESULT_NEED_RESET;
44 break;
45 default:
46 break;
47 }
48
49 return orig;
50}
51
52static int report_error_detected(struct pci_dev *dev, void *data)
53{
54 pci_ers_result_t vote;
55 const struct pci_error_handlers *err_handler;
56 struct aer_broadcast_data *result_data;
57
58 result_data = (struct aer_broadcast_data *) data;
59
60 device_lock(&dev->dev);
61 dev->error_state = result_data->state;
62
63 if (!dev->driver ||
64 !dev->driver->err_handler ||
65 !dev->driver->err_handler->error_detected) {
66 if (result_data->state == pci_channel_io_frozen &&
67 dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) {
68 /*
69 * In case of fatal recovery, if one of down-
70 * stream device has no driver. We might be
71 * unable to recover because a later insmod
72 * of a driver for this device is unaware of
73 * its hw state.
74 */
75 pci_printk(KERN_DEBUG, dev, "device has %s\n",
76 dev->driver ?
77 "no AER-aware driver" : "no driver");
78 }
79
80 /*
81 * If there's any device in the subtree that does not
82 * have an error_detected callback, returning
83 * PCI_ERS_RESULT_NO_AER_DRIVER prevents calling of
84 * the subsequent mmio_enabled/slot_reset/resume
85 * callbacks of "any" device in the subtree. All the
86 * devices in the subtree are left in the error state
87 * without recovery.
88 */
89
90 if (dev->hdr_type != PCI_HEADER_TYPE_BRIDGE)
91 vote = PCI_ERS_RESULT_NO_AER_DRIVER;
92 else
93 vote = PCI_ERS_RESULT_NONE;
94 } else {
95 err_handler = dev->driver->err_handler;
96 vote = err_handler->error_detected(dev, result_data->state);
97 pci_uevent_ers(dev, PCI_ERS_RESULT_NONE);
98 }
99
100 result_data->result = merge_result(result_data->result, vote);
101 device_unlock(&dev->dev);
102 return 0;
103}
104
105static int report_mmio_enabled(struct pci_dev *dev, void *data)
106{
107 pci_ers_result_t vote;
108 const struct pci_error_handlers *err_handler;
109 struct aer_broadcast_data *result_data;
110
111 result_data = (struct aer_broadcast_data *) data;
112
113 device_lock(&dev->dev);
114 if (!dev->driver ||
115 !dev->driver->err_handler ||
116 !dev->driver->err_handler->mmio_enabled)
117 goto out;
118
119 err_handler = dev->driver->err_handler;
120 vote = err_handler->mmio_enabled(dev);
121 result_data->result = merge_result(result_data->result, vote);
122out:
123 device_unlock(&dev->dev);
124 return 0;
125}
126
127static int report_slot_reset(struct pci_dev *dev, void *data)
128{
129 pci_ers_result_t vote;
130 const struct pci_error_handlers *err_handler;
131 struct aer_broadcast_data *result_data;
132
133 result_data = (struct aer_broadcast_data *) data;
134
135 device_lock(&dev->dev);
136 if (!dev->driver ||
137 !dev->driver->err_handler ||
138 !dev->driver->err_handler->slot_reset)
139 goto out;
140
141 err_handler = dev->driver->err_handler;
142 vote = err_handler->slot_reset(dev);
143 result_data->result = merge_result(result_data->result, vote);
144out:
145 device_unlock(&dev->dev);
146 return 0;
147}
148
149static int report_resume(struct pci_dev *dev, void *data)
150{
151 const struct pci_error_handlers *err_handler;
152
153 device_lock(&dev->dev);
154 dev->error_state = pci_channel_io_normal;
155
156 if (!dev->driver ||
157 !dev->driver->err_handler ||
158 !dev->driver->err_handler->resume)
159 goto out;
160
161 err_handler = dev->driver->err_handler;
162 err_handler->resume(dev);
163 pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED);
164out:
165 device_unlock(&dev->dev);
166 return 0;
167}
168
169/**
170 * default_reset_link - default reset function
171 * @dev: pointer to pci_dev data structure
172 *
173 * Invoked when performing link reset on a Downstream Port or a
174 * Root Port with no aer driver.
175 */
176static pci_ers_result_t default_reset_link(struct pci_dev *dev)
177{
Sinan Kaya18426232018-07-19 18:04:09 -0500178 int rc;
179
Keith Buschc4eed622018-09-20 10:27:11 -0600180 rc = pci_bus_error_reset(dev);
Oza Pawandeep2e28bc82018-05-17 16:44:15 -0500181 pci_printk(KERN_DEBUG, dev, "downstream link has been reset\n");
Sinan Kaya18426232018-07-19 18:04:09 -0500182 return rc ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
Oza Pawandeep2e28bc82018-05-17 16:44:15 -0500183}
184
Oza Pawandeep0b914392018-05-17 16:44:19 -0500185static pci_ers_result_t reset_link(struct pci_dev *dev, u32 service)
Oza Pawandeep2e28bc82018-05-17 16:44:15 -0500186{
187 struct pci_dev *udev;
188 pci_ers_result_t status;
189 struct pcie_port_service_driver *driver = NULL;
190
191 if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
192 /* Reset this port for all subordinates */
193 udev = dev;
194 } else {
195 /* Reset the upstream component (likely downstream port) */
196 udev = dev->bus->self;
197 }
198
Oza Pawandeep2e28bc82018-05-17 16:44:15 -0500199 /* Use the aer driver of the component firstly */
Oza Pawandeep0b914392018-05-17 16:44:19 -0500200 driver = pcie_port_find_service(udev, service);
Oza Pawandeep2e28bc82018-05-17 16:44:15 -0500201
202 if (driver && driver->reset_link) {
203 status = driver->reset_link(udev);
204 } else if (udev->has_secondary_link) {
205 status = default_reset_link(udev);
206 } else {
207 pci_printk(KERN_DEBUG, dev, "no link-reset support at upstream device %s\n",
208 pci_name(udev));
209 return PCI_ERS_RESULT_DISCONNECT;
210 }
211
212 if (status != PCI_ERS_RESULT_RECOVERED) {
213 pci_printk(KERN_DEBUG, dev, "link reset at upstream device %s failed\n",
214 pci_name(udev));
215 return PCI_ERS_RESULT_DISCONNECT;
216 }
217
218 return status;
219}
220
221/**
222 * broadcast_error_message - handle message broadcast to downstream drivers
223 * @dev: pointer to from where in a hierarchy message is broadcasted down
224 * @state: error state
225 * @error_mesg: message to print
226 * @cb: callback to be broadcasted
227 *
228 * Invoked during error recovery process. Once being invoked, the content
229 * of error severity will be broadcasted to all downstream drivers in a
230 * hierarchy in question.
231 */
232static pci_ers_result_t broadcast_error_message(struct pci_dev *dev,
233 enum pci_channel_state state,
234 char *error_mesg,
235 int (*cb)(struct pci_dev *, void *))
236{
237 struct aer_broadcast_data result_data;
238
239 pci_printk(KERN_DEBUG, dev, "broadcast %s message\n", error_mesg);
240 result_data.state = state;
241 if (cb == report_error_detected)
242 result_data.result = PCI_ERS_RESULT_CAN_RECOVER;
243 else
244 result_data.result = PCI_ERS_RESULT_RECOVERED;
245
246 if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
247 /*
248 * If the error is reported by a bridge, we think this error
249 * is related to the downstream link of the bridge, so we
250 * do error recovery on all subordinates of the bridge instead
251 * of the bridge and clear the error status of the bridge.
252 */
253 if (cb == report_error_detected)
254 dev->error_state = state;
255 pci_walk_bus(dev->subordinate, cb, &result_data);
256 if (cb == report_resume) {
Oza Pawandeepec752f52018-07-19 17:58:09 -0500257 pci_aer_clear_device_status(dev);
Oza Pawandeep2e28bc82018-05-17 16:44:15 -0500258 pci_cleanup_aer_uncorrect_error_status(dev);
259 dev->error_state = pci_channel_io_normal;
260 }
261 } else {
262 /*
263 * If the error is reported by an end point, we think this
264 * error is related to the upstream link of the end point.
Oza Pawandeep43ec03a2018-07-19 17:58:07 -0500265 * The error is non fatal so the bus is ok; just invoke
266 * the callback for the function that logged the error.
Oza Pawandeep2e28bc82018-05-17 16:44:15 -0500267 */
Oza Pawandeep43ec03a2018-07-19 17:58:07 -0500268 cb(dev, &result_data);
Oza Pawandeep2e28bc82018-05-17 16:44:15 -0500269 }
270
271 return result_data.result;
272}
273
Keith Buschbdb5ac852018-09-20 10:27:12 -0600274void pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state state,
275 u32 service)
Oza Pawandeep2e28bc82018-05-17 16:44:15 -0500276{
277 pci_ers_result_t status;
Oza Pawandeep2e28bc82018-05-17 16:44:15 -0500278
279 status = broadcast_error_message(dev,
280 state,
281 "error_detected",
282 report_error_detected);
283
Keith Buschbdb5ac852018-09-20 10:27:12 -0600284 if (state == pci_channel_io_frozen &&
285 reset_link(dev, service) != PCI_ERS_RESULT_RECOVERED)
286 goto failed;
287
Oza Pawandeep2e28bc82018-05-17 16:44:15 -0500288 if (status == PCI_ERS_RESULT_CAN_RECOVER)
289 status = broadcast_error_message(dev,
290 state,
291 "mmio_enabled",
292 report_mmio_enabled);
293
294 if (status == PCI_ERS_RESULT_NEED_RESET) {
295 /*
296 * TODO: Should call platform-specific
297 * functions to reset slot before calling
298 * drivers' slot_reset callbacks?
299 */
300 status = broadcast_error_message(dev,
301 state,
302 "slot_reset",
303 report_slot_reset);
304 }
305
306 if (status != PCI_ERS_RESULT_RECOVERED)
307 goto failed;
308
309 broadcast_error_message(dev,
310 state,
311 "resume",
312 report_resume);
313
314 pci_info(dev, "AER: Device recovery successful\n");
315 return;
316
317failed:
318 pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT);
319
320 /* TODO: Should kernel panic here? */
321 pci_info(dev, "AER: Device recovery failed\n");
322}