blob: e4a1032ba0b58a4e93dc055e94dce2181c351277 [file] [log] [blame]
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * APM X-Gene SoC EDAC (error detection and correction)
 *
 * Copyright (c) 2015, Applied Micro Circuits Corporation
 * Author: Feng Kan <fkan@apm.com>
 *         Loc Ho <lho@apm.com>
 */
9
10#include <linux/ctype.h>
11#include <linux/edac.h>
12#include <linux/interrupt.h>
13#include <linux/mfd/syscon.h>
14#include <linux/module.h>
15#include <linux/of.h>
16#include <linux/of_address.h>
17#include <linux/regmap.h>
18
Borislav Petkov09bd1b42015-09-22 13:13:46 +020019#include "edac_module.h"
Loc Ho0d442932015-05-22 17:32:59 -060020
/* Module name reported to the EDAC core */
#define EDAC_MOD_STR			"xgene_edac"

/*
 * Global error configuration status registers (CSR).
 *
 * Register names are byte offsets from the PCP CSR base; the *_MASK names
 * listed under a status/mask register pair are bit masks for that pair.
 */

/* PCPHPERRINTSTS (status) and PCPHPERRINTMSK (interrupt mask) */
#define PCPHPERRINTSTS			0x0000
#define PCPHPERRINTMSK			0x0004
#define  MCU_CTL_ERR_MASK		BIT(12)
#define  IOB_PA_ERR_MASK		BIT(11)
#define  IOB_BA_ERR_MASK		BIT(10)
#define  IOB_XGIC_ERR_MASK		BIT(9)
#define  IOB_RB_ERR_MASK		BIT(8)
#define  L3C_UNCORR_ERR_MASK		BIT(5)
#define  MCU_UNCORR_ERR_MASK		BIT(4)
#define  PMD3_MERR_MASK			BIT(3)
#define  PMD2_MERR_MASK			BIT(2)
#define  PMD1_MERR_MASK			BIT(1)
#define  PMD0_MERR_MASK			BIT(0)

/* PCPLPERRINTSTS (status) and PCPLPERRINTMSK (interrupt mask) */
#define PCPLPERRINTSTS			0x0008
#define PCPLPERRINTMSK			0x000C
#define  CSW_SWITCH_TRACE_ERR_MASK	BIT(2)
#define  L3C_CORR_ERR_MASK		BIT(1)
#define  MCU_CORR_ERR_MASK		BIT(0)

/* MEMERRINTSTS (status) and MEMERRINTMSK (interrupt mask) */
#define MEMERRINTSTS			0x0010
#define MEMERRINTMSK			0x0014
44
45struct xgene_edac {
46 struct device *dev;
47 struct regmap *csw_map;
48 struct regmap *mcba_map;
49 struct regmap *mcbb_map;
50 struct regmap *efuse_map;
Loc Ho4d67e3c2016-01-22 13:47:04 -070051 struct regmap *rb_map;
Loc Ho0d442932015-05-22 17:32:59 -060052 void __iomem *pcp_csr;
53 spinlock_t lock;
Loc Ho93474732015-09-23 17:40:59 -070054 struct dentry *dfs;
Loc Ho0d442932015-05-22 17:32:59 -060055
56 struct list_head mcus;
57 struct list_head pmds;
Loc Ho93474732015-09-23 17:40:59 -070058 struct list_head l3s;
Loc Hof864b792015-09-23 17:41:00 -070059 struct list_head socs;
Loc Ho0d442932015-05-22 17:32:59 -060060
61 struct mutex mc_lock;
62 int mc_active_mask;
63 int mc_registered_mask;
64};
65
66static void xgene_edac_pcp_rd(struct xgene_edac *edac, u32 reg, u32 *val)
67{
68 *val = readl(edac->pcp_csr + reg);
69}
70
71static void xgene_edac_pcp_clrbits(struct xgene_edac *edac, u32 reg,
72 u32 bits_mask)
73{
74 u32 val;
75
76 spin_lock(&edac->lock);
77 val = readl(edac->pcp_csr + reg);
78 val &= ~bits_mask;
79 writel(val, edac->pcp_csr + reg);
80 spin_unlock(&edac->lock);
81}
82
83static void xgene_edac_pcp_setbits(struct xgene_edac *edac, u32 reg,
84 u32 bits_mask)
85{
86 u32 val;
87
88 spin_lock(&edac->lock);
89 val = readl(edac->pcp_csr + reg);
90 val |= bits_mask;
91 writel(val, edac->pcp_csr + reg);
92 spin_unlock(&edac->lock);
93}
94
/*
 * Memory controller error CSR.
 *
 * The per-rank registers (names ending in 0) are replicated MCU_MAX_RANK
 * times, MCU_RANK_STRIDE bytes apart.
 */
#define MCU_MAX_RANK				8
#define MCU_RANK_STRIDE				0x40

/* MCUGECR and the enable bits this driver sets/clears in it */
#define MCUGECR					0x0110
#define  MCU_GECR_DEMANDUCINTREN_MASK		BIT(0)
#define  MCU_GECR_BACKUCINTREN_MASK		BIT(1)
#define  MCU_GECR_CINTREN_MASK			BIT(2)
#define  MUC_GECR_MCUADDRERREN_MASK		BIT(9)

/* MCUGESR and its error bits */
#define MCUGESR					0x0114
#define  MCU_GESR_ADDRNOMATCH_ERR_MASK		BIT(7)
#define  MCU_GESR_ADDRMULTIMATCH_ERR_MASK	BIT(6)
#define  MCU_GESR_PHYP_ERR_MASK			BIT(3)

/* Rank 0 error status register and its bits */
#define MCUESRR0				0x0314
#define  MCU_ESRR_MULTUCERR_MASK		BIT(3)
#define  MCU_ESRR_BACKUCERR_MASK		BIT(2)
#define  MCU_ESRR_DEMANDUCERR_MASK		BIT(1)
#define  MCU_ESRR_CERR_MASK			BIT(0)

/* Rank 0 register written by the debugfs error injection hook */
#define MCUESRRA0				0x0318

/* Rank 0 error bank register */
#define MCUEBLRR0				0x031c
#define  MCU_EBLRR_ERRBANK_RD(src)		((src) & 0x00000007)

/* Rank 0 error row/column register */
#define MCUERCRR0				0x0320
#define  MCU_ERCRR_ERRROW_RD(src)		(((src) & 0xFFFF0000) >> 16)
#define  MCU_ERCRR_ERRCOL_RD(src)		((src) & 0x00000FFF)

/* Rank 0 error count register */
#define MCUSBECNT0				0x0324
#define  MCU_SBECNT_COUNT(src)			((src) & 0xFFFF)

/* CSW control register: selects single vs. dual MCB operation */
#define CSW_CSWCR				0x0000
#define  CSW_CSWCR_DUALMCB_MASK			BIT(0)

/* MCB address mode register */
#define MCBADDRMR				0x0000
#define  MCBADDRMR_MCU_INTLV_MODE_MASK		BIT(3)
#define  MCBADDRMR_DUALMCU_MODE_MASK		BIT(2)
#define  MCBADDRMR_MCB_INTLV_MODE_MASK		BIT(1)
#define  MCBADDRMR_ADDRESS_MODE_MASK		BIT(0)
130
131struct xgene_edac_mc_ctx {
132 struct list_head next;
133 char *name;
134 struct mem_ctl_info *mci;
135 struct xgene_edac *edac;
136 void __iomem *mcu_csr;
137 u32 mcu_id;
138};
139
140static ssize_t xgene_edac_mc_err_inject_write(struct file *file,
141 const char __user *data,
142 size_t count, loff_t *ppos)
143{
144 struct mem_ctl_info *mci = file->private_data;
145 struct xgene_edac_mc_ctx *ctx = mci->pvt_info;
146 int i;
147
148 for (i = 0; i < MCU_MAX_RANK; i++) {
149 writel(MCU_ESRR_MULTUCERR_MASK | MCU_ESRR_BACKUCERR_MASK |
150 MCU_ESRR_DEMANDUCERR_MASK | MCU_ESRR_CERR_MASK,
151 ctx->mcu_csr + MCUESRRA0 + i * MCU_RANK_STRIDE);
152 }
153 return count;
154}
155
156static const struct file_operations xgene_edac_mc_debug_inject_fops = {
157 .open = simple_open,
158 .write = xgene_edac_mc_err_inject_write,
159 .llseek = generic_file_llseek,
160};
161
162static void xgene_edac_mc_create_debugfs_node(struct mem_ctl_info *mci)
163{
164 if (!IS_ENABLED(CONFIG_EDAC_DEBUG))
165 return;
Borislav Petkov09bd1b42015-09-22 13:13:46 +0200166
Loc Ho0d442932015-05-22 17:32:59 -0600167 if (!mci->debugfs)
168 return;
Borislav Petkov09bd1b42015-09-22 13:13:46 +0200169
170 edac_debugfs_create_file("inject_ctrl", S_IWUSR, mci->debugfs, mci,
171 &xgene_edac_mc_debug_inject_fops);
Loc Ho0d442932015-05-22 17:32:59 -0600172}
173
174static void xgene_edac_mc_check(struct mem_ctl_info *mci)
175{
176 struct xgene_edac_mc_ctx *ctx = mci->pvt_info;
177 unsigned int pcp_hp_stat;
178 unsigned int pcp_lp_stat;
179 u32 reg;
180 u32 rank;
181 u32 bank;
182 u32 count;
183 u32 col_row;
184
185 xgene_edac_pcp_rd(ctx->edac, PCPHPERRINTSTS, &pcp_hp_stat);
186 xgene_edac_pcp_rd(ctx->edac, PCPLPERRINTSTS, &pcp_lp_stat);
187 if (!((MCU_UNCORR_ERR_MASK & pcp_hp_stat) ||
188 (MCU_CTL_ERR_MASK & pcp_hp_stat) ||
189 (MCU_CORR_ERR_MASK & pcp_lp_stat)))
190 return;
191
192 for (rank = 0; rank < MCU_MAX_RANK; rank++) {
193 reg = readl(ctx->mcu_csr + MCUESRR0 + rank * MCU_RANK_STRIDE);
194
195 /* Detect uncorrectable memory error */
196 if (reg & (MCU_ESRR_DEMANDUCERR_MASK |
197 MCU_ESRR_BACKUCERR_MASK)) {
198 /* Detected uncorrectable memory error */
199 edac_mc_chipset_printk(mci, KERN_ERR, "X-Gene",
200 "MCU uncorrectable error at rank %d\n", rank);
201
202 edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
203 1, 0, 0, 0, 0, 0, -1, mci->ctl_name, "");
204 }
205
206 /* Detect correctable memory error */
207 if (reg & MCU_ESRR_CERR_MASK) {
208 bank = readl(ctx->mcu_csr + MCUEBLRR0 +
209 rank * MCU_RANK_STRIDE);
210 col_row = readl(ctx->mcu_csr + MCUERCRR0 +
211 rank * MCU_RANK_STRIDE);
212 count = readl(ctx->mcu_csr + MCUSBECNT0 +
213 rank * MCU_RANK_STRIDE);
214 edac_mc_chipset_printk(mci, KERN_WARNING, "X-Gene",
215 "MCU correctable error at rank %d bank %d column %d row %d count %d\n",
216 rank, MCU_EBLRR_ERRBANK_RD(bank),
217 MCU_ERCRR_ERRCOL_RD(col_row),
218 MCU_ERCRR_ERRROW_RD(col_row),
219 MCU_SBECNT_COUNT(count));
220
221 edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
222 1, 0, 0, 0, 0, 0, -1, mci->ctl_name, "");
223 }
224
225 /* Clear all error registers */
226 writel(0x0, ctx->mcu_csr + MCUEBLRR0 + rank * MCU_RANK_STRIDE);
227 writel(0x0, ctx->mcu_csr + MCUERCRR0 + rank * MCU_RANK_STRIDE);
228 writel(0x0, ctx->mcu_csr + MCUSBECNT0 +
229 rank * MCU_RANK_STRIDE);
230 writel(reg, ctx->mcu_csr + MCUESRR0 + rank * MCU_RANK_STRIDE);
231 }
232
233 /* Detect memory controller error */
234 reg = readl(ctx->mcu_csr + MCUGESR);
235 if (reg) {
236 if (reg & MCU_GESR_ADDRNOMATCH_ERR_MASK)
237 edac_mc_chipset_printk(mci, KERN_WARNING, "X-Gene",
238 "MCU address miss-match error\n");
239 if (reg & MCU_GESR_ADDRMULTIMATCH_ERR_MASK)
240 edac_mc_chipset_printk(mci, KERN_WARNING, "X-Gene",
241 "MCU address multi-match error\n");
242
243 writel(reg, ctx->mcu_csr + MCUGESR);
244 }
245}
246
247static void xgene_edac_mc_irq_ctl(struct mem_ctl_info *mci, bool enable)
248{
249 struct xgene_edac_mc_ctx *ctx = mci->pvt_info;
250 unsigned int val;
251
252 if (edac_op_state != EDAC_OPSTATE_INT)
253 return;
254
255 mutex_lock(&ctx->edac->mc_lock);
256
257 /*
258 * As there is only single bit for enable error and interrupt mask,
259 * we must only enable top level interrupt after all MCUs are
260 * registered. Otherwise, if there is an error and the corresponding
261 * MCU has not registered, the interrupt will never get cleared. To
262 * determine all MCU have registered, we will keep track of active
263 * MCUs and registered MCUs.
264 */
265 if (enable) {
266 /* Set registered MCU bit */
267 ctx->edac->mc_registered_mask |= 1 << ctx->mcu_id;
268
269 /* Enable interrupt after all active MCU registered */
270 if (ctx->edac->mc_registered_mask ==
271 ctx->edac->mc_active_mask) {
272 /* Enable memory controller top level interrupt */
273 xgene_edac_pcp_clrbits(ctx->edac, PCPHPERRINTMSK,
274 MCU_UNCORR_ERR_MASK |
275 MCU_CTL_ERR_MASK);
276 xgene_edac_pcp_clrbits(ctx->edac, PCPLPERRINTMSK,
277 MCU_CORR_ERR_MASK);
278 }
279
280 /* Enable MCU interrupt and error reporting */
281 val = readl(ctx->mcu_csr + MCUGECR);
282 val |= MCU_GECR_DEMANDUCINTREN_MASK |
283 MCU_GECR_BACKUCINTREN_MASK |
284 MCU_GECR_CINTREN_MASK |
285 MUC_GECR_MCUADDRERREN_MASK;
286 writel(val, ctx->mcu_csr + MCUGECR);
287 } else {
288 /* Disable MCU interrupt */
289 val = readl(ctx->mcu_csr + MCUGECR);
290 val &= ~(MCU_GECR_DEMANDUCINTREN_MASK |
291 MCU_GECR_BACKUCINTREN_MASK |
292 MCU_GECR_CINTREN_MASK |
293 MUC_GECR_MCUADDRERREN_MASK);
294 writel(val, ctx->mcu_csr + MCUGECR);
295
296 /* Disable memory controller top level interrupt */
297 xgene_edac_pcp_setbits(ctx->edac, PCPHPERRINTMSK,
298 MCU_UNCORR_ERR_MASK | MCU_CTL_ERR_MASK);
299 xgene_edac_pcp_setbits(ctx->edac, PCPLPERRINTMSK,
300 MCU_CORR_ERR_MASK);
301
302 /* Clear registered MCU bit */
303 ctx->edac->mc_registered_mask &= ~(1 << ctx->mcu_id);
304 }
305
306 mutex_unlock(&ctx->edac->mc_lock);
307}
308
309static int xgene_edac_mc_is_active(struct xgene_edac_mc_ctx *ctx, int mc_idx)
310{
311 unsigned int reg;
312 u32 mcu_mask;
313
314 if (regmap_read(ctx->edac->csw_map, CSW_CSWCR, &reg))
315 return 0;
316
317 if (reg & CSW_CSWCR_DUALMCB_MASK) {
318 /*
319 * Dual MCB active - Determine if all 4 active or just MCU0
320 * and MCU2 active
321 */
322 if (regmap_read(ctx->edac->mcbb_map, MCBADDRMR, &reg))
323 return 0;
324 mcu_mask = (reg & MCBADDRMR_DUALMCU_MODE_MASK) ? 0xF : 0x5;
325 } else {
326 /*
327 * Single MCB active - Determine if MCU0/MCU1 or just MCU0
328 * active
329 */
330 if (regmap_read(ctx->edac->mcba_map, MCBADDRMR, &reg))
331 return 0;
332 mcu_mask = (reg & MCBADDRMR_DUALMCU_MODE_MASK) ? 0x3 : 0x1;
333 }
334
335 /* Save active MC mask if hasn't set already */
336 if (!ctx->edac->mc_active_mask)
337 ctx->edac->mc_active_mask = mcu_mask;
338
339 return (mcu_mask & (1 << mc_idx)) ? 1 : 0;
340}
341
342static int xgene_edac_mc_add(struct xgene_edac *edac, struct device_node *np)
343{
344 struct mem_ctl_info *mci;
345 struct edac_mc_layer layers[2];
346 struct xgene_edac_mc_ctx tmp_ctx;
347 struct xgene_edac_mc_ctx *ctx;
348 struct resource res;
349 int rc;
350
351 memset(&tmp_ctx, 0, sizeof(tmp_ctx));
352 tmp_ctx.edac = edac;
353
354 if (!devres_open_group(edac->dev, xgene_edac_mc_add, GFP_KERNEL))
355 return -ENOMEM;
356
357 rc = of_address_to_resource(np, 0, &res);
358 if (rc < 0) {
359 dev_err(edac->dev, "no MCU resource address\n");
360 goto err_group;
361 }
362 tmp_ctx.mcu_csr = devm_ioremap_resource(edac->dev, &res);
363 if (IS_ERR(tmp_ctx.mcu_csr)) {
364 dev_err(edac->dev, "unable to map MCU resource\n");
365 rc = PTR_ERR(tmp_ctx.mcu_csr);
366 goto err_group;
367 }
368
369 /* Ignore non-active MCU */
370 if (of_property_read_u32(np, "memory-controller", &tmp_ctx.mcu_id)) {
371 dev_err(edac->dev, "no memory-controller property\n");
372 rc = -ENODEV;
373 goto err_group;
374 }
375 if (!xgene_edac_mc_is_active(&tmp_ctx, tmp_ctx.mcu_id)) {
376 rc = -ENODEV;
377 goto err_group;
378 }
379
380 layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
381 layers[0].size = 4;
382 layers[0].is_virt_csrow = true;
383 layers[1].type = EDAC_MC_LAYER_CHANNEL;
384 layers[1].size = 2;
385 layers[1].is_virt_csrow = false;
386 mci = edac_mc_alloc(tmp_ctx.mcu_id, ARRAY_SIZE(layers), layers,
387 sizeof(*ctx));
388 if (!mci) {
389 rc = -ENOMEM;
390 goto err_group;
391 }
392
393 ctx = mci->pvt_info;
394 *ctx = tmp_ctx; /* Copy over resource value */
395 ctx->name = "xgene_edac_mc_err";
396 ctx->mci = mci;
397 mci->pdev = &mci->dev;
398 mci->ctl_name = ctx->name;
399 mci->dev_name = ctx->name;
400
401 mci->mtype_cap = MEM_FLAG_RDDR | MEM_FLAG_RDDR2 | MEM_FLAG_RDDR3 |
402 MEM_FLAG_DDR | MEM_FLAG_DDR2 | MEM_FLAG_DDR3;
403 mci->edac_ctl_cap = EDAC_FLAG_SECDED;
404 mci->edac_cap = EDAC_FLAG_SECDED;
405 mci->mod_name = EDAC_MOD_STR;
Loc Ho0d442932015-05-22 17:32:59 -0600406 mci->ctl_page_to_phys = NULL;
407 mci->scrub_cap = SCRUB_FLAG_HW_SRC;
408 mci->scrub_mode = SCRUB_HW_SRC;
409
410 if (edac_op_state == EDAC_OPSTATE_POLL)
411 mci->edac_check = xgene_edac_mc_check;
412
413 if (edac_mc_add_mc(mci)) {
414 dev_err(edac->dev, "edac_mc_add_mc failed\n");
415 rc = -EINVAL;
416 goto err_free;
417 }
418
419 xgene_edac_mc_create_debugfs_node(mci);
420
421 list_add(&ctx->next, &edac->mcus);
422
423 xgene_edac_mc_irq_ctl(mci, true);
424
425 devres_remove_group(edac->dev, xgene_edac_mc_add);
426
427 dev_info(edac->dev, "X-Gene EDAC MC registered\n");
428 return 0;
429
430err_free:
431 edac_mc_free(mci);
432err_group:
433 devres_release_group(edac->dev, xgene_edac_mc_add);
434 return rc;
435}
436
437static int xgene_edac_mc_remove(struct xgene_edac_mc_ctx *mcu)
438{
439 xgene_edac_mc_irq_ctl(mcu->mci, false);
440 edac_mc_del_mc(&mcu->mci->dev);
441 edac_mc_free(mcu->mci);
442 return 0;
443}
444
/*
 * CPU L1/L2 error CSR.
 *
 * Each CPU of a PMD has its own register window, CPU_CSR_STRIDE bytes
 * apart. The *_PAGE values are byte offsets of register pages within the
 * PMD register block; the *_PAGE_OFFSET values are byte offsets within a
 * page.
 */
#define MAX_CPU_PER_PMD				2
#define CPU_CSR_STRIDE				0x00100000
#define CPU_L2C_PAGE				0x000D0000
#define CPU_MEMERR_L2C_PAGE			0x000E0000
#define CPU_MEMERR_CPU_PAGE			0x000F0000

/* CPU page: ICF error control and status */
#define MEMERR_CPU_ICFECR_PAGE_OFFSET		0x0000
#define MEMERR_CPU_ICFESR_PAGE_OFFSET		0x0004
#define  MEMERR_CPU_ICFESR_ERRWAY_RD(src)	(((src) & 0xFF000000) >> 24)
#define  MEMERR_CPU_ICFESR_ERRINDEX_RD(src)	(((src) & 0x003F0000) >> 16)
#define  MEMERR_CPU_ICFESR_ERRINFO_RD(src)	(((src) & 0x0000FF00) >> 8)
#define  MEMERR_CPU_ICFESR_ERRTYPE_RD(src)	(((src) & 0x00000070) >> 4)
#define  MEMERR_CPU_ICFESR_MULTCERR_MASK	BIT(2)
#define  MEMERR_CPU_ICFESR_CERR_MASK		BIT(0)

/* CPU page: LSU error control and status */
#define MEMERR_CPU_LSUECR_PAGE_OFFSET		0x0008
#define MEMERR_CPU_LSUESR_PAGE_OFFSET		0x000c
#define  MEMERR_CPU_LSUESR_ERRWAY_RD(src)	(((src) & 0xFF000000) >> 24)
#define  MEMERR_CPU_LSUESR_ERRINDEX_RD(src)	(((src) & 0x003F0000) >> 16)
#define  MEMERR_CPU_LSUESR_ERRINFO_RD(src)	(((src) & 0x0000FF00) >> 8)
#define  MEMERR_CPU_LSUESR_ERRTYPE_RD(src)	(((src) & 0x00000070) >> 4)
#define  MEMERR_CPU_LSUESR_MULTCERR_MASK	BIT(2)
#define  MEMERR_CPU_LSUESR_CERR_MASK		BIT(0)

/* CPU page: MMU error control and status */
#define MEMERR_CPU_MMUECR_PAGE_OFFSET		0x0010
#define MEMERR_CPU_MMUESR_PAGE_OFFSET		0x0014
#define  MEMERR_CPU_MMUESR_ERRWAY_RD(src)	(((src) & 0xFF000000) >> 24)
#define  MEMERR_CPU_MMUESR_ERRINDEX_RD(src)	(((src) & 0x007F0000) >> 16)
#define  MEMERR_CPU_MMUESR_ERRINFO_RD(src)	(((src) & 0x0000FF00) >> 8)
#define  MEMERR_CPU_MMUESR_ERRREQSTR_LSU_MASK	BIT(7)
#define  MEMERR_CPU_MMUESR_ERRTYPE_RD(src)	(((src) & 0x00000070) >> 4)
#define  MEMERR_CPU_MMUESR_MULTCERR_MASK	BIT(2)
#define  MEMERR_CPU_MMUESR_CERR_MASK		BIT(0)

/* CPU page: registers written by the L1 error injection hook */
#define MEMERR_CPU_ICFESRA_PAGE_OFFSET		0x0804
#define MEMERR_CPU_LSUESRA_PAGE_OFFSET		0x080c
#define MEMERR_CPU_MMUESRA_PAGE_OFFSET		0x0814

/* L2C memory error page: error control, status and address */
#define MEMERR_L2C_L2ECR_PAGE_OFFSET		0x0000
#define MEMERR_L2C_L2ESR_PAGE_OFFSET		0x0004
#define  MEMERR_L2C_L2ESR_ERRSYN_RD(src)	(((src) & 0xFF000000) >> 24)
#define  MEMERR_L2C_L2ESR_ERRWAY_RD(src)	(((src) & 0x00FC0000) >> 18)
#define  MEMERR_L2C_L2ESR_ERRCPU_RD(src)	(((src) & 0x00020000) >> 17)
#define  MEMERR_L2C_L2ESR_ERRGROUP_RD(src)	(((src) & 0x0000E000) >> 13)
#define  MEMERR_L2C_L2ESR_ERRACTION_RD(src)	(((src) & 0x00001C00) >> 10)
#define  MEMERR_L2C_L2ESR_ERRTYPE_RD(src)	(((src) & 0x00000300) >> 8)
#define  MEMERR_L2C_L2ESR_MULTUCERR_MASK	BIT(3)
#define  MEMERR_L2C_L2ESR_MULTICERR_MASK	BIT(2)
#define  MEMERR_L2C_L2ESR_UCERR_MASK		BIT(1)
#define  MEMERR_L2C_L2ESR_ERR_MASK		BIT(0)
#define MEMERR_L2C_L2EALR_PAGE_OFFSET		0x0008
#define MEMERR_L2C_L2EAHR_PAGE_OFFSET		0x000c

/* L2C memory error page: register written by the L2 error injection hook */
#define MEMERR_L2C_L2ESRA_PAGE_OFFSET		0x0804

/* L2C page: request time-out control, status and address */
#define CPUX_L2C_L2RTOCR_PAGE_OFFSET		0x0010
#define CPUX_L2C_L2RTOSR_PAGE_OFFSET		0x0014
#define  MEMERR_L2C_L2RTOSR_MULTERR_MASK	BIT(1)
#define  MEMERR_L2C_L2RTOSR_ERR_MASK		BIT(0)
#define CPUX_L2C_L2RTOALR_PAGE_OFFSET		0x0018
#define CPUX_L2C_L2RTOAHR_PAGE_OFFSET		0x001c
502
503/*
504 * Processor Module Domain (PMD) context - Context for a pair of processsors.
505 * Each PMD consists of 2 CPUs and a shared L2 cache. Each CPU consists of
506 * its own L1 cache.
507 */
508struct xgene_edac_pmd_ctx {
509 struct list_head next;
510 struct device ddev;
511 char *name;
512 struct xgene_edac *edac;
513 struct edac_device_ctl_info *edac_dev;
514 void __iomem *pmd_csr;
515 u32 pmd;
Arnd Bergmann451bb7f2015-06-01 16:09:35 -0600516 int version;
Loc Ho0d442932015-05-22 17:32:59 -0600517};
518
519static void xgene_edac_pmd_l1_check(struct edac_device_ctl_info *edac_dev,
520 int cpu_idx)
521{
522 struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
523 void __iomem *pg_f;
524 u32 val;
525
526 pg_f = ctx->pmd_csr + cpu_idx * CPU_CSR_STRIDE + CPU_MEMERR_CPU_PAGE;
527
528 val = readl(pg_f + MEMERR_CPU_ICFESR_PAGE_OFFSET);
Loc Ho93474732015-09-23 17:40:59 -0700529 if (!val)
530 goto chk_lsu;
531 dev_err(edac_dev->dev,
532 "CPU%d L1 memory error ICF 0x%08X Way 0x%02X Index 0x%02X Info 0x%02X\n",
533 ctx->pmd * MAX_CPU_PER_PMD + cpu_idx, val,
534 MEMERR_CPU_ICFESR_ERRWAY_RD(val),
535 MEMERR_CPU_ICFESR_ERRINDEX_RD(val),
536 MEMERR_CPU_ICFESR_ERRINFO_RD(val));
537 if (val & MEMERR_CPU_ICFESR_CERR_MASK)
538 dev_err(edac_dev->dev, "One or more correctable error\n");
539 if (val & MEMERR_CPU_ICFESR_MULTCERR_MASK)
540 dev_err(edac_dev->dev, "Multiple correctable error\n");
541 switch (MEMERR_CPU_ICFESR_ERRTYPE_RD(val)) {
542 case 1:
543 dev_err(edac_dev->dev, "L1 TLB multiple hit\n");
544 break;
545 case 2:
546 dev_err(edac_dev->dev, "Way select multiple hit\n");
547 break;
548 case 3:
549 dev_err(edac_dev->dev, "Physical tag parity error\n");
550 break;
551 case 4:
552 case 5:
553 dev_err(edac_dev->dev, "L1 data parity error\n");
554 break;
555 case 6:
556 dev_err(edac_dev->dev, "L1 pre-decode parity error\n");
557 break;
Loc Ho0d442932015-05-22 17:32:59 -0600558 }
559
Loc Ho93474732015-09-23 17:40:59 -0700560 /* Clear any HW errors */
561 writel(val, pg_f + MEMERR_CPU_ICFESR_PAGE_OFFSET);
Loc Ho0d442932015-05-22 17:32:59 -0600562
Loc Ho93474732015-09-23 17:40:59 -0700563 if (val & (MEMERR_CPU_ICFESR_CERR_MASK |
564 MEMERR_CPU_ICFESR_MULTCERR_MASK))
Loc Ho0d442932015-05-22 17:32:59 -0600565 edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
Loc Ho93474732015-09-23 17:40:59 -0700566
567chk_lsu:
568 val = readl(pg_f + MEMERR_CPU_LSUESR_PAGE_OFFSET);
569 if (!val)
570 goto chk_mmu;
571 dev_err(edac_dev->dev,
572 "CPU%d memory error LSU 0x%08X Way 0x%02X Index 0x%02X Info 0x%02X\n",
573 ctx->pmd * MAX_CPU_PER_PMD + cpu_idx, val,
574 MEMERR_CPU_LSUESR_ERRWAY_RD(val),
575 MEMERR_CPU_LSUESR_ERRINDEX_RD(val),
576 MEMERR_CPU_LSUESR_ERRINFO_RD(val));
577 if (val & MEMERR_CPU_LSUESR_CERR_MASK)
578 dev_err(edac_dev->dev, "One or more correctable error\n");
579 if (val & MEMERR_CPU_LSUESR_MULTCERR_MASK)
580 dev_err(edac_dev->dev, "Multiple correctable error\n");
581 switch (MEMERR_CPU_LSUESR_ERRTYPE_RD(val)) {
582 case 0:
583 dev_err(edac_dev->dev, "Load tag error\n");
584 break;
585 case 1:
586 dev_err(edac_dev->dev, "Load data error\n");
587 break;
588 case 2:
589 dev_err(edac_dev->dev, "WSL multihit error\n");
590 break;
591 case 3:
592 dev_err(edac_dev->dev, "Store tag error\n");
593 break;
594 case 4:
595 dev_err(edac_dev->dev,
596 "DTB multihit from load pipeline error\n");
597 break;
598 case 5:
599 dev_err(edac_dev->dev,
600 "DTB multihit from store pipeline error\n");
601 break;
Loc Ho0d442932015-05-22 17:32:59 -0600602 }
Loc Ho93474732015-09-23 17:40:59 -0700603
604 /* Clear any HW errors */
605 writel(val, pg_f + MEMERR_CPU_LSUESR_PAGE_OFFSET);
606
607 if (val & (MEMERR_CPU_LSUESR_CERR_MASK |
608 MEMERR_CPU_LSUESR_MULTCERR_MASK))
609 edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
610
611chk_mmu:
612 val = readl(pg_f + MEMERR_CPU_MMUESR_PAGE_OFFSET);
613 if (!val)
614 return;
615 dev_err(edac_dev->dev,
616 "CPU%d memory error MMU 0x%08X Way 0x%02X Index 0x%02X Info 0x%02X %s\n",
617 ctx->pmd * MAX_CPU_PER_PMD + cpu_idx, val,
618 MEMERR_CPU_MMUESR_ERRWAY_RD(val),
619 MEMERR_CPU_MMUESR_ERRINDEX_RD(val),
620 MEMERR_CPU_MMUESR_ERRINFO_RD(val),
621 val & MEMERR_CPU_MMUESR_ERRREQSTR_LSU_MASK ? "LSU" : "ICF");
622 if (val & MEMERR_CPU_MMUESR_CERR_MASK)
623 dev_err(edac_dev->dev, "One or more correctable error\n");
624 if (val & MEMERR_CPU_MMUESR_MULTCERR_MASK)
625 dev_err(edac_dev->dev, "Multiple correctable error\n");
626 switch (MEMERR_CPU_MMUESR_ERRTYPE_RD(val)) {
627 case 0:
628 dev_err(edac_dev->dev, "Stage 1 UTB hit error\n");
629 break;
630 case 1:
631 dev_err(edac_dev->dev, "Stage 1 UTB miss error\n");
632 break;
633 case 2:
634 dev_err(edac_dev->dev, "Stage 1 UTB allocate error\n");
635 break;
636 case 3:
637 dev_err(edac_dev->dev, "TMO operation single bank error\n");
638 break;
639 case 4:
640 dev_err(edac_dev->dev, "Stage 2 UTB error\n");
641 break;
642 case 5:
643 dev_err(edac_dev->dev, "Stage 2 UTB miss error\n");
644 break;
645 case 6:
646 dev_err(edac_dev->dev, "Stage 2 UTB allocate error\n");
647 break;
648 case 7:
649 dev_err(edac_dev->dev, "TMO operation multiple bank error\n");
650 break;
651 }
652
653 /* Clear any HW errors */
654 writel(val, pg_f + MEMERR_CPU_MMUESR_PAGE_OFFSET);
655
656 edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
Loc Ho0d442932015-05-22 17:32:59 -0600657}
658
659static void xgene_edac_pmd_l2_check(struct edac_device_ctl_info *edac_dev)
660{
661 struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
662 void __iomem *pg_d;
663 void __iomem *pg_e;
664 u32 val_hi;
665 u32 val_lo;
666 u32 val;
667
668 /* Check L2 */
669 pg_e = ctx->pmd_csr + CPU_MEMERR_L2C_PAGE;
670 val = readl(pg_e + MEMERR_L2C_L2ESR_PAGE_OFFSET);
Loc Ho93474732015-09-23 17:40:59 -0700671 if (!val)
672 goto chk_l2c;
673 val_lo = readl(pg_e + MEMERR_L2C_L2EALR_PAGE_OFFSET);
674 val_hi = readl(pg_e + MEMERR_L2C_L2EAHR_PAGE_OFFSET);
675 dev_err(edac_dev->dev,
676 "PMD%d memory error L2C L2ESR 0x%08X @ 0x%08X.%08X\n",
677 ctx->pmd, val, val_hi, val_lo);
678 dev_err(edac_dev->dev,
679 "ErrSyndrome 0x%02X ErrWay 0x%02X ErrCpu %d ErrGroup 0x%02X ErrAction 0x%02X\n",
680 MEMERR_L2C_L2ESR_ERRSYN_RD(val),
681 MEMERR_L2C_L2ESR_ERRWAY_RD(val),
682 MEMERR_L2C_L2ESR_ERRCPU_RD(val),
683 MEMERR_L2C_L2ESR_ERRGROUP_RD(val),
684 MEMERR_L2C_L2ESR_ERRACTION_RD(val));
Loc Ho0d442932015-05-22 17:32:59 -0600685
Loc Ho93474732015-09-23 17:40:59 -0700686 if (val & MEMERR_L2C_L2ESR_ERR_MASK)
687 dev_err(edac_dev->dev, "One or more correctable error\n");
688 if (val & MEMERR_L2C_L2ESR_MULTICERR_MASK)
689 dev_err(edac_dev->dev, "Multiple correctable error\n");
690 if (val & MEMERR_L2C_L2ESR_UCERR_MASK)
691 dev_err(edac_dev->dev, "One or more uncorrectable error\n");
692 if (val & MEMERR_L2C_L2ESR_MULTUCERR_MASK)
693 dev_err(edac_dev->dev, "Multiple uncorrectable error\n");
Loc Ho0d442932015-05-22 17:32:59 -0600694
Loc Ho93474732015-09-23 17:40:59 -0700695 switch (MEMERR_L2C_L2ESR_ERRTYPE_RD(val)) {
696 case 0:
697 dev_err(edac_dev->dev, "Outbound SDB parity error\n");
698 break;
699 case 1:
700 dev_err(edac_dev->dev, "Inbound SDB parity error\n");
701 break;
702 case 2:
703 dev_err(edac_dev->dev, "Tag ECC error\n");
704 break;
705 case 3:
706 dev_err(edac_dev->dev, "Data ECC error\n");
707 break;
Loc Ho0d442932015-05-22 17:32:59 -0600708 }
709
Loc Ho93474732015-09-23 17:40:59 -0700710 /* Clear any HW errors */
711 writel(val, pg_e + MEMERR_L2C_L2ESR_PAGE_OFFSET);
712
713 if (val & (MEMERR_L2C_L2ESR_ERR_MASK |
714 MEMERR_L2C_L2ESR_MULTICERR_MASK))
715 edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
716 if (val & (MEMERR_L2C_L2ESR_UCERR_MASK |
717 MEMERR_L2C_L2ESR_MULTUCERR_MASK))
718 edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
719
720chk_l2c:
Loc Ho0d442932015-05-22 17:32:59 -0600721 /* Check if any memory request timed out on L2 cache */
722 pg_d = ctx->pmd_csr + CPU_L2C_PAGE;
723 val = readl(pg_d + CPUX_L2C_L2RTOSR_PAGE_OFFSET);
724 if (val) {
725 val_lo = readl(pg_d + CPUX_L2C_L2RTOALR_PAGE_OFFSET);
726 val_hi = readl(pg_d + CPUX_L2C_L2RTOAHR_PAGE_OFFSET);
727 dev_err(edac_dev->dev,
728 "PMD%d L2C error L2C RTOSR 0x%08X @ 0x%08X.%08X\n",
729 ctx->pmd, val, val_hi, val_lo);
730 writel(val, pg_d + CPUX_L2C_L2RTOSR_PAGE_OFFSET);
731 }
732}
733
734static void xgene_edac_pmd_check(struct edac_device_ctl_info *edac_dev)
735{
736 struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
737 unsigned int pcp_hp_stat;
738 int i;
739
740 xgene_edac_pcp_rd(ctx->edac, PCPHPERRINTSTS, &pcp_hp_stat);
741 if (!((PMD0_MERR_MASK << ctx->pmd) & pcp_hp_stat))
742 return;
743
744 /* Check CPU L1 error */
745 for (i = 0; i < MAX_CPU_PER_PMD; i++)
746 xgene_edac_pmd_l1_check(edac_dev, i);
747
748 /* Check CPU L2 error */
749 xgene_edac_pmd_l2_check(edac_dev);
750}
751
752static void xgene_edac_pmd_cpu_hw_cfg(struct edac_device_ctl_info *edac_dev,
753 int cpu)
754{
755 struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
756 void __iomem *pg_f = ctx->pmd_csr + cpu * CPU_CSR_STRIDE +
757 CPU_MEMERR_CPU_PAGE;
758
759 /*
760 * Enable CPU memory error:
761 * MEMERR_CPU_ICFESRA, MEMERR_CPU_LSUESRA, and MEMERR_CPU_MMUESRA
762 */
763 writel(0x00000301, pg_f + MEMERR_CPU_ICFECR_PAGE_OFFSET);
764 writel(0x00000301, pg_f + MEMERR_CPU_LSUECR_PAGE_OFFSET);
765 writel(0x00000101, pg_f + MEMERR_CPU_MMUECR_PAGE_OFFSET);
766}
767
Loc Ho0d442932015-05-22 17:32:59 -0600768static void xgene_edac_pmd_hw_cfg(struct edac_device_ctl_info *edac_dev)
769{
770 struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
771 void __iomem *pg_d = ctx->pmd_csr + CPU_L2C_PAGE;
772 void __iomem *pg_e = ctx->pmd_csr + CPU_MEMERR_L2C_PAGE;
773
774 /* Enable PMD memory error - MEMERR_L2C_L2ECR and L2C_L2RTOCR */
775 writel(0x00000703, pg_e + MEMERR_L2C_L2ECR_PAGE_OFFSET);
776 /* Configure L2C HW request time out feature if supported */
Arnd Bergmann451bb7f2015-06-01 16:09:35 -0600777 if (ctx->version > 1)
Loc Ho0d442932015-05-22 17:32:59 -0600778 writel(0x00000119, pg_d + CPUX_L2C_L2RTOCR_PAGE_OFFSET);
779}
780
781static void xgene_edac_pmd_hw_ctl(struct edac_device_ctl_info *edac_dev,
782 bool enable)
783{
784 struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
785 int i;
786
787 /* Enable PMD error interrupt */
788 if (edac_dev->op_state == OP_RUNNING_INTERRUPT) {
789 if (enable)
790 xgene_edac_pcp_clrbits(ctx->edac, PCPHPERRINTMSK,
791 PMD0_MERR_MASK << ctx->pmd);
792 else
793 xgene_edac_pcp_setbits(ctx->edac, PCPHPERRINTMSK,
794 PMD0_MERR_MASK << ctx->pmd);
795 }
796
797 if (enable) {
798 xgene_edac_pmd_hw_cfg(edac_dev);
799
800 /* Two CPUs per a PMD */
801 for (i = 0; i < MAX_CPU_PER_PMD; i++)
802 xgene_edac_pmd_cpu_hw_cfg(edac_dev, i);
803 }
804}
805
806static ssize_t xgene_edac_pmd_l1_inject_ctrl_write(struct file *file,
807 const char __user *data,
808 size_t count, loff_t *ppos)
809{
810 struct edac_device_ctl_info *edac_dev = file->private_data;
811 struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
812 void __iomem *cpux_pg_f;
813 int i;
814
815 for (i = 0; i < MAX_CPU_PER_PMD; i++) {
816 cpux_pg_f = ctx->pmd_csr + i * CPU_CSR_STRIDE +
817 CPU_MEMERR_CPU_PAGE;
818
819 writel(MEMERR_CPU_ICFESR_MULTCERR_MASK |
820 MEMERR_CPU_ICFESR_CERR_MASK,
821 cpux_pg_f + MEMERR_CPU_ICFESRA_PAGE_OFFSET);
822 writel(MEMERR_CPU_LSUESR_MULTCERR_MASK |
823 MEMERR_CPU_LSUESR_CERR_MASK,
824 cpux_pg_f + MEMERR_CPU_LSUESRA_PAGE_OFFSET);
825 writel(MEMERR_CPU_MMUESR_MULTCERR_MASK |
826 MEMERR_CPU_MMUESR_CERR_MASK,
827 cpux_pg_f + MEMERR_CPU_MMUESRA_PAGE_OFFSET);
828 }
829 return count;
830}
831
832static ssize_t xgene_edac_pmd_l2_inject_ctrl_write(struct file *file,
833 const char __user *data,
834 size_t count, loff_t *ppos)
835{
836 struct edac_device_ctl_info *edac_dev = file->private_data;
837 struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
838 void __iomem *pg_e = ctx->pmd_csr + CPU_MEMERR_L2C_PAGE;
839
840 writel(MEMERR_L2C_L2ESR_MULTUCERR_MASK |
841 MEMERR_L2C_L2ESR_MULTICERR_MASK |
842 MEMERR_L2C_L2ESR_UCERR_MASK |
843 MEMERR_L2C_L2ESR_ERR_MASK,
844 pg_e + MEMERR_L2C_L2ESRA_PAGE_OFFSET);
845 return count;
846}
847
848static const struct file_operations xgene_edac_pmd_debug_inject_fops[] = {
849 {
850 .open = simple_open,
851 .write = xgene_edac_pmd_l1_inject_ctrl_write,
852 .llseek = generic_file_llseek, },
853 {
854 .open = simple_open,
855 .write = xgene_edac_pmd_l2_inject_ctrl_write,
856 .llseek = generic_file_llseek, },
857 { }
858};
859
Loc Ho93474732015-09-23 17:40:59 -0700860static void
861xgene_edac_pmd_create_debugfs_nodes(struct edac_device_ctl_info *edac_dev)
Loc Ho0d442932015-05-22 17:32:59 -0600862{
863 struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
Borislav Petkov09bd1b42015-09-22 13:13:46 +0200864 struct dentry *dbgfs_dir;
Loc Ho93474732015-09-23 17:40:59 -0700865 char name[10];
Loc Ho0d442932015-05-22 17:32:59 -0600866
Loc Ho93474732015-09-23 17:40:59 -0700867 if (!IS_ENABLED(CONFIG_EDAC_DEBUG) || !ctx->edac->dfs)
Loc Ho0d442932015-05-22 17:32:59 -0600868 return;
869
Loc Ho9bc1c0c2015-09-24 10:38:07 -0700870 snprintf(name, sizeof(name), "PMD%d", ctx->pmd);
Borislav Petkov09bd1b42015-09-22 13:13:46 +0200871 dbgfs_dir = edac_debugfs_create_dir_at(name, ctx->edac->dfs);
872 if (!dbgfs_dir)
Loc Ho0d442932015-05-22 17:32:59 -0600873 return;
874
Borislav Petkov09bd1b42015-09-22 13:13:46 +0200875 edac_debugfs_create_file("l1_inject_ctrl", S_IWUSR, dbgfs_dir, edac_dev,
876 &xgene_edac_pmd_debug_inject_fops[0]);
877 edac_debugfs_create_file("l2_inject_ctrl", S_IWUSR, dbgfs_dir, edac_dev,
878 &xgene_edac_pmd_debug_inject_fops[1]);
Loc Ho0d442932015-05-22 17:32:59 -0600879}
880
881static int xgene_edac_pmd_available(u32 efuse, int pmd)
882{
883 return (efuse & (1 << pmd)) ? 0 : 1;
884}
885
Arnd Bergmann451bb7f2015-06-01 16:09:35 -0600886static int xgene_edac_pmd_add(struct xgene_edac *edac, struct device_node *np,
887 int version)
Loc Ho0d442932015-05-22 17:32:59 -0600888{
889 struct edac_device_ctl_info *edac_dev;
890 struct xgene_edac_pmd_ctx *ctx;
891 struct resource res;
892 char edac_name[10];
893 u32 pmd;
894 int rc;
895 u32 val;
896
897 if (!devres_open_group(edac->dev, xgene_edac_pmd_add, GFP_KERNEL))
898 return -ENOMEM;
899
900 /* Determine if this PMD is disabled */
901 if (of_property_read_u32(np, "pmd-controller", &pmd)) {
902 dev_err(edac->dev, "no pmd-controller property\n");
903 rc = -ENODEV;
904 goto err_group;
905 }
906 rc = regmap_read(edac->efuse_map, 0, &val);
907 if (rc)
908 goto err_group;
909 if (!xgene_edac_pmd_available(val, pmd)) {
910 rc = -ENODEV;
911 goto err_group;
912 }
913
Loc Ho9bc1c0c2015-09-24 10:38:07 -0700914 snprintf(edac_name, sizeof(edac_name), "l2c%d", pmd);
Loc Ho0d442932015-05-22 17:32:59 -0600915 edac_dev = edac_device_alloc_ctl_info(sizeof(*ctx),
916 edac_name, 1, "l2c", 1, 2, NULL,
917 0, edac_device_alloc_index());
918 if (!edac_dev) {
919 rc = -ENOMEM;
920 goto err_group;
921 }
922
923 ctx = edac_dev->pvt_info;
924 ctx->name = "xgene_pmd_err";
925 ctx->pmd = pmd;
926 ctx->edac = edac;
927 ctx->edac_dev = edac_dev;
928 ctx->ddev = *edac->dev;
Arnd Bergmann451bb7f2015-06-01 16:09:35 -0600929 ctx->version = version;
Loc Ho0d442932015-05-22 17:32:59 -0600930 edac_dev->dev = &ctx->ddev;
931 edac_dev->ctl_name = ctx->name;
932 edac_dev->dev_name = ctx->name;
933 edac_dev->mod_name = EDAC_MOD_STR;
934
935 rc = of_address_to_resource(np, 0, &res);
936 if (rc < 0) {
937 dev_err(edac->dev, "no PMD resource address\n");
938 goto err_free;
939 }
940 ctx->pmd_csr = devm_ioremap_resource(edac->dev, &res);
941 if (IS_ERR(ctx->pmd_csr)) {
942 dev_err(edac->dev,
943 "devm_ioremap_resource failed for PMD resource address\n");
944 rc = PTR_ERR(ctx->pmd_csr);
945 goto err_free;
946 }
947
948 if (edac_op_state == EDAC_OPSTATE_POLL)
949 edac_dev->edac_check = xgene_edac_pmd_check;
950
951 xgene_edac_pmd_create_debugfs_nodes(edac_dev);
952
953 rc = edac_device_add_device(edac_dev);
954 if (rc > 0) {
955 dev_err(edac->dev, "edac_device_add_device failed\n");
956 rc = -ENOMEM;
957 goto err_free;
958 }
959
960 if (edac_op_state == EDAC_OPSTATE_INT)
961 edac_dev->op_state = OP_RUNNING_INTERRUPT;
962
963 list_add(&ctx->next, &edac->pmds);
964
965 xgene_edac_pmd_hw_ctl(edac_dev, 1);
966
967 devres_remove_group(edac->dev, xgene_edac_pmd_add);
968
969 dev_info(edac->dev, "X-Gene EDAC PMD%d registered\n", ctx->pmd);
970 return 0;
971
972err_free:
973 edac_device_free_ctl_info(edac_dev);
974err_group:
975 devres_release_group(edac->dev, xgene_edac_pmd_add);
976 return rc;
977}
978
979static int xgene_edac_pmd_remove(struct xgene_edac_pmd_ctx *pmd)
980{
981 struct edac_device_ctl_info *edac_dev = pmd->edac_dev;
982
983 xgene_edac_pmd_hw_ctl(edac_dev, 0);
984 edac_device_del_device(edac_dev->dev);
985 edac_device_free_ctl_info(edac_dev);
986 return 0;
987}
988
/* L3 Error device */

/* Error status register and its flag bits */
#define L3C_ESR				(0x0A * 4)
#define  L3C_ESR_DATATAG_MASK		BIT(9)
#define  L3C_ESR_MULTIHIT_MASK		BIT(8)
#define  L3C_ESR_UCEVICT_MASK		BIT(6)
#define  L3C_ESR_MULTIUCERR_MASK	BIT(5)
#define  L3C_ESR_MULTICERR_MASK		BIT(4)
#define  L3C_ESR_UCERR_MASK		BIT(3)
#define  L3C_ESR_CERR_MASK		BIT(2)
#define  L3C_ESR_UCERRINTR_MASK		BIT(1)
#define  L3C_ESR_CERRINTR_MASK		BIT(0)

/* Error control register: error and interrupt enables */
#define L3C_ECR				(0x0B * 4)
#define  L3C_ECR_UCINTREN		BIT(3)
#define  L3C_ECR_CINTREN		BIT(2)
#define  L3C_UCERREN			BIT(1)
#define  L3C_CERREN			BIT(0)

/*
 * Error log register field extractors. The argument is parenthesized so that
 * an expression (e.g. "a | b") can be passed without precedence surprises.
 */
#define L3C_ELR				(0x0C * 4)
#define  L3C_ELR_ERRSYN(src)		(((src) & 0xFF800000) >> 23)
#define  L3C_ELR_ERRWAY(src)		(((src) & 0x007E0000) >> 17)
#define  L3C_ELR_AGENTID(src)		(((src) & 0x0001E000) >> 13)
#define  L3C_ELR_ERRGRP(src)		(((src) & 0x00000F00) >> 8)
#define  L3C_ELR_OPTYPE(src)		(((src) & 0x000000F0) >> 4)
#define  L3C_ELR_PADDRHIGH(src)		((src) & 0x0000000F)

/* Error address and bank log registers */
#define L3C_AELR			(0x0D * 4)
#define L3C_BELR			(0x0E * 4)
#define  L3C_BELR_BANK(src)		((src) & 0x0000000F)
1015
1016struct xgene_edac_dev_ctx {
1017 struct list_head next;
1018 struct device ddev;
1019 char *name;
1020 struct xgene_edac *edac;
1021 struct edac_device_ctl_info *edac_dev;
1022 int edac_idx;
1023 void __iomem *dev_csr;
1024 int version;
1025};
1026
1027/*
1028 * Version 1 of the L3 controller has broken single bit correctable logic for
1029 * certain error syndromes. Log them as uncorrectable in that case.
1030 */
1031static bool xgene_edac_l3_promote_to_uc_err(u32 l3cesr, u32 l3celr)
1032{
1033 if (l3cesr & L3C_ESR_DATATAG_MASK) {
1034 switch (L3C_ELR_ERRSYN(l3celr)) {
1035 case 0x13C:
1036 case 0x0B4:
1037 case 0x007:
1038 case 0x00D:
1039 case 0x00E:
1040 case 0x019:
1041 case 0x01A:
1042 case 0x01C:
1043 case 0x04E:
1044 case 0x041:
1045 return true;
1046 }
Loc Ho4d67e3c2016-01-22 13:47:04 -07001047 } else if (L3C_ELR_ERRWAY(l3celr) == 9)
Loc Ho93474732015-09-23 17:40:59 -07001048 return true;
1049
1050 return false;
1051}
1052
1053static void xgene_edac_l3_check(struct edac_device_ctl_info *edac_dev)
1054{
1055 struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
1056 u32 l3cesr;
1057 u32 l3celr;
1058 u32 l3caelr;
1059 u32 l3cbelr;
1060
1061 l3cesr = readl(ctx->dev_csr + L3C_ESR);
1062 if (!(l3cesr & (L3C_ESR_UCERR_MASK | L3C_ESR_CERR_MASK)))
1063 return;
1064
1065 if (l3cesr & L3C_ESR_UCERR_MASK)
1066 dev_err(edac_dev->dev, "L3C uncorrectable error\n");
1067 if (l3cesr & L3C_ESR_CERR_MASK)
1068 dev_warn(edac_dev->dev, "L3C correctable error\n");
1069
1070 l3celr = readl(ctx->dev_csr + L3C_ELR);
1071 l3caelr = readl(ctx->dev_csr + L3C_AELR);
1072 l3cbelr = readl(ctx->dev_csr + L3C_BELR);
1073 if (l3cesr & L3C_ESR_MULTIHIT_MASK)
1074 dev_err(edac_dev->dev, "L3C multiple hit error\n");
1075 if (l3cesr & L3C_ESR_UCEVICT_MASK)
1076 dev_err(edac_dev->dev,
1077 "L3C dropped eviction of line with error\n");
1078 if (l3cesr & L3C_ESR_MULTIUCERR_MASK)
1079 dev_err(edac_dev->dev, "L3C multiple uncorrectable error\n");
1080 if (l3cesr & L3C_ESR_DATATAG_MASK)
1081 dev_err(edac_dev->dev,
1082 "L3C data error syndrome 0x%X group 0x%X\n",
1083 L3C_ELR_ERRSYN(l3celr), L3C_ELR_ERRGRP(l3celr));
1084 else
1085 dev_err(edac_dev->dev,
1086 "L3C tag error syndrome 0x%X Way of Tag 0x%X Agent ID 0x%X Operation type 0x%X\n",
1087 L3C_ELR_ERRSYN(l3celr), L3C_ELR_ERRWAY(l3celr),
1088 L3C_ELR_AGENTID(l3celr), L3C_ELR_OPTYPE(l3celr));
1089 /*
1090 * NOTE: Address [41:38] in L3C_ELR_PADDRHIGH(l3celr).
1091 * Address [37:6] in l3caelr. Lower 6 bits are zero.
1092 */
1093 dev_err(edac_dev->dev, "L3C error address 0x%08X.%08X bank %d\n",
1094 L3C_ELR_PADDRHIGH(l3celr) << 6 | (l3caelr >> 26),
1095 (l3caelr & 0x3FFFFFFF) << 6, L3C_BELR_BANK(l3cbelr));
1096 dev_err(edac_dev->dev,
1097 "L3C error status register value 0x%X\n", l3cesr);
1098
1099 /* Clear L3C error interrupt */
1100 writel(0, ctx->dev_csr + L3C_ESR);
1101
1102 if (ctx->version <= 1 &&
1103 xgene_edac_l3_promote_to_uc_err(l3cesr, l3celr)) {
1104 edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
1105 return;
1106 }
1107 if (l3cesr & L3C_ESR_CERR_MASK)
1108 edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
1109 if (l3cesr & L3C_ESR_UCERR_MASK)
1110 edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
1111}
1112
1113static void xgene_edac_l3_hw_init(struct edac_device_ctl_info *edac_dev,
1114 bool enable)
1115{
1116 struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
1117 u32 val;
1118
1119 val = readl(ctx->dev_csr + L3C_ECR);
1120 val |= L3C_UCERREN | L3C_CERREN;
1121 /* On disable, we just disable interrupt but keep error enabled */
1122 if (edac_dev->op_state == OP_RUNNING_INTERRUPT) {
1123 if (enable)
1124 val |= L3C_ECR_UCINTREN | L3C_ECR_CINTREN;
1125 else
1126 val &= ~(L3C_ECR_UCINTREN | L3C_ECR_CINTREN);
1127 }
1128 writel(val, ctx->dev_csr + L3C_ECR);
1129
1130 if (edac_dev->op_state == OP_RUNNING_INTERRUPT) {
1131 /* Enable/disable L3 error top level interrupt */
1132 if (enable) {
1133 xgene_edac_pcp_clrbits(ctx->edac, PCPHPERRINTMSK,
1134 L3C_UNCORR_ERR_MASK);
1135 xgene_edac_pcp_clrbits(ctx->edac, PCPLPERRINTMSK,
1136 L3C_CORR_ERR_MASK);
1137 } else {
1138 xgene_edac_pcp_setbits(ctx->edac, PCPHPERRINTMSK,
1139 L3C_UNCORR_ERR_MASK);
1140 xgene_edac_pcp_setbits(ctx->edac, PCPLPERRINTMSK,
1141 L3C_CORR_ERR_MASK);
1142 }
1143 }
1144}
1145
1146static ssize_t xgene_edac_l3_inject_ctrl_write(struct file *file,
1147 const char __user *data,
1148 size_t count, loff_t *ppos)
1149{
1150 struct edac_device_ctl_info *edac_dev = file->private_data;
1151 struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
1152
1153 /* Generate all errors */
1154 writel(0xFFFFFFFF, ctx->dev_csr + L3C_ESR);
1155 return count;
1156}
1157
1158static const struct file_operations xgene_edac_l3_debug_inject_fops = {
1159 .open = simple_open,
1160 .write = xgene_edac_l3_inject_ctrl_write,
1161 .llseek = generic_file_llseek
1162};
1163
1164static void
1165xgene_edac_l3_create_debugfs_nodes(struct edac_device_ctl_info *edac_dev)
1166{
1167 struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
1168 struct dentry *dbgfs_dir;
1169 char name[10];
1170
1171 if (!IS_ENABLED(CONFIG_EDAC_DEBUG) || !ctx->edac->dfs)
1172 return;
1173
1174 snprintf(name, sizeof(name), "l3c%d", ctx->edac_idx);
1175 dbgfs_dir = edac_debugfs_create_dir_at(name, ctx->edac->dfs);
1176 if (!dbgfs_dir)
1177 return;
1178
1179 debugfs_create_file("l3_inject_ctrl", S_IWUSR, dbgfs_dir, edac_dev,
1180 &xgene_edac_l3_debug_inject_fops);
1181}
1182
1183static int xgene_edac_l3_add(struct xgene_edac *edac, struct device_node *np,
1184 int version)
1185{
1186 struct edac_device_ctl_info *edac_dev;
1187 struct xgene_edac_dev_ctx *ctx;
1188 struct resource res;
1189 void __iomem *dev_csr;
1190 int edac_idx;
1191 int rc = 0;
1192
1193 if (!devres_open_group(edac->dev, xgene_edac_l3_add, GFP_KERNEL))
1194 return -ENOMEM;
1195
1196 rc = of_address_to_resource(np, 0, &res);
1197 if (rc < 0) {
1198 dev_err(edac->dev, "no L3 resource address\n");
1199 goto err_release_group;
1200 }
1201 dev_csr = devm_ioremap_resource(edac->dev, &res);
1202 if (IS_ERR(dev_csr)) {
1203 dev_err(edac->dev,
1204 "devm_ioremap_resource failed for L3 resource address\n");
1205 rc = PTR_ERR(dev_csr);
1206 goto err_release_group;
1207 }
1208
1209 edac_idx = edac_device_alloc_index();
1210 edac_dev = edac_device_alloc_ctl_info(sizeof(*ctx),
1211 "l3c", 1, "l3c", 1, 0, NULL, 0,
1212 edac_idx);
1213 if (!edac_dev) {
1214 rc = -ENOMEM;
1215 goto err_release_group;
1216 }
1217
1218 ctx = edac_dev->pvt_info;
1219 ctx->dev_csr = dev_csr;
1220 ctx->name = "xgene_l3_err";
1221 ctx->edac_idx = edac_idx;
1222 ctx->edac = edac;
1223 ctx->edac_dev = edac_dev;
1224 ctx->ddev = *edac->dev;
1225 ctx->version = version;
1226 edac_dev->dev = &ctx->ddev;
1227 edac_dev->ctl_name = ctx->name;
1228 edac_dev->dev_name = ctx->name;
1229 edac_dev->mod_name = EDAC_MOD_STR;
1230
1231 if (edac_op_state == EDAC_OPSTATE_POLL)
1232 edac_dev->edac_check = xgene_edac_l3_check;
1233
1234 xgene_edac_l3_create_debugfs_nodes(edac_dev);
1235
1236 rc = edac_device_add_device(edac_dev);
1237 if (rc > 0) {
1238 dev_err(edac->dev, "failed edac_device_add_device()\n");
1239 rc = -ENOMEM;
1240 goto err_ctl_free;
1241 }
1242
1243 if (edac_op_state == EDAC_OPSTATE_INT)
1244 edac_dev->op_state = OP_RUNNING_INTERRUPT;
1245
1246 list_add(&ctx->next, &edac->l3s);
1247
1248 xgene_edac_l3_hw_init(edac_dev, 1);
1249
1250 devres_remove_group(edac->dev, xgene_edac_l3_add);
1251
1252 dev_info(edac->dev, "X-Gene EDAC L3 registered\n");
1253 return 0;
1254
1255err_ctl_free:
1256 edac_device_free_ctl_info(edac_dev);
1257err_release_group:
1258 devres_release_group(edac->dev, xgene_edac_l3_add);
1259 return rc;
1260}
1261
1262static int xgene_edac_l3_remove(struct xgene_edac_dev_ctx *l3)
1263{
1264 struct edac_device_ctl_info *edac_dev = l3->edac_dev;
1265
1266 xgene_edac_l3_hw_init(edac_dev, 0);
1267 edac_device_del_device(l3->edac->dev);
1268 edac_device_free_ctl_info(edac_dev);
1269 return 0;
1270}
1271
/* SoC error device */

/* IOB AXI slave 0 transaction error registers */
#define IOBAXIS0TRANSERRINTSTS		0x0000
#define  IOBAXIS0_M_ILLEGAL_ACCESS_MASK	BIT(1)
#define  IOBAXIS0_ILLEGAL_ACCESS_MASK	BIT(0)
#define IOBAXIS0TRANSERRINTMSK		0x0004
#define IOBAXIS0TRANSERRREQINFOL	0x0008
#define IOBAXIS0TRANSERRREQINFOH	0x000c
#define  REQTYPE_RD(src)		((src) & BIT(0))
#define  ERRADDRH_RD(src)		(((src) & 0xffc00000) >> 22)

/* IOB AXI slave 1 transaction error registers */
#define IOBAXIS1TRANSERRINTSTS		0x0010
#define IOBAXIS1TRANSERRINTMSK		0x0014
#define IOBAXIS1TRANSERRREQINFOL	0x0018
#define IOBAXIS1TRANSERRREQINFOH	0x001c

/* IOB processing agent (PA) transaction error status */
#define IOBPATRANSERRINTSTS		0x0020
#define  IOBPA_M_REQIDRAM_CORRUPT_MASK	BIT(7)
#define  IOBPA_REQIDRAM_CORRUPT_MASK	BIT(6)
#define  IOBPA_M_TRANS_CORRUPT_MASK	BIT(5)
#define  IOBPA_TRANS_CORRUPT_MASK	BIT(4)
#define  IOBPA_M_WDATA_CORRUPT_MASK	BIT(3)
#define  IOBPA_WDATA_CORRUPT_MASK	BIT(2)
#define  IOBPA_M_RDATA_CORRUPT_MASK	BIT(1)
#define  IOBPA_RDATA_CORRUPT_MASK	BIT(0)

/* IOB bridge agent (BA) transaction error registers */
#define IOBBATRANSERRINTSTS		0x0030
#define  M_ILLEGAL_ACCESS_MASK		BIT(15)
#define  ILLEGAL_ACCESS_MASK		BIT(14)
#define  M_WIDRAM_CORRUPT_MASK		BIT(13)
#define  WIDRAM_CORRUPT_MASK		BIT(12)
#define  M_RIDRAM_CORRUPT_MASK		BIT(11)
#define  RIDRAM_CORRUPT_MASK		BIT(10)
#define  M_TRANS_CORRUPT_MASK		BIT(9)
#define  TRANS_CORRUPT_MASK		BIT(8)
#define  M_WDATA_CORRUPT_MASK		BIT(7)
#define  WDATA_CORRUPT_MASK		BIT(6)
#define  M_RBM_POISONED_REQ_MASK	BIT(5)
#define  RBM_POISONED_REQ_MASK		BIT(4)
#define  M_XGIC_POISONED_REQ_MASK	BIT(3)
#define  XGIC_POISONED_REQ_MASK		BIT(2)
#define  M_WRERR_RESP_MASK		BIT(1)
#define  WRERR_RESP_MASK		BIT(0)
#define IOBBATRANSERRREQINFOL		0x0038
#define IOBBATRANSERRREQINFOH		0x003c
#define  REQTYPE_F2_RD(src)		((src) & BIT(0))
#define  ERRADDRH_F2_RD(src)		(((src) & 0xffc00000) >> 22)
#define IOBBATRANSERRCSWREQID		0x0040

/* XGIC transaction error registers */
#define XGICTRANSERRINTSTS		0x0050
#define  M_WR_ACCESS_ERR_MASK		BIT(3)
#define  WR_ACCESS_ERR_MASK		BIT(2)
#define  M_RD_ACCESS_ERR_MASK		BIT(1)
#define  RD_ACCESS_ERR_MASK		BIT(0)
#define XGICTRANSERRINTMSK		0x0054
#define XGICTRANSERRREQINFO		0x0058
#define  REQTYPE_MASK			BIT(26)
#define  ERRADDR_RD(src)		((src) & 0x03ffffff)

/* IOB global memory error status and log registers */
#define GLBL_ERR_STS			0x0800
#define  MDED_ERR_MASK			BIT(3)
#define  DED_ERR_MASK			BIT(2)
#define  MSEC_ERR_MASK			BIT(1)
#define  SEC_ERR_MASK			BIT(0)
#define GLBL_SEC_ERRL			0x0810
#define GLBL_SEC_ERRH			0x0818
#define GLBL_MSEC_ERRL			0x0820
#define GLBL_MSEC_ERRH			0x0828
#define GLBL_DED_ERRL			0x0830
#define GLBL_DED_ERRLMASK		0x0834
#define GLBL_DED_ERRH			0x0838
#define GLBL_DED_ERRHMASK		0x083c
#define GLBL_MDED_ERRL			0x0840
#define GLBL_MDED_ERRLMASK		0x0844
#define GLBL_MDED_ERRH			0x0848
#define GLBL_MDED_ERRHMASK		0x084c
1342
/* IO Bus Registers */

/* Control/status register */
#define RBCSR				0x0000
#define  STICKYERR_MASK			BIT(0)

/* Error information register: error cause bits plus the faulting address */
#define RBEIR				0x0008
#define  AGENT_OFFLINE_ERR_MASK		BIT(30)
#define  UNIMPL_RBPAGE_ERR_MASK		BIT(29)
#define  WORD_ALIGNED_ERR_MASK		BIT(28)
#define  PAGE_ACCESS_ERR_MASK		BIT(27)
#define  WRITE_ACCESS_MASK		BIT(26)
#define  RBERRADDR_RD(src)		((src) & 0x03FFFFFF)
1353
/*
 * Names printed by xgene_edac_soc_check() for version 1 hardware: entry N is
 * the label used when bit N of the MEMERRINTSTS value is set.
 */
static const char * const soc_mem_err_v1[] = {
	[0]  = "10GbE0",
	[1]  = "10GbE1",
	[2]  = "Security",
	[3]  = "SATA45",
	[4]  = "SATA23/ETH23",
	[5]  = "SATA01/ETH01",
	[6]  = "USB1",
	[7]  = "USB0",
	[8]  = "QML",
	[9]  = "QM0",
	[10] = "QM1 (XGbE01)",
	[11] = "PCIE4",
	[12] = "PCIE3",
	[13] = "PCIE2",
	[14] = "PCIE1",
	[15] = "PCIE0",
	[16] = "CTX Manager",
	[17] = "OCM",
	[18] = "1GbE",
	[19] = "CLE",
	[20] = "AHBC",
	[21] = "PktDMA",
	[22] = "GFC",
	[23] = "MSLIM",
	[24] = "10GbE2",
	[25] = "10GbE3",
	[26] = "QM2 (XGbE23)",
	[27] = "IOB",
	[28] = "unknown",
	[29] = "unknown",
	[30] = "unknown",
	[31] = "unknown",
};
1388
1389static void xgene_edac_iob_gic_report(struct edac_device_ctl_info *edac_dev)
1390{
1391 struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
1392 u32 err_addr_lo;
1393 u32 err_addr_hi;
1394 u32 reg;
1395 u32 info;
1396
1397 /* GIC transaction error interrupt */
1398 reg = readl(ctx->dev_csr + XGICTRANSERRINTSTS);
1399 if (!reg)
1400 goto chk_iob_err;
1401 dev_err(edac_dev->dev, "XGIC transaction error\n");
1402 if (reg & RD_ACCESS_ERR_MASK)
1403 dev_err(edac_dev->dev, "XGIC read size error\n");
1404 if (reg & M_RD_ACCESS_ERR_MASK)
1405 dev_err(edac_dev->dev, "Multiple XGIC read size error\n");
1406 if (reg & WR_ACCESS_ERR_MASK)
1407 dev_err(edac_dev->dev, "XGIC write size error\n");
1408 if (reg & M_WR_ACCESS_ERR_MASK)
1409 dev_err(edac_dev->dev, "Multiple XGIC write size error\n");
1410 info = readl(ctx->dev_csr + XGICTRANSERRREQINFO);
1411 dev_err(edac_dev->dev, "XGIC %s access @ 0x%08X (0x%08X)\n",
1412 info & REQTYPE_MASK ? "read" : "write", ERRADDR_RD(info),
1413 info);
1414 writel(reg, ctx->dev_csr + XGICTRANSERRINTSTS);
1415
1416chk_iob_err:
1417 /* IOB memory error */
1418 reg = readl(ctx->dev_csr + GLBL_ERR_STS);
1419 if (!reg)
1420 return;
1421 if (reg & SEC_ERR_MASK) {
1422 err_addr_lo = readl(ctx->dev_csr + GLBL_SEC_ERRL);
1423 err_addr_hi = readl(ctx->dev_csr + GLBL_SEC_ERRH);
1424 dev_err(edac_dev->dev,
1425 "IOB single-bit correctable memory at 0x%08X.%08X error\n",
1426 err_addr_lo, err_addr_hi);
1427 writel(err_addr_lo, ctx->dev_csr + GLBL_SEC_ERRL);
1428 writel(err_addr_hi, ctx->dev_csr + GLBL_SEC_ERRH);
1429 }
1430 if (reg & MSEC_ERR_MASK) {
1431 err_addr_lo = readl(ctx->dev_csr + GLBL_MSEC_ERRL);
1432 err_addr_hi = readl(ctx->dev_csr + GLBL_MSEC_ERRH);
1433 dev_err(edac_dev->dev,
1434 "IOB multiple single-bit correctable memory at 0x%08X.%08X error\n",
1435 err_addr_lo, err_addr_hi);
1436 writel(err_addr_lo, ctx->dev_csr + GLBL_MSEC_ERRL);
1437 writel(err_addr_hi, ctx->dev_csr + GLBL_MSEC_ERRH);
1438 }
1439 if (reg & (SEC_ERR_MASK | MSEC_ERR_MASK))
1440 edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
1441
1442 if (reg & DED_ERR_MASK) {
1443 err_addr_lo = readl(ctx->dev_csr + GLBL_DED_ERRL);
1444 err_addr_hi = readl(ctx->dev_csr + GLBL_DED_ERRH);
1445 dev_err(edac_dev->dev,
1446 "IOB double-bit uncorrectable memory at 0x%08X.%08X error\n",
1447 err_addr_lo, err_addr_hi);
1448 writel(err_addr_lo, ctx->dev_csr + GLBL_DED_ERRL);
1449 writel(err_addr_hi, ctx->dev_csr + GLBL_DED_ERRH);
1450 }
1451 if (reg & MDED_ERR_MASK) {
1452 err_addr_lo = readl(ctx->dev_csr + GLBL_MDED_ERRL);
1453 err_addr_hi = readl(ctx->dev_csr + GLBL_MDED_ERRH);
1454 dev_err(edac_dev->dev,
1455 "Multiple IOB double-bit uncorrectable memory at 0x%08X.%08X error\n",
1456 err_addr_lo, err_addr_hi);
1457 writel(err_addr_lo, ctx->dev_csr + GLBL_MDED_ERRL);
1458 writel(err_addr_hi, ctx->dev_csr + GLBL_MDED_ERRH);
1459 }
1460 if (reg & (DED_ERR_MASK | MDED_ERR_MASK))
1461 edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
1462}
1463
1464static void xgene_edac_rb_report(struct edac_device_ctl_info *edac_dev)
1465{
1466 struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
1467 u32 err_addr_lo;
1468 u32 err_addr_hi;
1469 u32 reg;
1470
Loc Ho4d67e3c2016-01-22 13:47:04 -07001471 /* If the register bus resource isn't available, just skip it */
1472 if (!ctx->edac->rb_map)
1473 goto rb_skip;
1474
1475 /*
1476 * Check RB access errors
1477 * 1. Out of range
1478 * 2. Un-implemented page
1479 * 3. Un-aligned access
1480 * 4. Offline slave IP
1481 */
1482 if (regmap_read(ctx->edac->rb_map, RBCSR, &reg))
1483 return;
1484 if (reg & STICKYERR_MASK) {
1485 bool write;
1486 u32 address;
1487
1488 dev_err(edac_dev->dev, "IOB bus access error(s)\n");
1489 if (regmap_read(ctx->edac->rb_map, RBEIR, &reg))
1490 return;
1491 write = reg & WRITE_ACCESS_MASK ? 1 : 0;
1492 address = RBERRADDR_RD(reg);
1493 if (reg & AGENT_OFFLINE_ERR_MASK)
1494 dev_err(edac_dev->dev,
1495 "IOB bus %s access to offline agent error\n",
1496 write ? "write" : "read");
1497 if (reg & UNIMPL_RBPAGE_ERR_MASK)
1498 dev_err(edac_dev->dev,
1499 "IOB bus %s access to unimplemented page error\n",
1500 write ? "write" : "read");
1501 if (reg & WORD_ALIGNED_ERR_MASK)
1502 dev_err(edac_dev->dev,
1503 "IOB bus %s word aligned access error\n",
1504 write ? "write" : "read");
1505 if (reg & PAGE_ACCESS_ERR_MASK)
1506 dev_err(edac_dev->dev,
1507 "IOB bus %s to page out of range access error\n",
1508 write ? "write" : "read");
1509 if (regmap_write(ctx->edac->rb_map, RBEIR, 0))
1510 return;
1511 if (regmap_write(ctx->edac->rb_map, RBCSR, 0))
1512 return;
1513 }
1514rb_skip:
1515
Loc Hof864b792015-09-23 17:41:00 -07001516 /* IOB Bridge agent transaction error interrupt */
1517 reg = readl(ctx->dev_csr + IOBBATRANSERRINTSTS);
1518 if (!reg)
1519 return;
1520
1521 dev_err(edac_dev->dev, "IOB bridge agent (BA) transaction error\n");
1522 if (reg & WRERR_RESP_MASK)
1523 dev_err(edac_dev->dev, "IOB BA write response error\n");
1524 if (reg & M_WRERR_RESP_MASK)
1525 dev_err(edac_dev->dev,
1526 "Multiple IOB BA write response error\n");
1527 if (reg & XGIC_POISONED_REQ_MASK)
1528 dev_err(edac_dev->dev, "IOB BA XGIC poisoned write error\n");
1529 if (reg & M_XGIC_POISONED_REQ_MASK)
1530 dev_err(edac_dev->dev,
1531 "Multiple IOB BA XGIC poisoned write error\n");
1532 if (reg & RBM_POISONED_REQ_MASK)
1533 dev_err(edac_dev->dev, "IOB BA RBM poisoned write error\n");
1534 if (reg & M_RBM_POISONED_REQ_MASK)
1535 dev_err(edac_dev->dev,
1536 "Multiple IOB BA RBM poisoned write error\n");
1537 if (reg & WDATA_CORRUPT_MASK)
1538 dev_err(edac_dev->dev, "IOB BA write error\n");
1539 if (reg & M_WDATA_CORRUPT_MASK)
1540 dev_err(edac_dev->dev, "Multiple IOB BA write error\n");
1541 if (reg & TRANS_CORRUPT_MASK)
1542 dev_err(edac_dev->dev, "IOB BA transaction error\n");
1543 if (reg & M_TRANS_CORRUPT_MASK)
1544 dev_err(edac_dev->dev, "Multiple IOB BA transaction error\n");
1545 if (reg & RIDRAM_CORRUPT_MASK)
1546 dev_err(edac_dev->dev,
1547 "IOB BA RDIDRAM read transaction ID error\n");
1548 if (reg & M_RIDRAM_CORRUPT_MASK)
1549 dev_err(edac_dev->dev,
1550 "Multiple IOB BA RDIDRAM read transaction ID error\n");
1551 if (reg & WIDRAM_CORRUPT_MASK)
1552 dev_err(edac_dev->dev,
1553 "IOB BA RDIDRAM write transaction ID error\n");
1554 if (reg & M_WIDRAM_CORRUPT_MASK)
1555 dev_err(edac_dev->dev,
1556 "Multiple IOB BA RDIDRAM write transaction ID error\n");
1557 if (reg & ILLEGAL_ACCESS_MASK)
1558 dev_err(edac_dev->dev,
1559 "IOB BA XGIC/RB illegal access error\n");
1560 if (reg & M_ILLEGAL_ACCESS_MASK)
1561 dev_err(edac_dev->dev,
1562 "Multiple IOB BA XGIC/RB illegal access error\n");
1563
1564 err_addr_lo = readl(ctx->dev_csr + IOBBATRANSERRREQINFOL);
1565 err_addr_hi = readl(ctx->dev_csr + IOBBATRANSERRREQINFOH);
1566 dev_err(edac_dev->dev, "IOB BA %s access at 0x%02X.%08X (0x%08X)\n",
1567 REQTYPE_F2_RD(err_addr_hi) ? "read" : "write",
1568 ERRADDRH_F2_RD(err_addr_hi), err_addr_lo, err_addr_hi);
1569 if (reg & WRERR_RESP_MASK)
1570 dev_err(edac_dev->dev, "IOB BA requestor ID 0x%08X\n",
1571 readl(ctx->dev_csr + IOBBATRANSERRCSWREQID));
1572 writel(reg, ctx->dev_csr + IOBBATRANSERRINTSTS);
1573}
1574
1575static void xgene_edac_pa_report(struct edac_device_ctl_info *edac_dev)
1576{
1577 struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
1578 u32 err_addr_lo;
1579 u32 err_addr_hi;
1580 u32 reg;
1581
1582 /* IOB Processing agent transaction error interrupt */
1583 reg = readl(ctx->dev_csr + IOBPATRANSERRINTSTS);
1584 if (!reg)
1585 goto chk_iob_axi0;
Colin Ian King4bd035e2017-02-23 00:26:09 +00001586 dev_err(edac_dev->dev, "IOB processing agent (PA) transaction error\n");
Loc Hof864b792015-09-23 17:41:00 -07001587 if (reg & IOBPA_RDATA_CORRUPT_MASK)
1588 dev_err(edac_dev->dev, "IOB PA read data RAM error\n");
1589 if (reg & IOBPA_M_RDATA_CORRUPT_MASK)
1590 dev_err(edac_dev->dev,
Colin Ian King81761702016-11-14 23:11:04 +00001591 "Multiple IOB PA read data RAM error\n");
Loc Hof864b792015-09-23 17:41:00 -07001592 if (reg & IOBPA_WDATA_CORRUPT_MASK)
1593 dev_err(edac_dev->dev, "IOB PA write data RAM error\n");
1594 if (reg & IOBPA_M_WDATA_CORRUPT_MASK)
1595 dev_err(edac_dev->dev,
Colin Ian King81761702016-11-14 23:11:04 +00001596 "Multiple IOB PA write data RAM error\n");
Loc Hof864b792015-09-23 17:41:00 -07001597 if (reg & IOBPA_TRANS_CORRUPT_MASK)
1598 dev_err(edac_dev->dev, "IOB PA transaction error\n");
1599 if (reg & IOBPA_M_TRANS_CORRUPT_MASK)
Colin Ian King81761702016-11-14 23:11:04 +00001600 dev_err(edac_dev->dev, "Multiple IOB PA transaction error\n");
Loc Hof864b792015-09-23 17:41:00 -07001601 if (reg & IOBPA_REQIDRAM_CORRUPT_MASK)
1602 dev_err(edac_dev->dev, "IOB PA transaction ID RAM error\n");
1603 if (reg & IOBPA_M_REQIDRAM_CORRUPT_MASK)
1604 dev_err(edac_dev->dev,
1605 "Multiple IOB PA transaction ID RAM error\n");
1606 writel(reg, ctx->dev_csr + IOBPATRANSERRINTSTS);
1607
1608chk_iob_axi0:
1609 /* IOB AXI0 Error */
1610 reg = readl(ctx->dev_csr + IOBAXIS0TRANSERRINTSTS);
1611 if (!reg)
1612 goto chk_iob_axi1;
1613 err_addr_lo = readl(ctx->dev_csr + IOBAXIS0TRANSERRREQINFOL);
1614 err_addr_hi = readl(ctx->dev_csr + IOBAXIS0TRANSERRREQINFOH);
1615 dev_err(edac_dev->dev,
1616 "%sAXI slave 0 illegal %s access @ 0x%02X.%08X (0x%08X)\n",
1617 reg & IOBAXIS0_M_ILLEGAL_ACCESS_MASK ? "Multiple " : "",
1618 REQTYPE_RD(err_addr_hi) ? "read" : "write",
1619 ERRADDRH_RD(err_addr_hi), err_addr_lo, err_addr_hi);
1620 writel(reg, ctx->dev_csr + IOBAXIS0TRANSERRINTSTS);
1621
1622chk_iob_axi1:
1623 /* IOB AXI1 Error */
1624 reg = readl(ctx->dev_csr + IOBAXIS1TRANSERRINTSTS);
1625 if (!reg)
1626 return;
1627 err_addr_lo = readl(ctx->dev_csr + IOBAXIS1TRANSERRREQINFOL);
1628 err_addr_hi = readl(ctx->dev_csr + IOBAXIS1TRANSERRREQINFOH);
1629 dev_err(edac_dev->dev,
1630 "%sAXI slave 1 illegal %s access @ 0x%02X.%08X (0x%08X)\n",
1631 reg & IOBAXIS0_M_ILLEGAL_ACCESS_MASK ? "Multiple " : "",
1632 REQTYPE_RD(err_addr_hi) ? "read" : "write",
1633 ERRADDRH_RD(err_addr_hi), err_addr_lo, err_addr_hi);
1634 writel(reg, ctx->dev_csr + IOBAXIS1TRANSERRINTSTS);
1635}
1636
1637static void xgene_edac_soc_check(struct edac_device_ctl_info *edac_dev)
1638{
1639 struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
1640 const char * const *soc_mem_err = NULL;
1641 u32 pcp_hp_stat;
1642 u32 pcp_lp_stat;
1643 u32 reg;
1644 int i;
1645
1646 xgene_edac_pcp_rd(ctx->edac, PCPHPERRINTSTS, &pcp_hp_stat);
1647 xgene_edac_pcp_rd(ctx->edac, PCPLPERRINTSTS, &pcp_lp_stat);
1648 xgene_edac_pcp_rd(ctx->edac, MEMERRINTSTS, &reg);
1649 if (!((pcp_hp_stat & (IOB_PA_ERR_MASK | IOB_BA_ERR_MASK |
1650 IOB_XGIC_ERR_MASK | IOB_RB_ERR_MASK)) ||
1651 (pcp_lp_stat & CSW_SWITCH_TRACE_ERR_MASK) || reg))
1652 return;
1653
1654 if (pcp_hp_stat & IOB_XGIC_ERR_MASK)
1655 xgene_edac_iob_gic_report(edac_dev);
1656
1657 if (pcp_hp_stat & (IOB_RB_ERR_MASK | IOB_BA_ERR_MASK))
1658 xgene_edac_rb_report(edac_dev);
1659
1660 if (pcp_hp_stat & IOB_PA_ERR_MASK)
1661 xgene_edac_pa_report(edac_dev);
1662
1663 if (pcp_lp_stat & CSW_SWITCH_TRACE_ERR_MASK) {
1664 dev_info(edac_dev->dev,
1665 "CSW switch trace correctable memory parity error\n");
1666 edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
1667 }
1668
1669 if (!reg)
1670 return;
1671 if (ctx->version == 1)
1672 soc_mem_err = soc_mem_err_v1;
1673 if (!soc_mem_err) {
1674 dev_err(edac_dev->dev, "SoC memory parity error 0x%08X\n",
1675 reg);
1676 edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
1677 return;
1678 }
1679 for (i = 0; i < 31; i++) {
1680 if (reg & (1 << i)) {
1681 dev_err(edac_dev->dev, "%s memory parity error\n",
1682 soc_mem_err[i]);
1683 edac_device_handle_ue(edac_dev, 0, 0,
1684 edac_dev->ctl_name);
1685 }
1686 }
1687}
1688
1689static void xgene_edac_soc_hw_init(struct edac_device_ctl_info *edac_dev,
1690 bool enable)
1691{
1692 struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
1693
1694 /* Enable SoC IP error interrupt */
1695 if (edac_dev->op_state == OP_RUNNING_INTERRUPT) {
1696 if (enable) {
1697 xgene_edac_pcp_clrbits(ctx->edac, PCPHPERRINTMSK,
1698 IOB_PA_ERR_MASK |
1699 IOB_BA_ERR_MASK |
1700 IOB_XGIC_ERR_MASK |
1701 IOB_RB_ERR_MASK);
1702 xgene_edac_pcp_clrbits(ctx->edac, PCPLPERRINTMSK,
1703 CSW_SWITCH_TRACE_ERR_MASK);
1704 } else {
1705 xgene_edac_pcp_setbits(ctx->edac, PCPHPERRINTMSK,
1706 IOB_PA_ERR_MASK |
1707 IOB_BA_ERR_MASK |
1708 IOB_XGIC_ERR_MASK |
1709 IOB_RB_ERR_MASK);
1710 xgene_edac_pcp_setbits(ctx->edac, PCPLPERRINTMSK,
1711 CSW_SWITCH_TRACE_ERR_MASK);
1712 }
1713
1714 writel(enable ? 0x0 : 0xFFFFFFFF,
1715 ctx->dev_csr + IOBAXIS0TRANSERRINTMSK);
1716 writel(enable ? 0x0 : 0xFFFFFFFF,
1717 ctx->dev_csr + IOBAXIS1TRANSERRINTMSK);
1718 writel(enable ? 0x0 : 0xFFFFFFFF,
1719 ctx->dev_csr + XGICTRANSERRINTMSK);
1720
1721 xgene_edac_pcp_setbits(ctx->edac, MEMERRINTMSK,
1722 enable ? 0x0 : 0xFFFFFFFF);
1723 }
1724}
1725
1726static int xgene_edac_soc_add(struct xgene_edac *edac, struct device_node *np,
1727 int version)
1728{
1729 struct edac_device_ctl_info *edac_dev;
1730 struct xgene_edac_dev_ctx *ctx;
1731 void __iomem *dev_csr;
1732 struct resource res;
1733 int edac_idx;
1734 int rc;
1735
1736 if (!devres_open_group(edac->dev, xgene_edac_soc_add, GFP_KERNEL))
1737 return -ENOMEM;
1738
1739 rc = of_address_to_resource(np, 0, &res);
1740 if (rc < 0) {
1741 dev_err(edac->dev, "no SoC resource address\n");
1742 goto err_release_group;
1743 }
1744 dev_csr = devm_ioremap_resource(edac->dev, &res);
1745 if (IS_ERR(dev_csr)) {
1746 dev_err(edac->dev,
1747 "devm_ioremap_resource failed for soc resource address\n");
1748 rc = PTR_ERR(dev_csr);
1749 goto err_release_group;
1750 }
1751
1752 edac_idx = edac_device_alloc_index();
1753 edac_dev = edac_device_alloc_ctl_info(sizeof(*ctx),
1754 "SOC", 1, "SOC", 1, 2, NULL, 0,
1755 edac_idx);
1756 if (!edac_dev) {
1757 rc = -ENOMEM;
1758 goto err_release_group;
1759 }
1760
1761 ctx = edac_dev->pvt_info;
1762 ctx->dev_csr = dev_csr;
1763 ctx->name = "xgene_soc_err";
1764 ctx->edac_idx = edac_idx;
1765 ctx->edac = edac;
1766 ctx->edac_dev = edac_dev;
1767 ctx->ddev = *edac->dev;
1768 ctx->version = version;
1769 edac_dev->dev = &ctx->ddev;
1770 edac_dev->ctl_name = ctx->name;
1771 edac_dev->dev_name = ctx->name;
1772 edac_dev->mod_name = EDAC_MOD_STR;
1773
1774 if (edac_op_state == EDAC_OPSTATE_POLL)
1775 edac_dev->edac_check = xgene_edac_soc_check;
1776
1777 rc = edac_device_add_device(edac_dev);
1778 if (rc > 0) {
1779 dev_err(edac->dev, "failed edac_device_add_device()\n");
1780 rc = -ENOMEM;
1781 goto err_ctl_free;
1782 }
1783
1784 if (edac_op_state == EDAC_OPSTATE_INT)
1785 edac_dev->op_state = OP_RUNNING_INTERRUPT;
1786
1787 list_add(&ctx->next, &edac->socs);
1788
1789 xgene_edac_soc_hw_init(edac_dev, 1);
1790
1791 devres_remove_group(edac->dev, xgene_edac_soc_add);
1792
1793 dev_info(edac->dev, "X-Gene EDAC SoC registered\n");
1794
1795 return 0;
1796
1797err_ctl_free:
1798 edac_device_free_ctl_info(edac_dev);
1799err_release_group:
1800 devres_release_group(edac->dev, xgene_edac_soc_add);
1801 return rc;
1802}
1803
1804static int xgene_edac_soc_remove(struct xgene_edac_dev_ctx *soc)
1805{
1806 struct edac_device_ctl_info *edac_dev = soc->edac_dev;
1807
1808 xgene_edac_soc_hw_init(edac_dev, 0);
1809 edac_device_del_device(soc->edac->dev);
1810 edac_device_free_ctl_info(edac_dev);
1811 return 0;
1812}
1813
Loc Ho0d442932015-05-22 17:32:59 -06001814static irqreturn_t xgene_edac_isr(int irq, void *dev_id)
1815{
1816 struct xgene_edac *ctx = dev_id;
1817 struct xgene_edac_pmd_ctx *pmd;
Loc Ho93474732015-09-23 17:40:59 -07001818 struct xgene_edac_dev_ctx *node;
Loc Ho0d442932015-05-22 17:32:59 -06001819 unsigned int pcp_hp_stat;
1820 unsigned int pcp_lp_stat;
1821
1822 xgene_edac_pcp_rd(ctx, PCPHPERRINTSTS, &pcp_hp_stat);
1823 xgene_edac_pcp_rd(ctx, PCPLPERRINTSTS, &pcp_lp_stat);
1824 if ((MCU_UNCORR_ERR_MASK & pcp_hp_stat) ||
1825 (MCU_CTL_ERR_MASK & pcp_hp_stat) ||
1826 (MCU_CORR_ERR_MASK & pcp_lp_stat)) {
1827 struct xgene_edac_mc_ctx *mcu;
1828
Loc Ho93474732015-09-23 17:40:59 -07001829 list_for_each_entry(mcu, &ctx->mcus, next)
Loc Ho0d442932015-05-22 17:32:59 -06001830 xgene_edac_mc_check(mcu->mci);
Loc Ho0d442932015-05-22 17:32:59 -06001831 }
1832
1833 list_for_each_entry(pmd, &ctx->pmds, next) {
1834 if ((PMD0_MERR_MASK << pmd->pmd) & pcp_hp_stat)
1835 xgene_edac_pmd_check(pmd->edac_dev);
1836 }
1837
Loc Ho93474732015-09-23 17:40:59 -07001838 list_for_each_entry(node, &ctx->l3s, next)
1839 xgene_edac_l3_check(node->edac_dev);
1840
Loc Hof864b792015-09-23 17:41:00 -07001841 list_for_each_entry(node, &ctx->socs, next)
1842 xgene_edac_soc_check(node->edac_dev);
1843
Loc Ho0d442932015-05-22 17:32:59 -06001844 return IRQ_HANDLED;
1845}
1846
1847static int xgene_edac_probe(struct platform_device *pdev)
1848{
1849 struct xgene_edac *edac;
1850 struct device_node *child;
1851 struct resource *res;
1852 int rc;
1853
1854 edac = devm_kzalloc(&pdev->dev, sizeof(*edac), GFP_KERNEL);
1855 if (!edac)
1856 return -ENOMEM;
1857
1858 edac->dev = &pdev->dev;
1859 platform_set_drvdata(pdev, edac);
1860 INIT_LIST_HEAD(&edac->mcus);
1861 INIT_LIST_HEAD(&edac->pmds);
Loc Ho93474732015-09-23 17:40:59 -07001862 INIT_LIST_HEAD(&edac->l3s);
Loc Hof864b792015-09-23 17:41:00 -07001863 INIT_LIST_HEAD(&edac->socs);
Loc Ho0d442932015-05-22 17:32:59 -06001864 spin_lock_init(&edac->lock);
1865 mutex_init(&edac->mc_lock);
1866
1867 edac->csw_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
1868 "regmap-csw");
1869 if (IS_ERR(edac->csw_map)) {
1870 dev_err(edac->dev, "unable to get syscon regmap csw\n");
1871 rc = PTR_ERR(edac->csw_map);
1872 goto out_err;
1873 }
1874
1875 edac->mcba_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
1876 "regmap-mcba");
1877 if (IS_ERR(edac->mcba_map)) {
1878 dev_err(edac->dev, "unable to get syscon regmap mcba\n");
1879 rc = PTR_ERR(edac->mcba_map);
1880 goto out_err;
1881 }
1882
1883 edac->mcbb_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
1884 "regmap-mcbb");
1885 if (IS_ERR(edac->mcbb_map)) {
1886 dev_err(edac->dev, "unable to get syscon regmap mcbb\n");
1887 rc = PTR_ERR(edac->mcbb_map);
1888 goto out_err;
1889 }
1890 edac->efuse_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
1891 "regmap-efuse");
1892 if (IS_ERR(edac->efuse_map)) {
1893 dev_err(edac->dev, "unable to get syscon regmap efuse\n");
1894 rc = PTR_ERR(edac->efuse_map);
1895 goto out_err;
1896 }
1897
Loc Ho4d67e3c2016-01-22 13:47:04 -07001898 /*
1899 * NOTE: The register bus resource is optional for compatibility
1900 * reason.
1901 */
1902 edac->rb_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
1903 "regmap-rb");
1904 if (IS_ERR(edac->rb_map)) {
1905 dev_warn(edac->dev, "missing syscon regmap rb\n");
1906 edac->rb_map = NULL;
1907 }
1908
Loc Ho0d442932015-05-22 17:32:59 -06001909 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
1910 edac->pcp_csr = devm_ioremap_resource(&pdev->dev, res);
1911 if (IS_ERR(edac->pcp_csr)) {
1912 dev_err(&pdev->dev, "no PCP resource address\n");
1913 rc = PTR_ERR(edac->pcp_csr);
1914 goto out_err;
1915 }
1916
1917 if (edac_op_state == EDAC_OPSTATE_INT) {
1918 int irq;
1919 int i;
1920
1921 for (i = 0; i < 3; i++) {
1922 irq = platform_get_irq(pdev, i);
1923 if (irq < 0) {
1924 dev_err(&pdev->dev, "No IRQ resource\n");
1925 rc = -EINVAL;
1926 goto out_err;
1927 }
1928 rc = devm_request_irq(&pdev->dev, irq,
1929 xgene_edac_isr, IRQF_SHARED,
1930 dev_name(&pdev->dev), edac);
1931 if (rc) {
1932 dev_err(&pdev->dev,
1933 "Could not request IRQ %d\n", irq);
1934 goto out_err;
1935 }
1936 }
1937 }
1938
Loc Ho93474732015-09-23 17:40:59 -07001939 edac->dfs = edac_debugfs_create_dir(pdev->dev.kobj.name);
1940
Loc Ho0d442932015-05-22 17:32:59 -06001941 for_each_child_of_node(pdev->dev.of_node, child) {
1942 if (!of_device_is_available(child))
1943 continue;
1944 if (of_device_is_compatible(child, "apm,xgene-edac-mc"))
1945 xgene_edac_mc_add(edac, child);
1946 if (of_device_is_compatible(child, "apm,xgene-edac-pmd"))
Arnd Bergmann451bb7f2015-06-01 16:09:35 -06001947 xgene_edac_pmd_add(edac, child, 1);
1948 if (of_device_is_compatible(child, "apm,xgene-edac-pmd-v2"))
1949 xgene_edac_pmd_add(edac, child, 2);
Loc Ho93474732015-09-23 17:40:59 -07001950 if (of_device_is_compatible(child, "apm,xgene-edac-l3"))
1951 xgene_edac_l3_add(edac, child, 1);
1952 if (of_device_is_compatible(child, "apm,xgene-edac-l3-v2"))
1953 xgene_edac_l3_add(edac, child, 2);
Loc Hof864b792015-09-23 17:41:00 -07001954 if (of_device_is_compatible(child, "apm,xgene-edac-soc"))
1955 xgene_edac_soc_add(edac, child, 0);
1956 if (of_device_is_compatible(child, "apm,xgene-edac-soc-v1"))
1957 xgene_edac_soc_add(edac, child, 1);
Loc Ho0d442932015-05-22 17:32:59 -06001958 }
1959
1960 return 0;
1961
1962out_err:
1963 return rc;
1964}
1965
1966static int xgene_edac_remove(struct platform_device *pdev)
1967{
1968 struct xgene_edac *edac = dev_get_drvdata(&pdev->dev);
1969 struct xgene_edac_mc_ctx *mcu;
1970 struct xgene_edac_mc_ctx *temp_mcu;
1971 struct xgene_edac_pmd_ctx *pmd;
1972 struct xgene_edac_pmd_ctx *temp_pmd;
Loc Ho93474732015-09-23 17:40:59 -07001973 struct xgene_edac_dev_ctx *node;
1974 struct xgene_edac_dev_ctx *temp_node;
Loc Ho0d442932015-05-22 17:32:59 -06001975
Loc Ho93474732015-09-23 17:40:59 -07001976 list_for_each_entry_safe(mcu, temp_mcu, &edac->mcus, next)
Loc Ho0d442932015-05-22 17:32:59 -06001977 xgene_edac_mc_remove(mcu);
Loc Ho0d442932015-05-22 17:32:59 -06001978
Loc Ho93474732015-09-23 17:40:59 -07001979 list_for_each_entry_safe(pmd, temp_pmd, &edac->pmds, next)
Loc Ho0d442932015-05-22 17:32:59 -06001980 xgene_edac_pmd_remove(pmd);
Loc Ho93474732015-09-23 17:40:59 -07001981
1982 list_for_each_entry_safe(node, temp_node, &edac->l3s, next)
1983 xgene_edac_l3_remove(node);
1984
Loc Hof864b792015-09-23 17:41:00 -07001985 list_for_each_entry_safe(node, temp_node, &edac->socs, next)
1986 xgene_edac_soc_remove(node);
1987
Loc Ho0d442932015-05-22 17:32:59 -06001988 return 0;
1989}
1990
1991static const struct of_device_id xgene_edac_of_match[] = {
1992 { .compatible = "apm,xgene-edac" },
1993 {},
1994};
1995MODULE_DEVICE_TABLE(of, xgene_edac_of_match);
1996
1997static struct platform_driver xgene_edac_driver = {
1998 .probe = xgene_edac_probe,
1999 .remove = xgene_edac_remove,
2000 .driver = {
2001 .name = "xgene-edac",
Loc Ho0d442932015-05-22 17:32:59 -06002002 .of_match_table = xgene_edac_of_match,
2003 },
2004};
2005
2006static int __init xgene_edac_init(void)
2007{
2008 int rc;
2009
2010 /* Make sure error reporting method is sane */
2011 switch (edac_op_state) {
2012 case EDAC_OPSTATE_POLL:
2013 case EDAC_OPSTATE_INT:
2014 break;
2015 default:
2016 edac_op_state = EDAC_OPSTATE_INT;
2017 break;
2018 }
2019
2020 rc = platform_driver_register(&xgene_edac_driver);
2021 if (rc) {
2022 edac_printk(KERN_ERR, EDAC_MOD_STR,
2023 "EDAC fails to register\n");
2024 goto reg_failed;
2025 }
2026
2027 return 0;
2028
2029reg_failed:
2030 return rc;
2031}
2032module_init(xgene_edac_init);
2033
2034static void __exit xgene_edac_exit(void)
2035{
2036 platform_driver_unregister(&xgene_edac_driver);
2037}
2038module_exit(xgene_edac_exit);
2039
2040MODULE_LICENSE("GPL");
2041MODULE_AUTHOR("Feng Kan <fkan@apm.com>");
2042MODULE_DESCRIPTION("APM X-Gene EDAC driver");
2043module_param(edac_op_state, int, 0444);
2044MODULE_PARM_DESC(edac_op_state,
2045 "EDAC error reporting state: 0=Poll, 2=Interrupt");