// SPDX-License-Identifier: GPL-2.0
/*
 * Driver for Intel client SoC with integrated memory controller using IBECC
 *
 * Copyright (C) 2020 Intel Corporation
 *
 * The In-Band ECC (IBECC) IP provides ECC protection to all or specific
 * regions of the physical memory space. It's used for memory controllers
 * that don't support the out-of-band ECC which often needs an additional
 * storage device to each channel for storing ECC data.
 */
12
13#include <linux/module.h>
14#include <linux/init.h>
15#include <linux/pci.h>
16#include <linux/slab.h>
17#include <linux/irq_work.h>
18#include <linux/llist.h>
19#include <linux/genalloc.h>
20#include <linux/edac.h>
21#include <linux/bits.h>
22#include <linux/io.h>
23#include <asm/mach_traps.h>
24#include <asm/nmi.h>
25
26#include "edac_mc.h"
27#include "edac_module.h"
28
#define IGEN6_REVISION	"v2.4"

#define EDAC_MOD_STR	"igen6_edac"
#define IGEN6_NMI_NAME	"igen6_ibecc"

/* Debug macros: thin wrappers that tag EDAC messages with "igen6" */
#define igen6_printk(level, fmt, arg...)		\
	edac_printk(level, "igen6", fmt, ##arg)

#define igen6_mc_printk(mci, level, fmt, arg...)	\
	edac_mc_chipset_printk(mci, level, "igen6", fmt, ##arg)

/* Extract bits [hi:lo] of v, right-aligned */
#define GET_BITFIELD(v, lo, hi) (((v) & GENMASK_ULL(hi, lo)) >> (lo))

#define NUM_IMC		1 /* Max memory controllers */
#define NUM_CHANNELS	2 /* Max channels */
#define NUM_DIMMS	2 /* Max DIMMs per channel */

#define _4GB		BIT_ULL(32)
48
/* Size of physical memory */
#define TOM_OFFSET			0xa0
/* Top of low usable DRAM */
#define TOLUD_OFFSET			0xbc
/* Capability register C */
#define CAPID_C_OFFSET			0xec
#define CAPID_C_IBECC			BIT(15)

/* Error Status */
#define ERRSTS_OFFSET			0xc8
#define ERRSTS_CE			BIT_ULL(6)
#define ERRSTS_UE			BIT_ULL(7)

/* Error Command */
#define ERRCMD_OFFSET			0xca
#define ERRCMD_CE			BIT_ULL(6)
#define ERRCMD_UE			BIT_ULL(7)

/* IBECC MMIO base address (taken from the active resource config) */
#define IBECC_BASE			(res_cfg->ibecc_base)
#define IBECC_ACTIVATE_OFFSET		IBECC_BASE
#define IBECC_ACTIVATE_EN		BIT(0)

/* IBECC error log: address in bits [38:5], syndrome in bits [61:46] */
#define ECC_ERROR_LOG_OFFSET		(IBECC_BASE + 0x170)
#define ECC_ERROR_LOG_CE		BIT_ULL(62)
#define ECC_ERROR_LOG_UE		BIT_ULL(63)
#define ECC_ERROR_LOG_ADDR_SHIFT	5
#define ECC_ERROR_LOG_ADDR(v)		GET_BITFIELD(v, 5, 38)
#define ECC_ERROR_LOG_SYND(v)		GET_BITFIELD(v, 46, 61)

/* Host MMIO base address */
#define MCHBAR_OFFSET			0x48
#define MCHBAR_EN			BIT_ULL(0)
#define MCHBAR_BASE(v)			(GET_BITFIELD(v, 16, 38) << 16)
#define MCHBAR_SIZE			0x10000

/* Parameters for the channel decode stage */
#define MAD_INTER_CHANNEL_OFFSET	0x5000
#define MAD_INTER_CHANNEL_DDR_TYPE(v)	GET_BITFIELD(v, 0, 2)
#define MAD_INTER_CHANNEL_ECHM(v)	GET_BITFIELD(v, 3, 3)
#define MAD_INTER_CHANNEL_CH_L_MAP(v)	GET_BITFIELD(v, 4, 4)
#define MAD_INTER_CHANNEL_CH_S_SIZE(v)	((u64)GET_BITFIELD(v, 12, 19) << 29)

/* Parameters for DRAM decode stage */
#define MAD_INTRA_CH0_OFFSET		0x5004
#define MAD_INTRA_CH_DIMM_L_MAP(v)	GET_BITFIELD(v, 0, 0)

/* DIMM characteristics */
#define MAD_DIMM_CH0_OFFSET		0x500c
#define MAD_DIMM_CH_DIMM_L_SIZE(v)	((u64)GET_BITFIELD(v, 0, 6) << 29)
#define MAD_DIMM_CH_DLW(v)		GET_BITFIELD(v, 7, 8)
#define MAD_DIMM_CH_DIMM_S_SIZE(v)	((u64)GET_BITFIELD(v, 16, 22) << 29)
#define MAD_DIMM_CH_DSW(v)		GET_BITFIELD(v, 24, 25)

/* Hash for channel selection */
#define CHANNEL_HASH_OFFSET		0x5024
/* Hash for enhanced channel selection */
#define CHANNEL_EHASH_OFFSET		0x5028
#define CHANNEL_HASH_MASK(v)		(GET_BITFIELD(v, 6, 19) << 6)
#define CHANNEL_HASH_LSB_MASK_BIT(v)	GET_BITFIELD(v, 24, 26)
#define CHANNEL_HASH_MODE(v)		GET_BITFIELD(v, 28, 28)
111
112static struct res_config {
113 int num_imc;
114 u32 ibecc_base;
115 bool (*ibecc_available)(struct pci_dev *pdev);
116 /* Convert error address logged in IBECC to system physical address */
117 u64 (*err_addr_to_sys_addr)(u64 eaddr);
118 /* Convert error address logged in IBECC to integrated memory controller address */
119 u64 (*err_addr_to_imc_addr)(u64 eaddr);
120} *res_cfg;
121
122struct igen6_imc {
123 int mc;
124 struct mem_ctl_info *mci;
125 struct pci_dev *pdev;
126 struct device dev;
127 void __iomem *window;
128 u64 ch_s_size;
129 int ch_l_map;
130 u64 dimm_s_size[NUM_CHANNELS];
131 u64 dimm_l_size[NUM_CHANNELS];
132 int dimm_l_map[NUM_CHANNELS];
133};
134
135static struct igen6_pvt {
136 struct igen6_imc imc[NUM_IMC];
137} *igen6_pvt;
138
139/* The top of low usable DRAM */
140static u32 igen6_tolud;
141/* The size of physical memory */
142static u64 igen6_tom;
143
144struct decoded_addr {
145 int mc;
146 u64 imc_addr;
147 u64 sys_addr;
148 int channel_idx;
149 u64 channel_addr;
150 int sub_channel_idx;
151 u64 sub_channel_addr;
152};
153
154struct ecclog_node {
155 struct llist_node llnode;
156 int mc;
157 u64 ecclog;
158};
159
160/*
161 * In the NMI handler, the driver uses the lock-less memory allocator
162 * to allocate memory to store the IBECC error logs and links the logs
163 * to the lock-less list. Delay printk() and the work of error reporting
164 * to EDAC core in a worker.
165 */
166#define ECCLOG_POOL_SIZE PAGE_SIZE
kernel test robot77429ee2020-11-23 11:18:50 +0800167static LLIST_HEAD(ecclog_llist);
Qiuxu Zhuo10590a92020-11-05 15:49:14 +0800168static struct gen_pool *ecclog_pool;
169static char ecclog_buf[ECCLOG_POOL_SIZE];
170static struct irq_work ecclog_irq_work;
171static struct work_struct ecclog_work;
172
/* Compute die IDs for Elkhart Lake with IBECC */
#define DID_EHL_SKU5	0x4514
#define DID_EHL_SKU6	0x4528
#define DID_EHL_SKU7	0x452a
#define DID_EHL_SKU8	0x4516
#define DID_EHL_SKU9	0x452c
#define DID_EHL_SKU10	0x452e
#define DID_EHL_SKU11	0x4532
#define DID_EHL_SKU12	0x4518
#define DID_EHL_SKU13	0x451a
#define DID_EHL_SKU14	0x4534
#define DID_EHL_SKU15	0x4536
185
186static bool ehl_ibecc_available(struct pci_dev *pdev)
187{
188 u32 v;
189
190 if (pci_read_config_dword(pdev, CAPID_C_OFFSET, &v))
191 return false;
192
193 return !!(CAPID_C_IBECC & v);
194}
195
196static u64 ehl_err_addr_to_sys_addr(u64 eaddr)
197{
198 return eaddr;
199}
200
201static u64 ehl_err_addr_to_imc_addr(u64 eaddr)
202{
203 if (eaddr < igen6_tolud)
204 return eaddr;
205
206 if (igen6_tom <= _4GB)
207 return eaddr + igen6_tolud - _4GB;
208
209 if (eaddr < _4GB)
210 return eaddr + igen6_tolud - igen6_tom;
211
212 return eaddr;
213}
214
215static struct res_config ehl_cfg = {
216 .num_imc = 1,
217 .ibecc_base = 0xdc00,
218 .ibecc_available = ehl_ibecc_available,
219 .err_addr_to_sys_addr = ehl_err_addr_to_sys_addr,
220 .err_addr_to_imc_addr = ehl_err_addr_to_imc_addr,
221};
222
223static const struct pci_device_id igen6_pci_tbl[] = {
224 { PCI_VDEVICE(INTEL, DID_EHL_SKU5), (kernel_ulong_t)&ehl_cfg },
225 { PCI_VDEVICE(INTEL, DID_EHL_SKU6), (kernel_ulong_t)&ehl_cfg },
226 { PCI_VDEVICE(INTEL, DID_EHL_SKU7), (kernel_ulong_t)&ehl_cfg },
227 { PCI_VDEVICE(INTEL, DID_EHL_SKU8), (kernel_ulong_t)&ehl_cfg },
228 { PCI_VDEVICE(INTEL, DID_EHL_SKU9), (kernel_ulong_t)&ehl_cfg },
229 { PCI_VDEVICE(INTEL, DID_EHL_SKU10), (kernel_ulong_t)&ehl_cfg },
230 { PCI_VDEVICE(INTEL, DID_EHL_SKU11), (kernel_ulong_t)&ehl_cfg },
231 { PCI_VDEVICE(INTEL, DID_EHL_SKU12), (kernel_ulong_t)&ehl_cfg },
232 { PCI_VDEVICE(INTEL, DID_EHL_SKU13), (kernel_ulong_t)&ehl_cfg },
233 { PCI_VDEVICE(INTEL, DID_EHL_SKU14), (kernel_ulong_t)&ehl_cfg },
234 { PCI_VDEVICE(INTEL, DID_EHL_SKU15), (kernel_ulong_t)&ehl_cfg },
235 { },
236};
237MODULE_DEVICE_TABLE(pci, igen6_pci_tbl);
238
239static enum dev_type get_width(int dimm_l, u32 mad_dimm)
240{
241 u32 w = dimm_l ? MAD_DIMM_CH_DLW(mad_dimm) :
242 MAD_DIMM_CH_DSW(mad_dimm);
243
244 switch (w) {
245 case 0:
246 return DEV_X8;
247 case 1:
248 return DEV_X16;
249 case 2:
250 return DEV_X32;
251 default:
252 return DEV_UNKNOWN;
253 }
254}
255
256static enum mem_type get_memory_type(u32 mad_inter)
257{
258 u32 t = MAD_INTER_CHANNEL_DDR_TYPE(mad_inter);
259
260 switch (t) {
261 case 0:
262 return MEM_DDR4;
263 case 1:
264 return MEM_DDR3;
265 case 2:
266 return MEM_LPDDR3;
267 case 3:
268 return MEM_LPDDR4;
269 case 4:
270 return MEM_WIO2;
271 default:
272 return MEM_UNKNOWN;
273 }
274}
275
276static int decode_chan_idx(u64 addr, u64 mask, int intlv_bit)
277{
278 u64 hash_addr = addr & mask, hash = 0;
279 u64 intlv = (addr >> intlv_bit) & 1;
280 int i;
281
282 for (i = 6; i < 20; i++)
283 hash ^= (hash_addr >> i) & 1;
284
285 return (int)hash ^ intlv;
286}
287
288static u64 decode_channel_addr(u64 addr, int intlv_bit)
289{
290 u64 channel_addr;
291
292 /* Remove the interleave bit and shift upper part down to fill gap */
293 channel_addr = GET_BITFIELD(addr, intlv_bit + 1, 63) << intlv_bit;
294 channel_addr |= GET_BITFIELD(addr, 0, intlv_bit - 1);
295
296 return channel_addr;
297}
298
299static void decode_addr(u64 addr, u32 hash, u64 s_size, int l_map,
300 int *idx, u64 *sub_addr)
301{
302 int intlv_bit = CHANNEL_HASH_LSB_MASK_BIT(hash) + 6;
303
304 if (addr > 2 * s_size) {
305 *sub_addr = addr - s_size;
306 *idx = l_map;
307 return;
308 }
309
310 if (CHANNEL_HASH_MODE(hash)) {
311 *sub_addr = decode_channel_addr(addr, intlv_bit);
312 *idx = decode_chan_idx(addr, CHANNEL_HASH_MASK(hash), intlv_bit);
313 } else {
314 *sub_addr = decode_channel_addr(addr, 6);
315 *idx = GET_BITFIELD(addr, 6, 6);
316 }
317}
318
319static int igen6_decode(struct decoded_addr *res)
320{
321 struct igen6_imc *imc = &igen6_pvt->imc[res->mc];
322 u64 addr = res->imc_addr, sub_addr, s_size;
323 int idx, l_map;
324 u32 hash;
325
326 if (addr >= igen6_tom) {
327 edac_dbg(0, "Address 0x%llx out of range\n", addr);
328 return -EINVAL;
329 }
330
331 /* Decode channel */
332 hash = readl(imc->window + CHANNEL_HASH_OFFSET);
333 s_size = imc->ch_s_size;
334 l_map = imc->ch_l_map;
335 decode_addr(addr, hash, s_size, l_map, &idx, &sub_addr);
336 res->channel_idx = idx;
337 res->channel_addr = sub_addr;
338
339 /* Decode sub-channel/DIMM */
340 hash = readl(imc->window + CHANNEL_EHASH_OFFSET);
341 s_size = imc->dimm_s_size[idx];
342 l_map = imc->dimm_l_map[idx];
343 decode_addr(res->channel_addr, hash, s_size, l_map, &idx, &sub_addr);
344 res->sub_channel_idx = idx;
345 res->sub_channel_addr = sub_addr;
346
347 return 0;
348}
349
350static void igen6_output_error(struct decoded_addr *res,
351 struct mem_ctl_info *mci, u64 ecclog)
352{
353 enum hw_event_mc_err_type type = ecclog & ECC_ERROR_LOG_UE ?
354 HW_EVENT_ERR_UNCORRECTED :
355 HW_EVENT_ERR_CORRECTED;
356
357 edac_mc_handle_error(type, mci, 1,
358 res->sys_addr >> PAGE_SHIFT,
359 res->sys_addr & ~PAGE_MASK,
360 ECC_ERROR_LOG_SYND(ecclog),
361 res->channel_idx, res->sub_channel_idx,
362 -1, "", "");
363}
364
365static struct gen_pool *ecclog_gen_pool_create(void)
366{
367 struct gen_pool *pool;
368
369 pool = gen_pool_create(ilog2(sizeof(struct ecclog_node)), -1);
370 if (!pool)
371 return NULL;
372
373 if (gen_pool_add(pool, (unsigned long)ecclog_buf, ECCLOG_POOL_SIZE, -1)) {
374 gen_pool_destroy(pool);
375 return NULL;
376 }
377
378 return pool;
379}
380
381static int ecclog_gen_pool_add(int mc, u64 ecclog)
382{
383 struct ecclog_node *node;
384
385 node = (void *)gen_pool_alloc(ecclog_pool, sizeof(*node));
386 if (!node)
387 return -ENOMEM;
388
389 node->mc = mc;
390 node->ecclog = ecclog;
391 llist_add(&node->llnode, &ecclog_llist);
392
393 return 0;
394}
395
396/*
397 * Either the memory-mapped I/O status register ECC_ERROR_LOG or the PCI
398 * configuration space status register ERRSTS can indicate whether a
399 * correctable error or an uncorrectable error occurred. We only use the
400 * ECC_ERROR_LOG register to check error type, but need to clear both
401 * registers to enable future error events.
402 */
403static u64 ecclog_read_and_clear(struct igen6_imc *imc)
404{
405 u64 ecclog = readq(imc->window + ECC_ERROR_LOG_OFFSET);
406
407 if (ecclog & (ECC_ERROR_LOG_CE | ECC_ERROR_LOG_UE)) {
408 /* Clear CE/UE bits by writing 1s */
409 writeq(ecclog, imc->window + ECC_ERROR_LOG_OFFSET);
410 return ecclog;
411 }
412
413 return 0;
414}
415
416static void errsts_clear(struct igen6_imc *imc)
417{
418 u16 errsts;
419
420 if (pci_read_config_word(imc->pdev, ERRSTS_OFFSET, &errsts)) {
421 igen6_printk(KERN_ERR, "Failed to read ERRSTS\n");
422 return;
423 }
424
425 /* Clear CE/UE bits by writing 1s */
426 if (errsts & (ERRSTS_CE | ERRSTS_UE))
427 pci_write_config_word(imc->pdev, ERRSTS_OFFSET, errsts);
428}
429
430static int errcmd_enable_error_reporting(bool enable)
431{
432 struct igen6_imc *imc = &igen6_pvt->imc[0];
433 u16 errcmd;
434 int rc;
435
436 rc = pci_read_config_word(imc->pdev, ERRCMD_OFFSET, &errcmd);
437 if (rc)
438 return rc;
439
440 if (enable)
441 errcmd |= ERRCMD_CE | ERRSTS_UE;
442 else
443 errcmd &= ~(ERRCMD_CE | ERRSTS_UE);
444
445 rc = pci_write_config_word(imc->pdev, ERRCMD_OFFSET, errcmd);
446 if (rc)
447 return rc;
448
449 return 0;
450}
451
452static int ecclog_handler(void)
453{
454 struct igen6_imc *imc;
455 int i, n = 0;
456 u64 ecclog;
457
458 for (i = 0; i < res_cfg->num_imc; i++) {
459 imc = &igen6_pvt->imc[i];
460
461 /* errsts_clear() isn't NMI-safe. Delay it in the IRQ context */
462
463 ecclog = ecclog_read_and_clear(imc);
464 if (!ecclog)
465 continue;
466
467 if (!ecclog_gen_pool_add(i, ecclog))
468 irq_work_queue(&ecclog_irq_work);
469
470 n++;
471 }
472
473 return n;
474}
475
476static void ecclog_work_cb(struct work_struct *work)
477{
478 struct ecclog_node *node, *tmp;
479 struct mem_ctl_info *mci;
480 struct llist_node *head;
481 struct decoded_addr res;
482 u64 eaddr;
483
484 head = llist_del_all(&ecclog_llist);
485 if (!head)
486 return;
487
488 llist_for_each_entry_safe(node, tmp, head, llnode) {
489 memset(&res, 0, sizeof(res));
490 eaddr = ECC_ERROR_LOG_ADDR(node->ecclog) <<
491 ECC_ERROR_LOG_ADDR_SHIFT;
492 res.mc = node->mc;
493 res.sys_addr = res_cfg->err_addr_to_sys_addr(eaddr);
494 res.imc_addr = res_cfg->err_addr_to_imc_addr(eaddr);
495
496 mci = igen6_pvt->imc[res.mc].mci;
497
498 edac_dbg(2, "MC %d, ecclog = 0x%llx\n", node->mc, node->ecclog);
499 igen6_mc_printk(mci, KERN_DEBUG, "HANDLING IBECC MEMORY ERROR\n");
500 igen6_mc_printk(mci, KERN_DEBUG, "ADDR 0x%llx ", res.sys_addr);
501
502 if (!igen6_decode(&res))
503 igen6_output_error(&res, mci, node->ecclog);
504
505 gen_pool_free(ecclog_pool, (unsigned long)node, sizeof(*node));
506 }
507}
508
509static void ecclog_irq_work_cb(struct irq_work *irq_work)
510{
511 int i;
512
513 for (i = 0; i < res_cfg->num_imc; i++)
514 errsts_clear(&igen6_pvt->imc[i]);
515
516 if (!llist_empty(&ecclog_llist))
517 schedule_work(&ecclog_work);
518}
519
520static int ecclog_nmi_handler(unsigned int cmd, struct pt_regs *regs)
521{
522 unsigned char reason;
523
524 if (!ecclog_handler())
525 return NMI_DONE;
526
527 /*
528 * Both In-Band ECC correctable error and uncorrectable error are
529 * reported by SERR# NMI. The NMI generic code (see pci_serr_error())
530 * doesn't clear the bit NMI_REASON_CLEAR_SERR (in port 0x61) to
531 * re-enable the SERR# NMI after NMI handling. So clear this bit here
532 * to re-enable SERR# NMI for receiving future In-Band ECC errors.
533 */
534 reason = x86_platform.get_nmi_reason() & NMI_REASON_CLEAR_MASK;
535 reason |= NMI_REASON_CLEAR_SERR;
536 outb(reason, NMI_REASON_PORT);
537 reason &= ~NMI_REASON_CLEAR_SERR;
538 outb(reason, NMI_REASON_PORT);
539
540 return NMI_HANDLED;
541}
542
543static bool igen6_check_ecc(struct igen6_imc *imc)
544{
545 u32 activate = readl(imc->window + IBECC_ACTIVATE_OFFSET);
546
547 return !!(activate & IBECC_ACTIVATE_EN);
548}
549
550static int igen6_get_dimm_config(struct mem_ctl_info *mci)
551{
552 struct igen6_imc *imc = mci->pvt_info;
553 u32 mad_inter, mad_intra, mad_dimm;
554 int i, j, ndimms, mc = imc->mc;
555 struct dimm_info *dimm;
556 enum mem_type mtype;
557 enum dev_type dtype;
558 u64 dsize;
559 bool ecc;
560
561 edac_dbg(2, "\n");
562
563 mad_inter = readl(imc->window + MAD_INTER_CHANNEL_OFFSET);
564 mtype = get_memory_type(mad_inter);
565 ecc = igen6_check_ecc(imc);
566 imc->ch_s_size = MAD_INTER_CHANNEL_CH_S_SIZE(mad_inter);
567 imc->ch_l_map = MAD_INTER_CHANNEL_CH_L_MAP(mad_inter);
568
569 for (i = 0; i < NUM_CHANNELS; i++) {
570 mad_intra = readl(imc->window + MAD_INTRA_CH0_OFFSET + i * 4);
571 mad_dimm = readl(imc->window + MAD_DIMM_CH0_OFFSET + i * 4);
572
573 imc->dimm_l_size[i] = MAD_DIMM_CH_DIMM_L_SIZE(mad_dimm);
574 imc->dimm_s_size[i] = MAD_DIMM_CH_DIMM_S_SIZE(mad_dimm);
575 imc->dimm_l_map[i] = MAD_INTRA_CH_DIMM_L_MAP(mad_intra);
576 ndimms = 0;
577
578 for (j = 0; j < NUM_DIMMS; j++) {
579 dimm = edac_get_dimm(mci, i, j, 0);
580
581 if (j ^ imc->dimm_l_map[i]) {
582 dtype = get_width(0, mad_dimm);
583 dsize = imc->dimm_s_size[i];
584 } else {
585 dtype = get_width(1, mad_dimm);
586 dsize = imc->dimm_l_size[i];
587 }
588
589 if (!dsize)
590 continue;
591
592 dimm->grain = 64;
593 dimm->mtype = mtype;
594 dimm->dtype = dtype;
595 dimm->nr_pages = MiB_TO_PAGES(dsize >> 20);
596 dimm->edac_mode = EDAC_SECDED;
597 snprintf(dimm->label, sizeof(dimm->label),
598 "MC#%d_Chan#%d_DIMM#%d", mc, i, j);
599 edac_dbg(0, "MC %d, Channel %d, DIMM %d, Size %llu MiB (%u pages)\n",
600 mc, i, j, dsize >> 20, dimm->nr_pages);
601
602 ndimms++;
603 }
604
605 if (ndimms && !ecc) {
606 igen6_printk(KERN_ERR, "MC%d In-Band ECC is disabled\n", mc);
607 return -ENODEV;
608 }
609 }
610
611 return 0;
612}
613
614#ifdef CONFIG_EDAC_DEBUG
Qiuxu Zhuo2223d8c2020-11-05 15:49:34 +0800615/* Top of upper usable DRAM */
616static u64 igen6_touud;
617#define TOUUD_OFFSET 0xa8
618
Qiuxu Zhuo10590a92020-11-05 15:49:14 +0800619static void igen6_reg_dump(struct igen6_imc *imc)
620{
621 int i;
622
623 edac_dbg(2, "CHANNEL_HASH : 0x%x\n",
624 readl(imc->window + CHANNEL_HASH_OFFSET));
625 edac_dbg(2, "CHANNEL_EHASH : 0x%x\n",
626 readl(imc->window + CHANNEL_EHASH_OFFSET));
627 edac_dbg(2, "MAD_INTER_CHANNEL: 0x%x\n",
628 readl(imc->window + MAD_INTER_CHANNEL_OFFSET));
629 edac_dbg(2, "ECC_ERROR_LOG : 0x%llx\n",
630 readq(imc->window + ECC_ERROR_LOG_OFFSET));
631
632 for (i = 0; i < NUM_CHANNELS; i++) {
633 edac_dbg(2, "MAD_INTRA_CH%d : 0x%x\n", i,
634 readl(imc->window + MAD_INTRA_CH0_OFFSET + i * 4));
635 edac_dbg(2, "MAD_DIMM_CH%d : 0x%x\n", i,
636 readl(imc->window + MAD_DIMM_CH0_OFFSET + i * 4));
637 }
638 edac_dbg(2, "TOLUD : 0x%x", igen6_tolud);
Qiuxu Zhuo2223d8c2020-11-05 15:49:34 +0800639 edac_dbg(2, "TOUUD : 0x%llx", igen6_touud);
Qiuxu Zhuo10590a92020-11-05 15:49:14 +0800640 edac_dbg(2, "TOM : 0x%llx", igen6_tom);
641}
Qiuxu Zhuo2223d8c2020-11-05 15:49:34 +0800642
643static struct dentry *igen6_test;
644
645static int debugfs_u64_set(void *data, u64 val)
646{
647 u64 ecclog;
648
649 if ((val >= igen6_tolud && val < _4GB) || val >= igen6_touud) {
650 edac_dbg(0, "Address 0x%llx out of range\n", val);
651 return 0;
652 }
653
654 pr_warn_once("Fake error to 0x%llx injected via debugfs\n", val);
655
656 val >>= ECC_ERROR_LOG_ADDR_SHIFT;
657 ecclog = (val << ECC_ERROR_LOG_ADDR_SHIFT) | ECC_ERROR_LOG_CE;
658
659 if (!ecclog_gen_pool_add(0, ecclog))
660 irq_work_queue(&ecclog_irq_work);
661
662 return 0;
663}
664DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n");
665
666static void igen6_debug_setup(void)
667{
668 igen6_test = edac_debugfs_create_dir("igen6_test");
669 if (!igen6_test)
670 return;
671
672 if (!edac_debugfs_create_file("addr", 0200, igen6_test,
673 NULL, &fops_u64_wo)) {
674 debugfs_remove(igen6_test);
675 igen6_test = NULL;
676 }
677}
678
679static void igen6_debug_teardown(void)
680{
681 debugfs_remove_recursive(igen6_test);
682}
Qiuxu Zhuo10590a92020-11-05 15:49:14 +0800683#else
684static void igen6_reg_dump(struct igen6_imc *imc) {}
Qiuxu Zhuo2223d8c2020-11-05 15:49:34 +0800685static void igen6_debug_setup(void) {}
686static void igen6_debug_teardown(void) {}
Qiuxu Zhuo10590a92020-11-05 15:49:14 +0800687#endif
688
689static int igen6_pci_setup(struct pci_dev *pdev, u64 *mchbar)
690{
691 union {
692 u64 v;
693 struct {
694 u32 v_lo;
695 u32 v_hi;
696 };
697 } u;
698
699 edac_dbg(2, "\n");
700
701 if (!res_cfg->ibecc_available(pdev)) {
702 edac_dbg(2, "No In-Band ECC IP\n");
703 goto fail;
704 }
705
706 if (pci_read_config_dword(pdev, TOLUD_OFFSET, &igen6_tolud)) {
707 igen6_printk(KERN_ERR, "Failed to read TOLUD\n");
708 goto fail;
709 }
710
711 igen6_tolud &= GENMASK(31, 20);
712
713 if (pci_read_config_dword(pdev, TOM_OFFSET, &u.v_lo)) {
714 igen6_printk(KERN_ERR, "Failed to read lower TOM\n");
715 goto fail;
716 }
717
718 if (pci_read_config_dword(pdev, TOM_OFFSET + 4, &u.v_hi)) {
719 igen6_printk(KERN_ERR, "Failed to read upper TOM\n");
720 goto fail;
721 }
722
723 igen6_tom = u.v & GENMASK_ULL(38, 20);
724
725 if (pci_read_config_dword(pdev, MCHBAR_OFFSET, &u.v_lo)) {
726 igen6_printk(KERN_ERR, "Failed to read lower MCHBAR\n");
727 goto fail;
728 }
729
730 if (pci_read_config_dword(pdev, MCHBAR_OFFSET + 4, &u.v_hi)) {
731 igen6_printk(KERN_ERR, "Failed to read upper MCHBAR\n");
732 goto fail;
733 }
734
735 if (!(u.v & MCHBAR_EN)) {
736 igen6_printk(KERN_ERR, "MCHBAR is disabled\n");
737 goto fail;
738 }
739
740 *mchbar = MCHBAR_BASE(u.v);
741
Qiuxu Zhuo2223d8c2020-11-05 15:49:34 +0800742#ifdef CONFIG_EDAC_DEBUG
743 if (pci_read_config_dword(pdev, TOUUD_OFFSET, &u.v_lo))
744 edac_dbg(2, "Failed to read lower TOUUD\n");
745 else if (pci_read_config_dword(pdev, TOUUD_OFFSET + 4, &u.v_hi))
746 edac_dbg(2, "Failed to read upper TOUUD\n");
747 else
748 igen6_touud = u.v & GENMASK_ULL(38, 20);
749#endif
750
Qiuxu Zhuo10590a92020-11-05 15:49:14 +0800751 return 0;
752fail:
753 return -ENODEV;
754}
755
756static int igen6_register_mci(int mc, u64 mchbar, struct pci_dev *pdev)
757{
758 struct edac_mc_layer layers[2];
759 struct mem_ctl_info *mci;
760 struct igen6_imc *imc;
761 void __iomem *window;
762 int rc;
763
764 edac_dbg(2, "\n");
765
766 mchbar += mc * MCHBAR_SIZE;
767 window = ioremap(mchbar, MCHBAR_SIZE);
768 if (!window) {
769 igen6_printk(KERN_ERR, "Failed to ioremap 0x%llx\n", mchbar);
770 return -ENODEV;
771 }
772
773 layers[0].type = EDAC_MC_LAYER_CHANNEL;
774 layers[0].size = NUM_CHANNELS;
775 layers[0].is_virt_csrow = false;
776 layers[1].type = EDAC_MC_LAYER_SLOT;
777 layers[1].size = NUM_DIMMS;
778 layers[1].is_virt_csrow = true;
779
780 mci = edac_mc_alloc(mc, ARRAY_SIZE(layers), layers, 0);
781 if (!mci) {
782 rc = -ENOMEM;
783 goto fail;
784 }
785
786 mci->ctl_name = kasprintf(GFP_KERNEL, "Intel_client_SoC MC#%d", mc);
787 if (!mci->ctl_name) {
788 rc = -ENOMEM;
789 goto fail2;
790 }
791
792 mci->mtype_cap = MEM_FLAG_LPDDR4 | MEM_FLAG_DDR4;
793 mci->edac_ctl_cap = EDAC_FLAG_SECDED;
794 mci->edac_cap = EDAC_FLAG_SECDED;
795 mci->mod_name = EDAC_MOD_STR;
796 mci->dev_name = pci_name(pdev);
797 mci->pvt_info = &igen6_pvt->imc[mc];
798
799 imc = mci->pvt_info;
800 device_initialize(&imc->dev);
801 /*
802 * EDAC core uses mci->pdev(pointer of structure device) as
803 * memory controller ID. The client SoCs attach one or more
804 * memory controllers to single pci_dev (single pci_dev->dev
805 * can be for multiple memory controllers).
806 *
807 * To make mci->pdev unique, assign pci_dev->dev to mci->pdev
808 * for the first memory controller and assign a unique imc->dev
809 * to mci->pdev for each non-first memory controller.
810 */
811 mci->pdev = mc ? &imc->dev : &pdev->dev;
812 imc->mc = mc;
813 imc->pdev = pdev;
814 imc->window = window;
815
816 igen6_reg_dump(imc);
817
818 rc = igen6_get_dimm_config(mci);
819 if (rc)
820 goto fail3;
821
822 rc = edac_mc_add_mc(mci);
823 if (rc) {
824 igen6_printk(KERN_ERR, "Failed to register mci#%d\n", mc);
825 goto fail3;
826 }
827
828 imc->mci = mci;
829 return 0;
830fail3:
831 kfree(mci->ctl_name);
832fail2:
833 edac_mc_free(mci);
834fail:
835 iounmap(window);
836 return rc;
837}
838
839static void igen6_unregister_mcis(void)
840{
841 struct mem_ctl_info *mci;
842 struct igen6_imc *imc;
843 int i;
844
845 edac_dbg(2, "\n");
846
847 for (i = 0; i < res_cfg->num_imc; i++) {
848 imc = &igen6_pvt->imc[i];
849 mci = imc->mci;
850 if (!mci)
851 continue;
852
853 edac_mc_del_mc(mci->pdev);
854 kfree(mci->ctl_name);
855 edac_mc_free(mci);
856 iounmap(imc->window);
857 }
858}
859
860static int igen6_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
861{
862 u64 mchbar;
863 int i, rc;
864
865 edac_dbg(2, "\n");
866
867 igen6_pvt = kzalloc(sizeof(*igen6_pvt), GFP_KERNEL);
868 if (!igen6_pvt)
869 return -ENOMEM;
870
871 res_cfg = (struct res_config *)ent->driver_data;
872
873 rc = igen6_pci_setup(pdev, &mchbar);
874 if (rc)
875 goto fail;
876
877 for (i = 0; i < res_cfg->num_imc; i++) {
878 rc = igen6_register_mci(i, mchbar, pdev);
879 if (rc)
880 goto fail2;
881 }
882
883 ecclog_pool = ecclog_gen_pool_create();
884 if (!ecclog_pool) {
885 rc = -ENOMEM;
886 goto fail2;
887 }
888
889 INIT_WORK(&ecclog_work, ecclog_work_cb);
890 init_irq_work(&ecclog_irq_work, ecclog_irq_work_cb);
891
892 /* Check if any pending errors before registering the NMI handler */
893 ecclog_handler();
894
895 rc = register_nmi_handler(NMI_SERR, ecclog_nmi_handler,
896 0, IGEN6_NMI_NAME);
897 if (rc) {
898 igen6_printk(KERN_ERR, "Failed to register NMI handler\n");
899 goto fail3;
900 }
901
902 /* Enable error reporting */
903 rc = errcmd_enable_error_reporting(true);
904 if (rc) {
905 igen6_printk(KERN_ERR, "Failed to enable error reporting\n");
906 goto fail4;
907 }
908
Qiuxu Zhuo2223d8c2020-11-05 15:49:34 +0800909 igen6_debug_setup();
Qiuxu Zhuo10590a92020-11-05 15:49:14 +0800910 return 0;
911fail4:
912 unregister_nmi_handler(NMI_SERR, IGEN6_NMI_NAME);
913fail3:
914 gen_pool_destroy(ecclog_pool);
915fail2:
916 igen6_unregister_mcis();
917fail:
918 kfree(igen6_pvt);
919 return rc;
920}
921
922static void igen6_remove(struct pci_dev *pdev)
923{
924 edac_dbg(2, "\n");
925
Qiuxu Zhuo2223d8c2020-11-05 15:49:34 +0800926 igen6_debug_teardown();
Qiuxu Zhuo10590a92020-11-05 15:49:14 +0800927 errcmd_enable_error_reporting(false);
928 unregister_nmi_handler(NMI_SERR, IGEN6_NMI_NAME);
929 irq_work_sync(&ecclog_irq_work);
930 flush_work(&ecclog_work);
931 gen_pool_destroy(ecclog_pool);
932 igen6_unregister_mcis();
933 kfree(igen6_pvt);
934}
935
936static struct pci_driver igen6_driver = {
937 .name = EDAC_MOD_STR,
938 .probe = igen6_probe,
939 .remove = igen6_remove,
940 .id_table = igen6_pci_tbl,
941};
942
943static int __init igen6_init(void)
944{
945 const char *owner;
946 int rc;
947
948 edac_dbg(2, "\n");
949
950 owner = edac_get_owner();
951 if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR)))
952 return -ENODEV;
953
954 edac_op_state = EDAC_OPSTATE_NMI;
955
956 rc = pci_register_driver(&igen6_driver);
957 if (rc)
958 return rc;
959
960 igen6_printk(KERN_INFO, "%s\n", IGEN6_REVISION);
961
962 return 0;
963}
964
965static void __exit igen6_exit(void)
966{
967 edac_dbg(2, "\n");
968
969 pci_unregister_driver(&igen6_driver);
970}
971
972module_init(igen6_init);
973module_exit(igen6_exit);
974
975MODULE_LICENSE("GPL v2");
976MODULE_AUTHOR("Qiuxu Zhuo");
977MODULE_DESCRIPTION("MC Driver for Intel client SoC using In-Band ECC");