blob: bc434ad5465dee5e8c478cc54e13b898198471ac [file] [log] [blame]
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001/* Intel Sandy Bridge -EN/-EP/-EX Memory Controller kernel module
2 *
3 * This driver supports the memory controllers found on the Intel
4 * processor family Sandy Bridge.
5 *
6 * This file may be distributed under the terms of the
7 * GNU General Public License version 2 only.
8 *
9 * Copyright (c) 2011 by:
10 * Mauro Carvalho Chehab <mchehab@redhat.com>
11 */
12
13#include <linux/module.h>
14#include <linux/init.h>
15#include <linux/pci.h>
16#include <linux/pci_ids.h>
17#include <linux/slab.h>
18#include <linux/delay.h>
19#include <linux/edac.h>
20#include <linux/mmzone.h>
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -020021#include <linux/smp.h>
22#include <linux/bitmap.h>
Mauro Carvalho Chehab5b889e32011-11-07 18:26:53 -030023#include <linux/math64.h>
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -020024#include <asm/processor.h>
Mauro Carvalho Chehab3d78c9a2011-10-20 19:33:46 -020025#include <asm/mce.h>
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -020026
27#include "edac_core.h"
28
/*
 * Driver-wide state
 */

/* Every sbridge_dev created by alloc_sbridge_dev() is linked on this list */
static LIST_HEAD(sbridge_edac_list);

/* NOTE(review): presumably serializes probe/remove; no user is visible here */
static DEFINE_MUTEX(sbridge_edac_lock);

/* NOTE(review): presumably a "probe already ran" marker; confirm at its users */
static int probed;
33
/*
 * Module identification strings.
 * Bump SBRIDGE_REVISION whenever the module is modified.
 */
#define SBRIDGE_REVISION	" Ver: 1.0.0 "
#define EDAC_MOD_STR		"sbridge_edac"
39
/*
 * Logging helpers: forward to the EDAC core printers, tagging every
 * message with the "sbridge" prefix.
 */
#define sbridge_printk(level, fmt, ...)					\
	edac_printk(level, "sbridge", fmt, ##__VA_ARGS__)

#define sbridge_mc_printk(mci, level, fmt, ...)				\
	edac_mc_chipset_printk(mci, level, "sbridge", fmt, ##__VA_ARGS__)
48
/*
 * Get a bit field at register value <v>, from bit <lo> to bit <hi>
 * (both inclusive, 0 <= lo <= hi <= 63).
 *
 * The value is widened to 64 bits and shifted down first, then masked.
 * Building the mask as (~0ULL >> (63 - width + 1)) instead of
 * ((1ULL << width) - 1) keeps the macro well defined when the field
 * covers all 64 bits, where a shift by 64 would be undefined.
 *
 * The result has type unsigned long long. <lo> is evaluated twice.
 */
#define GET_BITFIELD(v, lo, hi)						\
	((((unsigned long long)(v)) >> (lo)) &				\
	 (~0ULL >> (63 - ((hi) - (lo)))))
54
55/*
56 * sbridge Memory Controller Registers
57 */
58
/*
 * FIXME: the IDs are ordered by PCI device.function for now, which is
 * convenient while the driver is being developed. This list should be
 * moved to pci_ids.h once submitted upstream.
 *
 * The trailing comment on each line is the PCI device.function.
 */
#define PCI_DEVICE_ID_INTEL_SBRIDGE_SAD0	0x3cf4	/* 12.6 */
#define PCI_DEVICE_ID_INTEL_SBRIDGE_SAD1	0x3cf6	/* 12.7 */
#define PCI_DEVICE_ID_INTEL_SBRIDGE_BR		0x3cf5	/* 13.6 */
#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_HA0	0x3ca0	/* 14.0 */
#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA	0x3ca8	/* 15.0 */
#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_RAS	0x3c71	/* 15.1 */
#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD0	0x3caa	/* 15.2 */
#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD1	0x3cab	/* 15.3 */
#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD2	0x3cac	/* 15.4 */
#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD3	0x3cad	/* 15.5 */
#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_DDRIO	0x3cb8	/* 17.0 */

/*
 * Not used yet. These devices hold the error counters and will be
 * needed by future implementations.
 */
#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_ERR0	0x3c72	/* 16.2 */
#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_ERR1	0x3c73	/* 16.3 */
#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_ERR2	0x3c76	/* 16.6 */
#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_ERR3	0x3c77	/* 16.7 */
84
85/* Devices 12 Function 6, Offsets 0x80 to 0xcc */
86static const u32 dram_rule[] = {
87 0x80, 0x88, 0x90, 0x98, 0xa0,
88 0xa8, 0xb0, 0xb8, 0xc0, 0xc8,
89};
90#define MAX_SAD ARRAY_SIZE(dram_rule)
91
92#define SAD_LIMIT(reg) ((GET_BITFIELD(reg, 6, 25) << 26) | 0x3ffffff)
93#define DRAM_ATTR(reg) GET_BITFIELD(reg, 2, 3)
94#define INTERLEAVE_MODE(reg) GET_BITFIELD(reg, 1, 1)
95#define DRAM_RULE_ENABLE(reg) GET_BITFIELD(reg, 0, 0)
96
97static char *get_dram_attr(u32 reg)
98{
99 switch(DRAM_ATTR(reg)) {
100 case 0:
101 return "DRAM";
102 case 1:
103 return "MMCFG";
104 case 2:
105 return "NXM";
106 default:
107 return "unknown";
108 }
109}
110
111static const u32 interleave_list[] = {
112 0x84, 0x8c, 0x94, 0x9c, 0xa4,
113 0xac, 0xb4, 0xbc, 0xc4, 0xcc,
114};
115#define MAX_INTERLEAVE ARRAY_SIZE(interleave_list)
116
117#define SAD_PKG0(reg) GET_BITFIELD(reg, 0, 2)
118#define SAD_PKG1(reg) GET_BITFIELD(reg, 3, 5)
119#define SAD_PKG2(reg) GET_BITFIELD(reg, 8, 10)
120#define SAD_PKG3(reg) GET_BITFIELD(reg, 11, 13)
121#define SAD_PKG4(reg) GET_BITFIELD(reg, 16, 18)
122#define SAD_PKG5(reg) GET_BITFIELD(reg, 19, 21)
123#define SAD_PKG6(reg) GET_BITFIELD(reg, 24, 26)
124#define SAD_PKG7(reg) GET_BITFIELD(reg, 27, 29)
125
126static inline int sad_pkg(u32 reg, int interleave)
127{
128 switch (interleave) {
129 case 0:
130 return SAD_PKG0(reg);
131 case 1:
132 return SAD_PKG1(reg);
133 case 2:
134 return SAD_PKG2(reg);
135 case 3:
136 return SAD_PKG3(reg);
137 case 4:
138 return SAD_PKG4(reg);
139 case 5:
140 return SAD_PKG5(reg);
141 case 6:
142 return SAD_PKG6(reg);
143 case 7:
144 return SAD_PKG7(reg);
145 default:
146 return -EINVAL;
147 }
148}
149
/* Device 12, function 7 */

#define TOLM		0x80
#define TOHM		0x84

/*
 * Convert the TOLM/TOHM register fields into a byte address with the
 * low bits set.
 *
 * NOTE(review): both use a 26-bit fill (0x3ffffff) although the fields
 * are shifted by 28 and 25 bits respectively - confirm against the
 * datasheet whether that is intended.
 */
#define GET_TOLM(r)	((GET_BITFIELD(r, 0,  3) << 28) | 0x3ffffff)
#define GET_TOHM(r)	((GET_BITFIELD(r, 0, 20) << 25) | 0x3ffffff)

/* Device 13, function 6 */

#define SAD_TARGET	0xf0
#define SOURCE_ID(r)	GET_BITFIELD(r, 9, 11)

#define SAD_CONTROL	0xf4
#define NODE_ID(r)	GET_BITFIELD(r, 0, 2)
167
168/* Device 14 function 0 */
169
170static const u32 tad_dram_rule[] = {
171 0x40, 0x44, 0x48, 0x4c,
172 0x50, 0x54, 0x58, 0x5c,
173 0x60, 0x64, 0x68, 0x6c,
174};
175#define MAX_TAD ARRAY_SIZE(tad_dram_rule)
176
177#define TAD_LIMIT(reg) ((GET_BITFIELD(reg, 12, 31) << 26) | 0x3ffffff)
178#define TAD_SOCK(reg) GET_BITFIELD(reg, 10, 11)
179#define TAD_CH(reg) GET_BITFIELD(reg, 8, 9)
180#define TAD_TGT3(reg) GET_BITFIELD(reg, 6, 7)
181#define TAD_TGT2(reg) GET_BITFIELD(reg, 4, 5)
182#define TAD_TGT1(reg) GET_BITFIELD(reg, 2, 3)
183#define TAD_TGT0(reg) GET_BITFIELD(reg, 0, 1)
184
/* Device 15, function 0 */

#define MCMTR				0x7c

/* Single-bit flags of the MCMTR register value */
#define IS_ECC_ENABLED(val)		GET_BITFIELD(val, 2, 2)
#define IS_LOCKSTEP_ENABLED(val)	GET_BITFIELD(val, 1, 1)
#define IS_CLOSE_PG(val)		GET_BITFIELD(val, 0, 0)

/* Device 15, function 1 */

#define RASENABLES			0xac
#define IS_MIRROR_ENABLED(val)		GET_BITFIELD(val, 0, 0)
197
/*
 * Device 15, functions 2-5
 *
 * Config-space offsets of the MTR registers, one per DIMM slot of a
 * channel.
 */
static const int mtr_regs[] = { 0x80, 0x84, 0x88 };

/* Field decoders for an MTR register value */
#define RANK_DISABLE(val)	GET_BITFIELD(val, 16, 19)
#define IS_DIMM_PRESENT(val)	GET_BITFIELD(val, 14, 14)
#define RANK_CNT_BITS(val)	GET_BITFIELD(val, 12, 13)
#define RANK_WIDTH_BITS(val)	GET_BITFIELD(val,  2,  4)
#define COL_WIDTH_BITS(val)	GET_BITFIELD(val,  0,  1)
209
210static const u32 tad_ch_nilv_offset[] = {
211 0x90, 0x94, 0x98, 0x9c,
212 0xa0, 0xa4, 0xa8, 0xac,
213 0xb0, 0xb4, 0xb8, 0xbc,
214};
215#define CHN_IDX_OFFSET(reg) GET_BITFIELD(reg, 28, 29)
216#define TAD_OFFSET(reg) (GET_BITFIELD(reg, 6, 25) << 26)
217
218static const u32 rir_way_limit[] = {
219 0x108, 0x10c, 0x110, 0x114, 0x118,
220};
221#define MAX_RIR_RANGES ARRAY_SIZE(rir_way_limit)
222
223#define IS_RIR_VALID(reg) GET_BITFIELD(reg, 31, 31)
224#define RIR_WAY(reg) GET_BITFIELD(reg, 28, 29)
225#define RIR_LIMIT(reg) ((GET_BITFIELD(reg, 1, 10) << 29)| 0x1fffffff)
226
227#define MAX_RIR_WAY 8
228
229static const u32 rir_offset[MAX_RIR_RANGES][MAX_RIR_WAY] = {
230 { 0x120, 0x124, 0x128, 0x12c, 0x130, 0x134, 0x138, 0x13c },
231 { 0x140, 0x144, 0x148, 0x14c, 0x150, 0x154, 0x158, 0x15c },
232 { 0x160, 0x164, 0x168, 0x16c, 0x170, 0x174, 0x178, 0x17c },
233 { 0x180, 0x184, 0x188, 0x18c, 0x190, 0x194, 0x198, 0x19c },
234 { 0x1a0, 0x1a4, 0x1a8, 0x1ac, 0x1b0, 0x1b4, 0x1b8, 0x1bc },
235};
236
237#define RIR_RNK_TGT(reg) GET_BITFIELD(reg, 16, 19)
238#define RIR_OFFSET(reg) GET_BITFIELD(reg, 2, 14)
239
240/* Device 16, functions 2-7 */
241
242/*
243 * FIXME: Implement the error count reads directly
244 */
245
246static const u32 correrrcnt[] = {
247 0x104, 0x108, 0x10c, 0x110,
248};
249
250#define RANK_ODD_OV(reg) GET_BITFIELD(reg, 31, 31)
251#define RANK_ODD_ERR_CNT(reg) GET_BITFIELD(reg, 16, 30)
252#define RANK_EVEN_OV(reg) GET_BITFIELD(reg, 15, 15)
253#define RANK_EVEN_ERR_CNT(reg) GET_BITFIELD(reg, 0, 14)
254
255static const u32 correrrthrsld[] = {
256 0x11c, 0x120, 0x124, 0x128,
257};
258
259#define RANK_ODD_ERR_THRSLD(reg) GET_BITFIELD(reg, 16, 30)
260#define RANK_EVEN_ERR_THRSLD(reg) GET_BITFIELD(reg, 0, 14)
261
262
/* Device 17, function 0 */

/* Config-space offset of the rank configuration register */
#define SB_RANK_CFG_A			0x0328

#define IS_RDIMM_ENABLED(val)		GET_BITFIELD(val, 11, 11)
268
/*
 * sbridge structs
 */

#define NUM_CHANNELS	4	/* Memory channels handled per controller */
#define MAX_DIMMS	3	/* Max DIMMS per channel */
275
Aristeu Rozanskifb79a502013-10-30 13:26:57 -0300276struct sbridge_pvt;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200277struct sbridge_info {
278 u32 mcmtr;
Aristeu Rozanskief1e8d02013-10-30 13:26:56 -0300279 u32 rankcfgr;
Aristeu Rozanskifb79a502013-10-30 13:26:57 -0300280 u64 (*get_tolm)(struct sbridge_pvt *pvt);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200281};
282
283struct sbridge_channel {
284 u32 ranks;
285 u32 dimms;
286};
287
/* Describes one PCI function the driver looks for */
struct pci_id_descr {
	int	dev;		/* PCI device (slot) number */
	int	func;		/* PCI function number */
	int	dev_id;		/* PCI device ID */
	int	optional;	/* Non-zero for entries flagged optional */
};
294
/* An array of pci_id_descr entries together with its length */
struct pci_id_table {
	const struct pci_id_descr	*descr;
	int				n_devs;
};
299
300struct sbridge_dev {
301 struct list_head list;
302 u8 bus, mc;
303 u8 node_id, source_id;
304 struct pci_dev **pdev;
305 int n_devs;
306 struct mem_ctl_info *mci;
307};
308
309struct sbridge_pvt {
310 struct pci_dev *pci_ta, *pci_ddrio, *pci_ras;
311 struct pci_dev *pci_sad0, *pci_sad1, *pci_ha0;
312 struct pci_dev *pci_br;
313 struct pci_dev *pci_tad[NUM_CHANNELS];
314
315 struct sbridge_dev *sbridge_dev;
316
317 struct sbridge_info info;
318 struct sbridge_channel channel[NUM_CHANNELS];
319
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200320 /* Memory type detection */
321 bool is_mirrored, is_lockstep, is_close_pg;
322
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200323 /* Fifo double buffers */
324 struct mce mce_entry[MCE_LOG_LEN];
325 struct mce mce_outentry[MCE_LOG_LEN];
326
327 /* Fifo in/out counters */
328 unsigned mce_in, mce_out;
329
330 /* Count indicator to show errors not got */
331 unsigned mce_overrun;
332
333 /* Memory description */
334 u64 tolm, tohm;
335};
336
Luck, Tonyde4772c2013-03-28 09:59:15 -0700337#define PCI_DESCR(device, function, device_id, opt) \
338 .dev = (device), \
339 .func = (function), \
340 .dev_id = (device_id), \
341 .optional = opt
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200342
343static const struct pci_id_descr pci_dev_descr_sbridge[] = {
344 /* Processor Home Agent */
Luck, Tonyde4772c2013-03-28 09:59:15 -0700345 { PCI_DESCR(14, 0, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_HA0, 0) },
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200346
347 /* Memory controller */
Luck, Tonyde4772c2013-03-28 09:59:15 -0700348 { PCI_DESCR(15, 0, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA, 0) },
349 { PCI_DESCR(15, 1, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_RAS, 0) },
350 { PCI_DESCR(15, 2, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD0, 0) },
351 { PCI_DESCR(15, 3, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD1, 0) },
352 { PCI_DESCR(15, 4, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD2, 0) },
353 { PCI_DESCR(15, 5, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD3, 0) },
354 { PCI_DESCR(17, 0, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_DDRIO, 1) },
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200355
356 /* System Address Decoder */
Luck, Tonyde4772c2013-03-28 09:59:15 -0700357 { PCI_DESCR(12, 6, PCI_DEVICE_ID_INTEL_SBRIDGE_SAD0, 0) },
358 { PCI_DESCR(12, 7, PCI_DEVICE_ID_INTEL_SBRIDGE_SAD1, 0) },
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200359
360 /* Broadcast Registers */
Luck, Tonyde4772c2013-03-28 09:59:15 -0700361 { PCI_DESCR(13, 6, PCI_DEVICE_ID_INTEL_SBRIDGE_BR, 0) },
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200362};
363
364#define PCI_ID_TABLE_ENTRY(A) { .descr=A, .n_devs = ARRAY_SIZE(A) }
365static const struct pci_id_table pci_dev_descr_sbridge_table[] = {
366 PCI_ID_TABLE_ENTRY(pci_dev_descr_sbridge),
367 {0,} /* 0 terminated list. */
368};
369
370/*
371 * pci_device_id table for which devices we are looking for
372 */
Lionel Debroux36c46f32012-02-27 07:41:47 +0100373static DEFINE_PCI_DEVICE_TABLE(sbridge_pci_tbl) = {
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200374 {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA)},
375 {0,} /* 0 terminated list. */
376};
377
378
379/****************************************************************************
David Mackey15ed1032012-04-17 11:30:52 -0700380 Ancillary status routines
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200381 ****************************************************************************/
382
383static inline int numrank(u32 mtr)
384{
385 int ranks = (1 << RANK_CNT_BITS(mtr));
386
387 if (ranks > 4) {
Joe Perches956b9ba2012-04-29 17:08:39 -0300388 edac_dbg(0, "Invalid number of ranks: %d (max = 4) raw value = %x (%04x)\n",
389 ranks, (unsigned int)RANK_CNT_BITS(mtr), mtr);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200390 return -EINVAL;
391 }
392
393 return ranks;
394}
395
396static inline int numrow(u32 mtr)
397{
398 int rows = (RANK_WIDTH_BITS(mtr) + 12);
399
400 if (rows < 13 || rows > 18) {
Joe Perches956b9ba2012-04-29 17:08:39 -0300401 edac_dbg(0, "Invalid number of rows: %d (should be between 14 and 17) raw value = %x (%04x)\n",
402 rows, (unsigned int)RANK_WIDTH_BITS(mtr), mtr);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200403 return -EINVAL;
404 }
405
406 return 1 << rows;
407}
408
409static inline int numcol(u32 mtr)
410{
411 int cols = (COL_WIDTH_BITS(mtr) + 10);
412
413 if (cols > 12) {
Joe Perches956b9ba2012-04-29 17:08:39 -0300414 edac_dbg(0, "Invalid number of cols: %d (max = 4) raw value = %x (%04x)\n",
415 cols, (unsigned int)COL_WIDTH_BITS(mtr), mtr);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200416 return -EINVAL;
417 }
418
419 return 1 << cols;
420}
421
422static struct sbridge_dev *get_sbridge_dev(u8 bus)
423{
424 struct sbridge_dev *sbridge_dev;
425
426 list_for_each_entry(sbridge_dev, &sbridge_edac_list, list) {
427 if (sbridge_dev->bus == bus)
428 return sbridge_dev;
429 }
430
431 return NULL;
432}
433
434static struct sbridge_dev *alloc_sbridge_dev(u8 bus,
435 const struct pci_id_table *table)
436{
437 struct sbridge_dev *sbridge_dev;
438
439 sbridge_dev = kzalloc(sizeof(*sbridge_dev), GFP_KERNEL);
440 if (!sbridge_dev)
441 return NULL;
442
443 sbridge_dev->pdev = kzalloc(sizeof(*sbridge_dev->pdev) * table->n_devs,
444 GFP_KERNEL);
445 if (!sbridge_dev->pdev) {
446 kfree(sbridge_dev);
447 return NULL;
448 }
449
450 sbridge_dev->bus = bus;
451 sbridge_dev->n_devs = table->n_devs;
452 list_add_tail(&sbridge_dev->list, &sbridge_edac_list);
453
454 return sbridge_dev;
455}
456
457static void free_sbridge_dev(struct sbridge_dev *sbridge_dev)
458{
459 list_del(&sbridge_dev->list);
460 kfree(sbridge_dev->pdev);
461 kfree(sbridge_dev);
462}
463
Aristeu Rozanskifb79a502013-10-30 13:26:57 -0300464static u64 sbridge_get_tolm(struct sbridge_pvt *pvt)
465{
466 u32 reg;
467
468 /* Address range is 32:28 */
469 pci_read_config_dword(pvt->pci_sad1, TOLM, &reg);
470 return GET_TOLM(reg);
471}
472
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200473/****************************************************************************
474 Memory check routines
475 ****************************************************************************/
476static struct pci_dev *get_pdev_slot_func(u8 bus, unsigned slot,
477 unsigned func)
478{
479 struct sbridge_dev *sbridge_dev = get_sbridge_dev(bus);
480 int i;
481
482 if (!sbridge_dev)
483 return NULL;
484
485 for (i = 0; i < sbridge_dev->n_devs; i++) {
486 if (!sbridge_dev->pdev[i])
487 continue;
488
489 if (PCI_SLOT(sbridge_dev->pdev[i]->devfn) == slot &&
490 PCI_FUNC(sbridge_dev->pdev[i]->devfn) == func) {
Joe Perches956b9ba2012-04-29 17:08:39 -0300491 edac_dbg(1, "Associated %02x.%02x.%d with %p\n",
492 bus, slot, func, sbridge_dev->pdev[i]);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200493 return sbridge_dev->pdev[i];
494 }
495 }
496
497 return NULL;
498}
499
500/**
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -0300501 * check_if_ecc_is_active() - Checks if ECC is active
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200502 * bus: Device bus
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200503 */
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -0300504static int check_if_ecc_is_active(const u8 bus)
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200505{
506 struct pci_dev *pdev = NULL;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200507 u32 mcmtr;
508
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200509 pdev = get_pdev_slot_func(bus, 15, 0);
510 if (!pdev) {
511 sbridge_printk(KERN_ERR, "Couldn't find PCI device "
512 "%2x.%02d.%d!!!\n",
513 bus, 15, 0);
514 return -ENODEV;
515 }
516
517 pci_read_config_dword(pdev, MCMTR, &mcmtr);
518 if (!IS_ECC_ENABLED(mcmtr)) {
519 sbridge_printk(KERN_ERR, "ECC is disabled. Aborting\n");
520 return -ENODEV;
521 }
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200522 return 0;
523}
524
Mauro Carvalho Chehab084a4fc2012-01-27 18:38:08 -0300525static int get_dimm_config(struct mem_ctl_info *mci)
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200526{
527 struct sbridge_pvt *pvt = mci->pvt_info;
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -0300528 struct dimm_info *dimm;
Mauro Carvalho Chehabdeb09dd2012-09-20 12:09:30 -0300529 unsigned i, j, banks, ranks, rows, cols, npages;
530 u64 size;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200531 u32 reg;
532 enum edac_type mode;
Mark A. Grondonac6e13b52011-10-18 11:02:58 -0200533 enum mem_type mtype;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200534
Aristeu Rozanskief1e8d02013-10-30 13:26:56 -0300535 pvt->info.rankcfgr = SB_RANK_CFG_A;
536
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200537 pci_read_config_dword(pvt->pci_br, SAD_TARGET, &reg);
538 pvt->sbridge_dev->source_id = SOURCE_ID(reg);
539
540 pci_read_config_dword(pvt->pci_br, SAD_CONTROL, &reg);
541 pvt->sbridge_dev->node_id = NODE_ID(reg);
Joe Perches956b9ba2012-04-29 17:08:39 -0300542 edac_dbg(0, "mc#%d: Node ID: %d, source ID: %d\n",
543 pvt->sbridge_dev->mc,
544 pvt->sbridge_dev->node_id,
545 pvt->sbridge_dev->source_id);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200546
547 pci_read_config_dword(pvt->pci_ras, RASENABLES, &reg);
548 if (IS_MIRROR_ENABLED(reg)) {
Joe Perches956b9ba2012-04-29 17:08:39 -0300549 edac_dbg(0, "Memory mirror is enabled\n");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200550 pvt->is_mirrored = true;
551 } else {
Joe Perches956b9ba2012-04-29 17:08:39 -0300552 edac_dbg(0, "Memory mirror is disabled\n");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200553 pvt->is_mirrored = false;
554 }
555
556 pci_read_config_dword(pvt->pci_ta, MCMTR, &pvt->info.mcmtr);
557 if (IS_LOCKSTEP_ENABLED(pvt->info.mcmtr)) {
Joe Perches956b9ba2012-04-29 17:08:39 -0300558 edac_dbg(0, "Lockstep is enabled\n");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200559 mode = EDAC_S8ECD8ED;
560 pvt->is_lockstep = true;
561 } else {
Joe Perches956b9ba2012-04-29 17:08:39 -0300562 edac_dbg(0, "Lockstep is disabled\n");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200563 mode = EDAC_S4ECD4ED;
564 pvt->is_lockstep = false;
565 }
566 if (IS_CLOSE_PG(pvt->info.mcmtr)) {
Joe Perches956b9ba2012-04-29 17:08:39 -0300567 edac_dbg(0, "address map is on closed page mode\n");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200568 pvt->is_close_pg = true;
569 } else {
Joe Perches956b9ba2012-04-29 17:08:39 -0300570 edac_dbg(0, "address map is on open page mode\n");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200571 pvt->is_close_pg = false;
572 }
573
Luck, Tonyde4772c2013-03-28 09:59:15 -0700574 if (pvt->pci_ddrio) {
Aristeu Rozanskief1e8d02013-10-30 13:26:56 -0300575 pci_read_config_dword(pvt->pci_ddrio, pvt->info.rankcfgr,
576 &reg);
Luck, Tonyde4772c2013-03-28 09:59:15 -0700577 if (IS_RDIMM_ENABLED(reg)) {
578 /* FIXME: Can also be LRDIMM */
579 edac_dbg(0, "Memory is registered\n");
580 mtype = MEM_RDDR3;
581 } else {
582 edac_dbg(0, "Memory is unregistered\n");
583 mtype = MEM_DDR3;
584 }
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200585 } else {
Luck, Tonyde4772c2013-03-28 09:59:15 -0700586 edac_dbg(0, "Cannot determine memory type\n");
587 mtype = MEM_UNKNOWN;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200588 }
589
590 /* On all supported DDR3 DIMM types, there are 8 banks available */
591 banks = 8;
592
593 for (i = 0; i < NUM_CHANNELS; i++) {
594 u32 mtr;
595
596 for (j = 0; j < ARRAY_SIZE(mtr_regs); j++) {
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -0300597 dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers,
598 i, j, 0);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200599 pci_read_config_dword(pvt->pci_tad[i],
600 mtr_regs[j], &mtr);
Joe Perches956b9ba2012-04-29 17:08:39 -0300601 edac_dbg(4, "Channel #%d MTR%d = %x\n", i, j, mtr);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200602 if (IS_DIMM_PRESENT(mtr)) {
603 pvt->channel[i].dimms++;
604
605 ranks = numrank(mtr);
606 rows = numrow(mtr);
607 cols = numcol(mtr);
608
609 /* DDR3 has 8 I/O banks */
Mauro Carvalho Chehabdeb09dd2012-09-20 12:09:30 -0300610 size = ((u64)rows * cols * banks * ranks) >> (20 - 3);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200611 npages = MiB_TO_PAGES(size);
612
Mauro Carvalho Chehabdeb09dd2012-09-20 12:09:30 -0300613 edac_dbg(0, "mc#%d: channel %d, dimm %d, %Ld Mb (%d pages) bank: %d, rank: %d, row: %#x, col: %#x\n",
Joe Perches956b9ba2012-04-29 17:08:39 -0300614 pvt->sbridge_dev->mc, i, j,
615 size, npages,
616 banks, ranks, rows, cols);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200617
Mauro Carvalho Chehaba895bf82012-01-28 09:09:38 -0300618 dimm->nr_pages = npages;
Mauro Carvalho Chehab084a4fc2012-01-27 18:38:08 -0300619 dimm->grain = 32;
620 dimm->dtype = (banks == 8) ? DEV_X8 : DEV_X4;
621 dimm->mtype = mtype;
622 dimm->edac_mode = mode;
623 snprintf(dimm->label, sizeof(dimm->label),
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200624 "CPU_SrcID#%u_Channel#%u_DIMM#%u",
625 pvt->sbridge_dev->source_id, i, j);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200626 }
627 }
628 }
629
630 return 0;
631}
632
633static void get_memory_layout(const struct mem_ctl_info *mci)
634{
635 struct sbridge_pvt *pvt = mci->pvt_info;
636 int i, j, k, n_sads, n_tads, sad_interl;
637 u32 reg;
638 u64 limit, prv = 0;
639 u64 tmp_mb;
Mauro Carvalho Chehab5b889e32011-11-07 18:26:53 -0300640 u32 mb, kb;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200641 u32 rir_way;
642
643 /*
644 * Step 1) Get TOLM/TOHM ranges
645 */
646
Aristeu Rozanskifb79a502013-10-30 13:26:57 -0300647 pvt->tolm = pvt->info.get_tolm(pvt);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200648 tmp_mb = (1 + pvt->tolm) >> 20;
649
Mauro Carvalho Chehab5b889e32011-11-07 18:26:53 -0300650 mb = div_u64_rem(tmp_mb, 1000, &kb);
Joe Perches956b9ba2012-04-29 17:08:39 -0300651 edac_dbg(0, "TOLM: %u.%03u GB (0x%016Lx)\n", mb, kb, (u64)pvt->tolm);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200652
653 /* Address range is already 45:25 */
654 pci_read_config_dword(pvt->pci_sad1, TOHM,
655 &reg);
656 pvt->tohm = GET_TOHM(reg);
657 tmp_mb = (1 + pvt->tohm) >> 20;
658
Mauro Carvalho Chehab5b889e32011-11-07 18:26:53 -0300659 mb = div_u64_rem(tmp_mb, 1000, &kb);
Mauro Carvalho Chehabda14d932012-10-25 09:07:21 -0200660 edac_dbg(0, "TOHM: %u.%03u GB (0x%016Lx)\n", mb, kb, (u64)pvt->tohm);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200661
662 /*
663 * Step 2) Get SAD range and SAD Interleave list
664 * TAD registers contain the interleave wayness. However, it
665 * seems simpler to just discover it indirectly, with the
666 * algorithm bellow.
667 */
668 prv = 0;
669 for (n_sads = 0; n_sads < MAX_SAD; n_sads++) {
670 /* SAD_LIMIT Address range is 45:26 */
671 pci_read_config_dword(pvt->pci_sad0, dram_rule[n_sads],
672 &reg);
673 limit = SAD_LIMIT(reg);
674
675 if (!DRAM_RULE_ENABLE(reg))
676 continue;
677
678 if (limit <= prv)
679 break;
680
681 tmp_mb = (limit + 1) >> 20;
Mauro Carvalho Chehab5b889e32011-11-07 18:26:53 -0300682 mb = div_u64_rem(tmp_mb, 1000, &kb);
Joe Perches956b9ba2012-04-29 17:08:39 -0300683 edac_dbg(0, "SAD#%d %s up to %u.%03u GB (0x%016Lx) Interleave: %s reg=0x%08x\n",
684 n_sads,
685 get_dram_attr(reg),
686 mb, kb,
687 ((u64)tmp_mb) << 20L,
688 INTERLEAVE_MODE(reg) ? "8:6" : "[8:6]XOR[18:16]",
689 reg);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200690 prv = limit;
691
692 pci_read_config_dword(pvt->pci_sad0, interleave_list[n_sads],
693 &reg);
694 sad_interl = sad_pkg(reg, 0);
695 for (j = 0; j < 8; j++) {
696 if (j > 0 && sad_interl == sad_pkg(reg, j))
697 break;
698
Joe Perches956b9ba2012-04-29 17:08:39 -0300699 edac_dbg(0, "SAD#%d, interleave #%d: %d\n",
700 n_sads, j, sad_pkg(reg, j));
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200701 }
702 }
703
704 /*
705 * Step 3) Get TAD range
706 */
707 prv = 0;
708 for (n_tads = 0; n_tads < MAX_TAD; n_tads++) {
709 pci_read_config_dword(pvt->pci_ha0, tad_dram_rule[n_tads],
710 &reg);
711 limit = TAD_LIMIT(reg);
712 if (limit <= prv)
713 break;
714 tmp_mb = (limit + 1) >> 20;
715
Mauro Carvalho Chehab5b889e32011-11-07 18:26:53 -0300716 mb = div_u64_rem(tmp_mb, 1000, &kb);
Joe Perches956b9ba2012-04-29 17:08:39 -0300717 edac_dbg(0, "TAD#%d: up to %u.%03u GB (0x%016Lx), socket interleave %d, memory interleave %d, TGT: %d, %d, %d, %d, reg=0x%08x\n",
718 n_tads, mb, kb,
719 ((u64)tmp_mb) << 20L,
720 (u32)TAD_SOCK(reg),
721 (u32)TAD_CH(reg),
722 (u32)TAD_TGT0(reg),
723 (u32)TAD_TGT1(reg),
724 (u32)TAD_TGT2(reg),
725 (u32)TAD_TGT3(reg),
726 reg);
Hui Wang7fae0db2012-02-06 04:11:01 -0300727 prv = limit;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200728 }
729
730 /*
731 * Step 4) Get TAD offsets, per each channel
732 */
733 for (i = 0; i < NUM_CHANNELS; i++) {
734 if (!pvt->channel[i].dimms)
735 continue;
736 for (j = 0; j < n_tads; j++) {
737 pci_read_config_dword(pvt->pci_tad[i],
738 tad_ch_nilv_offset[j],
739 &reg);
740 tmp_mb = TAD_OFFSET(reg) >> 20;
Mauro Carvalho Chehab5b889e32011-11-07 18:26:53 -0300741 mb = div_u64_rem(tmp_mb, 1000, &kb);
Joe Perches956b9ba2012-04-29 17:08:39 -0300742 edac_dbg(0, "TAD CH#%d, offset #%d: %u.%03u GB (0x%016Lx), reg=0x%08x\n",
743 i, j,
744 mb, kb,
745 ((u64)tmp_mb) << 20L,
746 reg);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200747 }
748 }
749
750 /*
751 * Step 6) Get RIR Wayness/Limit, per each channel
752 */
753 for (i = 0; i < NUM_CHANNELS; i++) {
754 if (!pvt->channel[i].dimms)
755 continue;
756 for (j = 0; j < MAX_RIR_RANGES; j++) {
757 pci_read_config_dword(pvt->pci_tad[i],
758 rir_way_limit[j],
759 &reg);
760
761 if (!IS_RIR_VALID(reg))
762 continue;
763
764 tmp_mb = RIR_LIMIT(reg) >> 20;
765 rir_way = 1 << RIR_WAY(reg);
Mauro Carvalho Chehab5b889e32011-11-07 18:26:53 -0300766 mb = div_u64_rem(tmp_mb, 1000, &kb);
Joe Perches956b9ba2012-04-29 17:08:39 -0300767 edac_dbg(0, "CH#%d RIR#%d, limit: %u.%03u GB (0x%016Lx), way: %d, reg=0x%08x\n",
768 i, j,
769 mb, kb,
770 ((u64)tmp_mb) << 20L,
771 rir_way,
772 reg);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200773
774 for (k = 0; k < rir_way; k++) {
775 pci_read_config_dword(pvt->pci_tad[i],
776 rir_offset[j][k],
777 &reg);
778 tmp_mb = RIR_OFFSET(reg) << 6;
779
Mauro Carvalho Chehab5b889e32011-11-07 18:26:53 -0300780 mb = div_u64_rem(tmp_mb, 1000, &kb);
Joe Perches956b9ba2012-04-29 17:08:39 -0300781 edac_dbg(0, "CH#%d RIR#%d INTL#%d, offset %u.%03u GB (0x%016Lx), tgt: %d, reg=0x%08x\n",
782 i, j, k,
783 mb, kb,
784 ((u64)tmp_mb) << 20L,
785 (u32)RIR_RNK_TGT(reg),
786 reg);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200787 }
788 }
789 }
790}
791
792struct mem_ctl_info *get_mci_for_node_id(u8 node_id)
793{
794 struct sbridge_dev *sbridge_dev;
795
796 list_for_each_entry(sbridge_dev, &sbridge_edac_list, list) {
797 if (sbridge_dev->node_id == node_id)
798 return sbridge_dev->mci;
799 }
800 return NULL;
801}
802
803static int get_memory_error_data(struct mem_ctl_info *mci,
804 u64 addr,
805 u8 *socket,
806 long *channel_mask,
807 u8 *rank,
Mauro Carvalho Chehabe17a2f42a2012-05-11 11:41:45 -0300808 char **area_type, char *msg)
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200809{
810 struct mem_ctl_info *new_mci;
811 struct sbridge_pvt *pvt = mci->pvt_info;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200812 int n_rir, n_sads, n_tads, sad_way, sck_xch;
813 int sad_interl, idx, base_ch;
814 int interleave_mode;
815 unsigned sad_interleave[MAX_INTERLEAVE];
816 u32 reg;
817 u8 ch_way,sck_way;
818 u32 tad_offset;
819 u32 rir_way;
Mauro Carvalho Chehab5b889e32011-11-07 18:26:53 -0300820 u32 mb, kb;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200821 u64 ch_addr, offset, limit, prv = 0;
822
823
824 /*
825 * Step 0) Check if the address is at special memory ranges
826 * The check bellow is probably enough to fill all cases where
827 * the error is not inside a memory, except for the legacy
828 * range (e. g. VGA addresses). It is unlikely, however, that the
829 * memory controller would generate an error on that range.
830 */
Mauro Carvalho Chehab5b889e32011-11-07 18:26:53 -0300831 if ((addr > (u64) pvt->tolm) && (addr < (1LL << 32))) {
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200832 sprintf(msg, "Error at TOLM area, on addr 0x%08Lx", addr);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200833 return -EINVAL;
834 }
835 if (addr >= (u64)pvt->tohm) {
836 sprintf(msg, "Error at MMIOH area, on addr 0x%016Lx", addr);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200837 return -EINVAL;
838 }
839
840 /*
841 * Step 1) Get socket
842 */
843 for (n_sads = 0; n_sads < MAX_SAD; n_sads++) {
844 pci_read_config_dword(pvt->pci_sad0, dram_rule[n_sads],
845 &reg);
846
847 if (!DRAM_RULE_ENABLE(reg))
848 continue;
849
850 limit = SAD_LIMIT(reg);
851 if (limit <= prv) {
852 sprintf(msg, "Can't discover the memory socket");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200853 return -EINVAL;
854 }
855 if (addr <= limit)
856 break;
857 prv = limit;
858 }
859 if (n_sads == MAX_SAD) {
860 sprintf(msg, "Can't discover the memory socket");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200861 return -EINVAL;
862 }
Mauro Carvalho Chehabe17a2f42a2012-05-11 11:41:45 -0300863 *area_type = get_dram_attr(reg);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200864 interleave_mode = INTERLEAVE_MODE(reg);
865
866 pci_read_config_dword(pvt->pci_sad0, interleave_list[n_sads],
867 &reg);
868 sad_interl = sad_pkg(reg, 0);
869 for (sad_way = 0; sad_way < 8; sad_way++) {
870 if (sad_way > 0 && sad_interl == sad_pkg(reg, sad_way))
871 break;
872 sad_interleave[sad_way] = sad_pkg(reg, sad_way);
Joe Perches956b9ba2012-04-29 17:08:39 -0300873 edac_dbg(0, "SAD interleave #%d: %d\n",
874 sad_way, sad_interleave[sad_way]);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200875 }
Joe Perches956b9ba2012-04-29 17:08:39 -0300876 edac_dbg(0, "mc#%d: Error detected on SAD#%d: address 0x%016Lx < 0x%016Lx, Interleave [%d:6]%s\n",
877 pvt->sbridge_dev->mc,
878 n_sads,
879 addr,
880 limit,
881 sad_way + 7,
882 interleave_mode ? "" : "XOR[18:16]");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200883 if (interleave_mode)
884 idx = ((addr >> 6) ^ (addr >> 16)) & 7;
885 else
886 idx = (addr >> 6) & 7;
887 switch (sad_way) {
888 case 1:
889 idx = 0;
890 break;
891 case 2:
892 idx = idx & 1;
893 break;
894 case 4:
895 idx = idx & 3;
896 break;
897 case 8:
898 break;
899 default:
900 sprintf(msg, "Can't discover socket interleave");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200901 return -EINVAL;
902 }
903 *socket = sad_interleave[idx];
Joe Perches956b9ba2012-04-29 17:08:39 -0300904 edac_dbg(0, "SAD interleave index: %d (wayness %d) = CPU socket %d\n",
905 idx, sad_way, *socket);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200906
907 /*
908 * Move to the proper node structure, in order to access the
909 * right PCI registers
910 */
911 new_mci = get_mci_for_node_id(*socket);
912 if (!new_mci) {
913 sprintf(msg, "Struct for socket #%u wasn't initialized",
914 *socket);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200915 return -EINVAL;
916 }
917 mci = new_mci;
918 pvt = mci->pvt_info;
919
920 /*
921 * Step 2) Get memory channel
922 */
923 prv = 0;
924 for (n_tads = 0; n_tads < MAX_TAD; n_tads++) {
925 pci_read_config_dword(pvt->pci_ha0, tad_dram_rule[n_tads],
926 &reg);
927 limit = TAD_LIMIT(reg);
928 if (limit <= prv) {
929 sprintf(msg, "Can't discover the memory channel");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200930 return -EINVAL;
931 }
932 if (addr <= limit)
933 break;
934 prv = limit;
935 }
936 ch_way = TAD_CH(reg) + 1;
937 sck_way = TAD_SOCK(reg) + 1;
938 /*
939 * FIXME: Is it right to always use channel 0 for offsets?
940 */
941 pci_read_config_dword(pvt->pci_tad[0],
942 tad_ch_nilv_offset[n_tads],
943 &tad_offset);
944
945 if (ch_way == 3)
946 idx = addr >> 6;
947 else
948 idx = addr >> (6 + sck_way);
949 idx = idx % ch_way;
950
951 /*
952 * FIXME: Shouldn't we use CHN_IDX_OFFSET() here, when ch_way == 3 ???
953 */
954 switch (idx) {
955 case 0:
956 base_ch = TAD_TGT0(reg);
957 break;
958 case 1:
959 base_ch = TAD_TGT1(reg);
960 break;
961 case 2:
962 base_ch = TAD_TGT2(reg);
963 break;
964 case 3:
965 base_ch = TAD_TGT3(reg);
966 break;
967 default:
968 sprintf(msg, "Can't discover the TAD target");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200969 return -EINVAL;
970 }
971 *channel_mask = 1 << base_ch;
972
973 if (pvt->is_mirrored) {
974 *channel_mask |= 1 << ((base_ch + 2) % 4);
975 switch(ch_way) {
976 case 2:
977 case 4:
978 sck_xch = 1 << sck_way * (ch_way >> 1);
979 break;
980 default:
981 sprintf(msg, "Invalid mirror set. Can't decode addr");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200982 return -EINVAL;
983 }
984 } else
985 sck_xch = (1 << sck_way) * ch_way;
986
987 if (pvt->is_lockstep)
988 *channel_mask |= 1 << ((base_ch + 1) % 4);
989
990 offset = TAD_OFFSET(tad_offset);
991
Joe Perches956b9ba2012-04-29 17:08:39 -0300992 edac_dbg(0, "TAD#%d: address 0x%016Lx < 0x%016Lx, socket interleave %d, channel interleave %d (offset 0x%08Lx), index %d, base ch: %d, ch mask: 0x%02lx\n",
993 n_tads,
994 addr,
995 limit,
996 (u32)TAD_SOCK(reg),
997 ch_way,
998 offset,
999 idx,
1000 base_ch,
1001 *channel_mask);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001002
1003 /* Calculate channel address */
1004 /* Remove the TAD offset */
1005
1006 if (offset > addr) {
1007 sprintf(msg, "Can't calculate ch addr: TAD offset 0x%08Lx is too high for addr 0x%08Lx!",
1008 offset, addr);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001009 return -EINVAL;
1010 }
1011 addr -= offset;
1012 /* Store the low bits [0:6] of the addr */
1013 ch_addr = addr & 0x7f;
1014 /* Remove socket wayness and remove 6 bits */
1015 addr >>= 6;
Mauro Carvalho Chehab5b889e32011-11-07 18:26:53 -03001016 addr = div_u64(addr, sck_xch);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001017#if 0
1018 /* Divide by channel way */
1019 addr = addr / ch_way;
1020#endif
1021 /* Recover the last 6 bits */
1022 ch_addr |= addr << 6;
1023
1024 /*
1025 * Step 3) Decode rank
1026 */
1027 for (n_rir = 0; n_rir < MAX_RIR_RANGES; n_rir++) {
1028 pci_read_config_dword(pvt->pci_tad[base_ch],
1029 rir_way_limit[n_rir],
1030 &reg);
1031
1032 if (!IS_RIR_VALID(reg))
1033 continue;
1034
1035 limit = RIR_LIMIT(reg);
Mauro Carvalho Chehab5b889e32011-11-07 18:26:53 -03001036 mb = div_u64_rem(limit >> 20, 1000, &kb);
Joe Perches956b9ba2012-04-29 17:08:39 -03001037 edac_dbg(0, "RIR#%d, limit: %u.%03u GB (0x%016Lx), way: %d\n",
1038 n_rir,
1039 mb, kb,
1040 limit,
1041 1 << RIR_WAY(reg));
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001042 if (ch_addr <= limit)
1043 break;
1044 }
1045 if (n_rir == MAX_RIR_RANGES) {
1046 sprintf(msg, "Can't discover the memory rank for ch addr 0x%08Lx",
1047 ch_addr);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001048 return -EINVAL;
1049 }
1050 rir_way = RIR_WAY(reg);
1051 if (pvt->is_close_pg)
1052 idx = (ch_addr >> 6);
1053 else
1054 idx = (ch_addr >> 13); /* FIXME: Datasheet says to shift by 15 */
1055 idx %= 1 << rir_way;
1056
1057 pci_read_config_dword(pvt->pci_tad[base_ch],
1058 rir_offset[n_rir][idx],
1059 &reg);
1060 *rank = RIR_RNK_TGT(reg);
1061
Joe Perches956b9ba2012-04-29 17:08:39 -03001062 edac_dbg(0, "RIR#%d: channel address 0x%08Lx < 0x%08Lx, RIR interleave %d, index %d\n",
1063 n_rir,
1064 ch_addr,
1065 limit,
1066 rir_way,
1067 idx);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001068
1069 return 0;
1070}
1071
1072/****************************************************************************
1073 Device initialization routines: put/get, init/exit
1074 ****************************************************************************/
1075
1076/*
1077 * sbridge_put_all_devices 'put' all the devices that we have
1078 * reserved via 'get'
1079 */
1080static void sbridge_put_devices(struct sbridge_dev *sbridge_dev)
1081{
1082 int i;
1083
Joe Perches956b9ba2012-04-29 17:08:39 -03001084 edac_dbg(0, "\n");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001085 for (i = 0; i < sbridge_dev->n_devs; i++) {
1086 struct pci_dev *pdev = sbridge_dev->pdev[i];
1087 if (!pdev)
1088 continue;
Joe Perches956b9ba2012-04-29 17:08:39 -03001089 edac_dbg(0, "Removing dev %02x:%02x.%d\n",
1090 pdev->bus->number,
1091 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001092 pci_dev_put(pdev);
1093 }
1094}
1095
1096static void sbridge_put_all_devices(void)
1097{
1098 struct sbridge_dev *sbridge_dev, *tmp;
1099
1100 list_for_each_entry_safe(sbridge_dev, tmp, &sbridge_edac_list, list) {
1101 sbridge_put_devices(sbridge_dev);
1102 free_sbridge_dev(sbridge_dev);
1103 }
1104}
1105
1106/*
1107 * sbridge_get_all_devices Find and perform 'get' operation on the MCH's
1108 * device/functions we want to reference for this driver
1109 *
1110 * Need to 'get' device 16 func 1 and func 2
1111 */
1112static int sbridge_get_onedevice(struct pci_dev **prev,
1113 u8 *num_mc,
1114 const struct pci_id_table *table,
1115 const unsigned devno)
1116{
1117 struct sbridge_dev *sbridge_dev;
1118 const struct pci_id_descr *dev_descr = &table->descr[devno];
1119
1120 struct pci_dev *pdev = NULL;
1121 u8 bus = 0;
1122
1123 sbridge_printk(KERN_INFO,
1124 "Seeking for: dev %02x.%d PCI ID %04x:%04x\n",
1125 dev_descr->dev, dev_descr->func,
1126 PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1127
1128 pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1129 dev_descr->dev_id, *prev);
1130
1131 if (!pdev) {
1132 if (*prev) {
1133 *prev = pdev;
1134 return 0;
1135 }
1136
1137 if (dev_descr->optional)
1138 return 0;
1139
1140 if (devno == 0)
1141 return -ENODEV;
1142
1143 sbridge_printk(KERN_INFO,
1144 "Device not found: dev %02x.%d PCI ID %04x:%04x\n",
1145 dev_descr->dev, dev_descr->func,
1146 PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1147
1148 /* End of list, leave */
1149 return -ENODEV;
1150 }
1151 bus = pdev->bus->number;
1152
1153 sbridge_dev = get_sbridge_dev(bus);
1154 if (!sbridge_dev) {
1155 sbridge_dev = alloc_sbridge_dev(bus, table);
1156 if (!sbridge_dev) {
1157 pci_dev_put(pdev);
1158 return -ENOMEM;
1159 }
1160 (*num_mc)++;
1161 }
1162
1163 if (sbridge_dev->pdev[devno]) {
1164 sbridge_printk(KERN_ERR,
1165 "Duplicated device for "
1166 "dev %02x:%d.%d PCI ID %04x:%04x\n",
1167 bus, dev_descr->dev, dev_descr->func,
1168 PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1169 pci_dev_put(pdev);
1170 return -ENODEV;
1171 }
1172
1173 sbridge_dev->pdev[devno] = pdev;
1174
1175 /* Sanity check */
1176 if (unlikely(PCI_SLOT(pdev->devfn) != dev_descr->dev ||
1177 PCI_FUNC(pdev->devfn) != dev_descr->func)) {
1178 sbridge_printk(KERN_ERR,
1179 "Device PCI ID %04x:%04x "
1180 "has dev %02x:%d.%d instead of dev %02x:%02x.%d\n",
1181 PCI_VENDOR_ID_INTEL, dev_descr->dev_id,
1182 bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
1183 bus, dev_descr->dev, dev_descr->func);
1184 return -ENODEV;
1185 }
1186
1187 /* Be sure that the device is enabled */
1188 if (unlikely(pci_enable_device(pdev) < 0)) {
1189 sbridge_printk(KERN_ERR,
1190 "Couldn't enable "
1191 "dev %02x:%d.%d PCI ID %04x:%04x\n",
1192 bus, dev_descr->dev, dev_descr->func,
1193 PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1194 return -ENODEV;
1195 }
1196
Joe Perches956b9ba2012-04-29 17:08:39 -03001197 edac_dbg(0, "Detected dev %02x:%d.%d PCI ID %04x:%04x\n",
1198 bus, dev_descr->dev, dev_descr->func,
1199 PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001200
1201 /*
1202 * As stated on drivers/pci/search.c, the reference count for
1203 * @from is always decremented if it is not %NULL. So, as we need
1204 * to get all devices up to null, we need to do a get for the device
1205 */
1206 pci_dev_get(pdev);
1207
1208 *prev = pdev;
1209
1210 return 0;
1211}
1212
1213static int sbridge_get_all_devices(u8 *num_mc)
1214{
1215 int i, rc;
1216 struct pci_dev *pdev = NULL;
1217 const struct pci_id_table *table = pci_dev_descr_sbridge_table;
1218
1219 while (table && table->descr) {
1220 for (i = 0; i < table->n_devs; i++) {
1221 pdev = NULL;
1222 do {
1223 rc = sbridge_get_onedevice(&pdev, num_mc,
1224 table, i);
1225 if (rc < 0) {
1226 if (i == 0) {
1227 i = table->n_devs;
1228 break;
1229 }
1230 sbridge_put_all_devices();
1231 return -ENODEV;
1232 }
1233 } while (pdev);
1234 }
1235 table++;
1236 }
1237
1238 return 0;
1239}
1240
1241static int mci_bind_devs(struct mem_ctl_info *mci,
1242 struct sbridge_dev *sbridge_dev)
1243{
1244 struct sbridge_pvt *pvt = mci->pvt_info;
1245 struct pci_dev *pdev;
1246 int i, func, slot;
1247
1248 for (i = 0; i < sbridge_dev->n_devs; i++) {
1249 pdev = sbridge_dev->pdev[i];
1250 if (!pdev)
1251 continue;
1252 slot = PCI_SLOT(pdev->devfn);
1253 func = PCI_FUNC(pdev->devfn);
1254 switch (slot) {
1255 case 12:
1256 switch (func) {
1257 case 6:
1258 pvt->pci_sad0 = pdev;
1259 break;
1260 case 7:
1261 pvt->pci_sad1 = pdev;
1262 break;
1263 default:
1264 goto error;
1265 }
1266 break;
1267 case 13:
1268 switch (func) {
1269 case 6:
1270 pvt->pci_br = pdev;
1271 break;
1272 default:
1273 goto error;
1274 }
1275 break;
1276 case 14:
1277 switch (func) {
1278 case 0:
1279 pvt->pci_ha0 = pdev;
1280 break;
1281 default:
1282 goto error;
1283 }
1284 break;
1285 case 15:
1286 switch (func) {
1287 case 0:
1288 pvt->pci_ta = pdev;
1289 break;
1290 case 1:
1291 pvt->pci_ras = pdev;
1292 break;
1293 case 2:
1294 case 3:
1295 case 4:
1296 case 5:
1297 pvt->pci_tad[func - 2] = pdev;
1298 break;
1299 default:
1300 goto error;
1301 }
1302 break;
1303 case 17:
1304 switch (func) {
1305 case 0:
1306 pvt->pci_ddrio = pdev;
1307 break;
1308 default:
1309 goto error;
1310 }
1311 break;
1312 default:
1313 goto error;
1314 }
1315
Joe Perches956b9ba2012-04-29 17:08:39 -03001316 edac_dbg(0, "Associated PCI %02x.%02d.%d with dev = %p\n",
1317 sbridge_dev->bus,
1318 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
1319 pdev);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001320 }
1321
1322 /* Check if everything were registered */
1323 if (!pvt->pci_sad0 || !pvt->pci_sad1 || !pvt->pci_ha0 ||
Luck, Tonyde4772c2013-03-28 09:59:15 -07001324 !pvt-> pci_tad || !pvt->pci_ras || !pvt->pci_ta)
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001325 goto enodev;
1326
1327 for (i = 0; i < NUM_CHANNELS; i++) {
1328 if (!pvt->pci_tad[i])
1329 goto enodev;
1330 }
1331 return 0;
1332
1333enodev:
1334 sbridge_printk(KERN_ERR, "Some needed devices are missing\n");
1335 return -ENODEV;
1336
1337error:
1338 sbridge_printk(KERN_ERR, "Device %d, function %d "
1339 "is out of the expected range\n",
1340 slot, func);
1341 return -EINVAL;
1342}
1343
1344/****************************************************************************
1345 Error check routines
1346 ****************************************************************************/
1347
1348/*
1349 * While Sandy Bridge has error count registers, SMI BIOS read values from
1350 * and resets the counters. So, they are not reliable for the OS to read
1351 * from them. So, we have no option but to just trust on whatever MCE is
1352 * telling us about the errors.
1353 */
1354static void sbridge_mce_output_error(struct mem_ctl_info *mci,
1355 const struct mce *m)
1356{
1357 struct mem_ctl_info *new_mci;
1358 struct sbridge_pvt *pvt = mci->pvt_info;
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001359 enum hw_event_mc_err_type tp_event;
Mauro Carvalho Chehabe17a2f42a2012-05-11 11:41:45 -03001360 char *type, *optype, msg[256];
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001361 bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0);
1362 bool overflow = GET_BITFIELD(m->status, 62, 62);
1363 bool uncorrected_error = GET_BITFIELD(m->status, 61, 61);
1364 bool recoverable = GET_BITFIELD(m->status, 56, 56);
1365 u32 core_err_cnt = GET_BITFIELD(m->status, 38, 52);
1366 u32 mscod = GET_BITFIELD(m->status, 16, 31);
1367 u32 errcode = GET_BITFIELD(m->status, 0, 15);
1368 u32 channel = GET_BITFIELD(m->status, 0, 3);
1369 u32 optypenum = GET_BITFIELD(m->status, 4, 6);
1370 long channel_mask, first_channel;
1371 u8 rank, socket;
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001372 int rc, dimm;
Mauro Carvalho Chehabe17a2f42a2012-05-11 11:41:45 -03001373 char *area_type = NULL;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001374
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001375 if (uncorrected_error) {
1376 if (ripv) {
1377 type = "FATAL";
1378 tp_event = HW_EVENT_ERR_FATAL;
1379 } else {
1380 type = "NON_FATAL";
1381 tp_event = HW_EVENT_ERR_UNCORRECTED;
1382 }
1383 } else {
1384 type = "CORRECTED";
1385 tp_event = HW_EVENT_ERR_CORRECTED;
1386 }
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001387
1388 /*
David Mackey15ed1032012-04-17 11:30:52 -07001389 * According with Table 15-9 of the Intel Architecture spec vol 3A,
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001390 * memory errors should fit in this mask:
1391 * 000f 0000 1mmm cccc (binary)
1392 * where:
1393 * f = Correction Report Filtering Bit. If 1, subsequent errors
1394 * won't be shown
1395 * mmm = error type
1396 * cccc = channel
1397 * If the mask doesn't match, report an error to the parsing logic
1398 */
1399 if (! ((errcode & 0xef80) == 0x80)) {
1400 optype = "Can't parse: it is not a mem";
1401 } else {
1402 switch (optypenum) {
1403 case 0:
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001404 optype = "generic undef request error";
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001405 break;
1406 case 1:
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001407 optype = "memory read error";
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001408 break;
1409 case 2:
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001410 optype = "memory write error";
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001411 break;
1412 case 3:
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001413 optype = "addr/cmd error";
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001414 break;
1415 case 4:
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001416 optype = "memory scrubbing error";
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001417 break;
1418 default:
1419 optype = "reserved";
1420 break;
1421 }
1422 }
1423
1424 rc = get_memory_error_data(mci, m->addr, &socket,
Mauro Carvalho Chehabe17a2f42a2012-05-11 11:41:45 -03001425 &channel_mask, &rank, &area_type, msg);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001426 if (rc < 0)
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001427 goto err_parsing;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001428 new_mci = get_mci_for_node_id(socket);
1429 if (!new_mci) {
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001430 strcpy(msg, "Error: socket got corrupted!");
1431 goto err_parsing;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001432 }
1433 mci = new_mci;
1434 pvt = mci->pvt_info;
1435
1436 first_channel = find_first_bit(&channel_mask, NUM_CHANNELS);
1437
1438 if (rank < 4)
1439 dimm = 0;
1440 else if (rank < 8)
1441 dimm = 1;
1442 else
1443 dimm = 2;
1444
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001445
1446 /*
Mauro Carvalho Chehabe17a2f42a2012-05-11 11:41:45 -03001447 * FIXME: On some memory configurations (mirror, lockstep), the
1448 * Memory Controller can't point the error to a single DIMM. The
1449 * EDAC core should be handling the channel mask, in order to point
1450 * to the group of dimm's where the error may be happening.
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001451 */
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001452 snprintf(msg, sizeof(msg),
Mauro Carvalho Chehabc1053832012-06-04 13:40:05 -03001453 "%s%s area:%s err_code:%04x:%04x socket:%d channel_mask:%ld rank:%d",
Mauro Carvalho Chehabe17a2f42a2012-05-11 11:41:45 -03001454 overflow ? " OVERFLOW" : "",
1455 (uncorrected_error && recoverable) ? " recoverable" : "",
1456 area_type,
1457 mscod, errcode,
1458 socket,
1459 channel_mask,
1460 rank);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001461
Joe Perches956b9ba2012-04-29 17:08:39 -03001462 edac_dbg(0, "%s\n", msg);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001463
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001464 /* FIXME: need support for channel mask */
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001465
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001466 /* Call the helper to output message */
Mauro Carvalho Chehabc1053832012-06-04 13:40:05 -03001467 edac_mc_handle_error(tp_event, mci, core_err_cnt,
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001468 m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0,
1469 channel, dimm, -1,
Mauro Carvalho Chehab03f7eae2012-06-04 11:29:25 -03001470 optype, msg);
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001471 return;
1472err_parsing:
Mauro Carvalho Chehabc1053832012-06-04 13:40:05 -03001473 edac_mc_handle_error(tp_event, mci, core_err_cnt, 0, 0, 0,
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001474 -1, -1, -1,
Mauro Carvalho Chehab03f7eae2012-06-04 11:29:25 -03001475 msg, "");
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001476
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001477}
1478
1479/*
1480 * sbridge_check_error Retrieve and process errors reported by the
1481 * hardware. Called by the Core module.
1482 */
1483static void sbridge_check_error(struct mem_ctl_info *mci)
1484{
1485 struct sbridge_pvt *pvt = mci->pvt_info;
1486 int i;
1487 unsigned count = 0;
1488 struct mce *m;
1489
1490 /*
1491 * MCE first step: Copy all mce errors into a temporary buffer
1492 * We use a double buffering here, to reduce the risk of
1493 * loosing an error.
1494 */
1495 smp_rmb();
1496 count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in)
1497 % MCE_LOG_LEN;
1498 if (!count)
1499 return;
1500
1501 m = pvt->mce_outentry;
1502 if (pvt->mce_in + count > MCE_LOG_LEN) {
1503 unsigned l = MCE_LOG_LEN - pvt->mce_in;
1504
1505 memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l);
1506 smp_wmb();
1507 pvt->mce_in = 0;
1508 count -= l;
1509 m += l;
1510 }
1511 memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count);
1512 smp_wmb();
1513 pvt->mce_in += count;
1514
1515 smp_rmb();
1516 if (pvt->mce_overrun) {
1517 sbridge_printk(KERN_ERR, "Lost %d memory errors\n",
1518 pvt->mce_overrun);
1519 smp_wmb();
1520 pvt->mce_overrun = 0;
1521 }
1522
1523 /*
1524 * MCE second step: parse errors and display
1525 */
1526 for (i = 0; i < count; i++)
1527 sbridge_mce_output_error(mci, &pvt->mce_outentry[i]);
1528}
1529
1530/*
1531 * sbridge_mce_check_error Replicates mcelog routine to get errors
1532 * This routine simply queues mcelog errors, and
1533 * return. The error itself should be handled later
1534 * by sbridge_check_error.
1535 * WARNING: As this routine should be called at NMI time, extra care should
1536 * be taken to avoid deadlocks, and to be as fast as possible.
1537 */
Mauro Carvalho Chehab3d78c9a2011-10-20 19:33:46 -02001538static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val,
1539 void *data)
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001540{
Mauro Carvalho Chehab3d78c9a2011-10-20 19:33:46 -02001541 struct mce *mce = (struct mce *)data;
1542 struct mem_ctl_info *mci;
1543 struct sbridge_pvt *pvt;
1544
1545 mci = get_mci_for_node_id(mce->socketid);
1546 if (!mci)
1547 return NOTIFY_BAD;
1548 pvt = mci->pvt_info;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001549
1550 /*
1551 * Just let mcelog handle it if the error is
1552 * outside the memory controller. A memory error
1553 * is indicated by bit 7 = 1 and bits = 8-11,13-15 = 0.
1554 * bit 12 has an special meaning.
1555 */
1556 if ((mce->status & 0xefff) >> 7 != 1)
Mauro Carvalho Chehab3d78c9a2011-10-20 19:33:46 -02001557 return NOTIFY_DONE;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001558
1559 printk("sbridge: HANDLING MCE MEMORY ERROR\n");
1560
1561 printk("CPU %d: Machine Check Exception: %Lx Bank %d: %016Lx\n",
1562 mce->extcpu, mce->mcgstatus, mce->bank, mce->status);
1563 printk("TSC %llx ", mce->tsc);
1564 printk("ADDR %llx ", mce->addr);
1565 printk("MISC %llx ", mce->misc);
1566
1567 printk("PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n",
1568 mce->cpuvendor, mce->cpuid, mce->time,
1569 mce->socketid, mce->apicid);
1570
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001571 /* Only handle if it is the right mc controller */
1572 if (cpu_data(mce->cpu).phys_proc_id != pvt->sbridge_dev->mc)
Mauro Carvalho Chehab3d78c9a2011-10-20 19:33:46 -02001573 return NOTIFY_DONE;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001574
1575 smp_rmb();
1576 if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
1577 smp_wmb();
1578 pvt->mce_overrun++;
Mauro Carvalho Chehab3d78c9a2011-10-20 19:33:46 -02001579 return NOTIFY_DONE;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001580 }
1581
1582 /* Copy memory error at the ringbuffer */
1583 memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce));
1584 smp_wmb();
1585 pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN;
1586
1587 /* Handle fatal errors immediately */
1588 if (mce->mcgstatus & 1)
1589 sbridge_check_error(mci);
1590
1591 /* Advice mcelog that the error were handled */
Mauro Carvalho Chehab3d78c9a2011-10-20 19:33:46 -02001592 return NOTIFY_STOP;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001593}
1594
Mauro Carvalho Chehab3d78c9a2011-10-20 19:33:46 -02001595static struct notifier_block sbridge_mce_dec = {
1596 .notifier_call = sbridge_mce_check_error,
1597};
1598
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001599/****************************************************************************
1600 EDAC register/unregister logic
1601 ****************************************************************************/
1602
1603static void sbridge_unregister_mci(struct sbridge_dev *sbridge_dev)
1604{
1605 struct mem_ctl_info *mci = sbridge_dev->mci;
1606 struct sbridge_pvt *pvt;
1607
1608 if (unlikely(!mci || !mci->pvt_info)) {
Joe Perches956b9ba2012-04-29 17:08:39 -03001609 edac_dbg(0, "MC: dev = %p\n", &sbridge_dev->pdev[0]->dev);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001610
1611 sbridge_printk(KERN_ERR, "Couldn't find mci handler\n");
1612 return;
1613 }
1614
1615 pvt = mci->pvt_info;
1616
Joe Perches956b9ba2012-04-29 17:08:39 -03001617 edac_dbg(0, "MC: mci = %p, dev = %p\n",
1618 mci, &sbridge_dev->pdev[0]->dev);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001619
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001620 /* Remove MC sysfs nodes */
Mauro Carvalho Chehabfd687502012-03-16 07:44:18 -03001621 edac_mc_del_mc(mci->pdev);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001622
Joe Perches956b9ba2012-04-29 17:08:39 -03001623 edac_dbg(1, "%s: free mci struct\n", mci->ctl_name);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001624 kfree(mci->ctl_name);
1625 edac_mc_free(mci);
1626 sbridge_dev->mci = NULL;
1627}
1628
1629static int sbridge_register_mci(struct sbridge_dev *sbridge_dev)
1630{
1631 struct mem_ctl_info *mci;
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001632 struct edac_mc_layer layers[2];
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001633 struct sbridge_pvt *pvt;
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001634 int rc;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001635
1636 /* Check the number of active and not disabled channels */
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001637 rc = check_if_ecc_is_active(sbridge_dev->bus);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001638 if (unlikely(rc < 0))
1639 return rc;
1640
1641 /* allocate a new MC control structure */
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001642 layers[0].type = EDAC_MC_LAYER_CHANNEL;
1643 layers[0].size = NUM_CHANNELS;
1644 layers[0].is_virt_csrow = false;
1645 layers[1].type = EDAC_MC_LAYER_SLOT;
1646 layers[1].size = MAX_DIMMS;
1647 layers[1].is_virt_csrow = true;
Mauro Carvalho Chehabca0907b2012-05-02 14:37:00 -03001648 mci = edac_mc_alloc(sbridge_dev->mc, ARRAY_SIZE(layers), layers,
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001649 sizeof(*pvt));
1650
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001651 if (unlikely(!mci))
1652 return -ENOMEM;
1653
Joe Perches956b9ba2012-04-29 17:08:39 -03001654 edac_dbg(0, "MC: mci = %p, dev = %p\n",
1655 mci, &sbridge_dev->pdev[0]->dev);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001656
1657 pvt = mci->pvt_info;
1658 memset(pvt, 0, sizeof(*pvt));
1659
1660 /* Associate sbridge_dev and mci for future usage */
1661 pvt->sbridge_dev = sbridge_dev;
1662 sbridge_dev->mci = mci;
1663
1664 mci->mtype_cap = MEM_FLAG_DDR3;
1665 mci->edac_ctl_cap = EDAC_FLAG_NONE;
1666 mci->edac_cap = EDAC_FLAG_NONE;
1667 mci->mod_name = "sbridge_edac.c";
1668 mci->mod_ver = SBRIDGE_REVISION;
1669 mci->ctl_name = kasprintf(GFP_KERNEL, "Sandy Bridge Socket#%d", mci->mc_idx);
1670 mci->dev_name = pci_name(sbridge_dev->pdev[0]);
1671 mci->ctl_page_to_phys = NULL;
Aristeu Rozanskifb79a502013-10-30 13:26:57 -03001672 pvt->info.get_tolm = sbridge_get_tolm;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001673
1674 /* Set the function pointer to an actual operation function */
1675 mci->edac_check = sbridge_check_error;
1676
1677 /* Store pci devices at mci for faster access */
1678 rc = mci_bind_devs(mci, sbridge_dev);
1679 if (unlikely(rc < 0))
1680 goto fail0;
1681
1682 /* Get dimm basic config and the memory layout */
1683 get_dimm_config(mci);
1684 get_memory_layout(mci);
1685
1686 /* record ptr to the generic device */
Mauro Carvalho Chehabfd687502012-03-16 07:44:18 -03001687 mci->pdev = &sbridge_dev->pdev[0]->dev;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001688
1689 /* add this new MC control structure to EDAC's list of MCs */
1690 if (unlikely(edac_mc_add_mc(mci))) {
Joe Perches956b9ba2012-04-29 17:08:39 -03001691 edac_dbg(0, "MC: failed edac_mc_add_mc()\n");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001692 rc = -EINVAL;
1693 goto fail0;
1694 }
1695
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001696 return 0;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001697
1698fail0:
1699 kfree(mci->ctl_name);
1700 edac_mc_free(mci);
1701 sbridge_dev->mci = NULL;
1702 return rc;
1703}
1704
1705/*
1706 * sbridge_probe Probe for ONE instance of device to see if it is
1707 * present.
1708 * return:
1709 * 0 for FOUND a device
1710 * < 0 for error code
1711 */
1712
Greg Kroah-Hartman9b3c6e82012-12-21 13:23:51 -08001713static int sbridge_probe(struct pci_dev *pdev, const struct pci_device_id *id)
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001714{
1715 int rc;
1716 u8 mc, num_mc = 0;
1717 struct sbridge_dev *sbridge_dev;
1718
1719 /* get the pci devices we want to reserve for our use */
1720 mutex_lock(&sbridge_edac_lock);
1721
1722 /*
1723 * All memory controllers are allocated at the first pass.
1724 */
1725 if (unlikely(probed >= 1)) {
1726 mutex_unlock(&sbridge_edac_lock);
1727 return -ENODEV;
1728 }
1729 probed++;
1730
1731 rc = sbridge_get_all_devices(&num_mc);
1732 if (unlikely(rc < 0))
1733 goto fail0;
1734 mc = 0;
1735
1736 list_for_each_entry(sbridge_dev, &sbridge_edac_list, list) {
Joe Perches956b9ba2012-04-29 17:08:39 -03001737 edac_dbg(0, "Registering MC#%d (%d of %d)\n",
1738 mc, mc + 1, num_mc);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001739 sbridge_dev->mc = mc++;
1740 rc = sbridge_register_mci(sbridge_dev);
1741 if (unlikely(rc < 0))
1742 goto fail1;
1743 }
1744
1745 sbridge_printk(KERN_INFO, "Driver loaded.\n");
1746
1747 mutex_unlock(&sbridge_edac_lock);
1748 return 0;
1749
1750fail1:
1751 list_for_each_entry(sbridge_dev, &sbridge_edac_list, list)
1752 sbridge_unregister_mci(sbridge_dev);
1753
1754 sbridge_put_all_devices();
1755fail0:
1756 mutex_unlock(&sbridge_edac_lock);
1757 return rc;
1758}
1759
1760/*
1761 * sbridge_remove destructor for one instance of device
1762 *
1763 */
Greg Kroah-Hartman9b3c6e82012-12-21 13:23:51 -08001764static void sbridge_remove(struct pci_dev *pdev)
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001765{
1766 struct sbridge_dev *sbridge_dev;
1767
Joe Perches956b9ba2012-04-29 17:08:39 -03001768 edac_dbg(0, "\n");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001769
1770 /*
1771 * we have a trouble here: pdev value for removal will be wrong, since
1772 * it will point to the X58 register used to detect that the machine
1773 * is a Nehalem or upper design. However, due to the way several PCI
1774 * devices are grouped together to provide MC functionality, we need
1775 * to use a different method for releasing the devices
1776 */
1777
1778 mutex_lock(&sbridge_edac_lock);
1779
1780 if (unlikely(!probed)) {
1781 mutex_unlock(&sbridge_edac_lock);
1782 return;
1783 }
1784
1785 list_for_each_entry(sbridge_dev, &sbridge_edac_list, list)
1786 sbridge_unregister_mci(sbridge_dev);
1787
1788 /* Release PCI resources */
1789 sbridge_put_all_devices();
1790
1791 probed--;
1792
1793 mutex_unlock(&sbridge_edac_lock);
1794}
1795
1796MODULE_DEVICE_TABLE(pci, sbridge_pci_tbl);
1797
1798/*
1799 * sbridge_driver pci_driver structure for this module
1800 *
1801 */
1802static struct pci_driver sbridge_driver = {
1803 .name = "sbridge_edac",
1804 .probe = sbridge_probe,
Greg Kroah-Hartman9b3c6e82012-12-21 13:23:51 -08001805 .remove = sbridge_remove,
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001806 .id_table = sbridge_pci_tbl,
1807};
1808
1809/*
1810 * sbridge_init Module entry function
1811 * Try to initialize this module for its devices
1812 */
1813static int __init sbridge_init(void)
1814{
1815 int pci_rc;
1816
Joe Perches956b9ba2012-04-29 17:08:39 -03001817 edac_dbg(2, "\n");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001818
1819 /* Ensure that the OPSTATE is set correctly for POLL or NMI */
1820 opstate_init();
1821
1822 pci_rc = pci_register_driver(&sbridge_driver);
1823
Chen Gonge35fca42012-05-08 20:40:12 -03001824 if (pci_rc >= 0) {
1825 mce_register_decode_chain(&sbridge_mce_dec);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001826 return 0;
Chen Gonge35fca42012-05-08 20:40:12 -03001827 }
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001828
1829 sbridge_printk(KERN_ERR, "Failed to register device with error %d.\n",
1830 pci_rc);
1831
1832 return pci_rc;
1833}
1834
1835/*
1836 * sbridge_exit() Module exit function
1837 * Unregister the driver
1838 */
1839static void __exit sbridge_exit(void)
1840{
Joe Perches956b9ba2012-04-29 17:08:39 -03001841 edac_dbg(2, "\n");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001842 pci_unregister_driver(&sbridge_driver);
Chen Gonge35fca42012-05-08 20:40:12 -03001843 mce_unregister_decode_chain(&sbridge_mce_dec);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001844}
1845
1846module_init(sbridge_init);
1847module_exit(sbridge_exit);
1848
1849module_param(edac_op_state, int, 0444);
1850MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
1851
1852MODULE_LICENSE("GPL");
1853MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
1854MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
1855MODULE_DESCRIPTION("MC Driver for Intel Sandy Bridge memory controllers - "
1856 SBRIDGE_REVISION);