blob: c42dec2325074f4915f7689629a8e40c5ef75fb6 [file] [log] [blame]
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001/* Intel Sandy Bridge -EN/-EP/-EX Memory Controller kernel module
2 *
3 * This driver supports the memory controllers found on the Intel
4 * processor family Sandy Bridge.
5 *
6 * This file may be distributed under the terms of the
7 * GNU General Public License version 2 only.
8 *
9 * Copyright (c) 2011 by:
10 * Mauro Carvalho Chehab <mchehab@redhat.com>
11 */
12
13#include <linux/module.h>
14#include <linux/init.h>
15#include <linux/pci.h>
16#include <linux/pci_ids.h>
17#include <linux/slab.h>
18#include <linux/delay.h>
19#include <linux/edac.h>
20#include <linux/mmzone.h>
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -020021#include <linux/smp.h>
22#include <linux/bitmap.h>
Mauro Carvalho Chehab5b889e32011-11-07 18:26:53 -030023#include <linux/math64.h>
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -020024#include <asm/processor.h>
Mauro Carvalho Chehab3d78c9a2011-10-20 19:33:46 -020025#include <asm/mce.h>
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -020026
27#include "edac_core.h"
28
29/* Static vars */
30static LIST_HEAD(sbridge_edac_list);
31static DEFINE_MUTEX(sbridge_edac_lock);
32static int probed;
33
34/*
35 * Alter this version for the module when modifications are made
36 */
37#define SBRIDGE_REVISION " Ver: 1.0.0 "
38#define EDAC_MOD_STR "sbridge_edac"
39
40/*
41 * Debug macros
42 */
43#define sbridge_printk(level, fmt, arg...) \
44 edac_printk(level, "sbridge", fmt, ##arg)
45
46#define sbridge_mc_printk(mci, level, fmt, arg...) \
47 edac_mc_chipset_printk(mci, level, "sbridge", fmt, ##arg)
48
/*
 * Extract bits <lo>..<hi> (both inclusive) of register value <v>,
 * right-aligned. The result is a 64-bit unsigned value, so callers may
 * shift it left past bit 31 without losing the upper bits.
 */
#define GET_BITFIELD(v, lo, hi)	\
	((((unsigned long long)(v)) >> (lo)) & \
	 ((1ULL << ((hi) - (lo) + 1)) - 1))
54
55/*
56 * sbridge Memory Controller Registers
57 */
58
59/*
60 * FIXME: For now, let's order by device function, as it makes
David Mackey15ed1032012-04-17 11:30:52 -070061 * easier for driver's development process. This table should be
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -020062 * moved to pci_id.h when submitted upstream
63 */
64#define PCI_DEVICE_ID_INTEL_SBRIDGE_SAD0 0x3cf4 /* 12.6 */
65#define PCI_DEVICE_ID_INTEL_SBRIDGE_SAD1 0x3cf6 /* 12.7 */
66#define PCI_DEVICE_ID_INTEL_SBRIDGE_BR 0x3cf5 /* 13.6 */
67#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_HA0 0x3ca0 /* 14.0 */
68#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA 0x3ca8 /* 15.0 */
69#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_RAS 0x3c71 /* 15.1 */
70#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD0 0x3caa /* 15.2 */
71#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD1 0x3cab /* 15.3 */
72#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD2 0x3cac /* 15.4 */
73#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD3 0x3cad /* 15.5 */
74#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_DDRIO 0x3cb8 /* 17.0 */
75
76 /*
77 * Currently, unused, but will be needed in the future
78 * implementations, as they hold the error counters
79 */
80#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_ERR0 0x3c72 /* 16.2 */
81#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_ERR1 0x3c73 /* 16.3 */
82#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_ERR2 0x3c76 /* 16.6 */
83#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_ERR3 0x3c77 /* 16.7 */
84
85/* Devices 12 Function 6, Offsets 0x80 to 0xcc */
Aristeu Rozanski464f1d82013-10-30 13:27:00 -030086static const u32 sbridge_dram_rule[] = {
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -020087 0x80, 0x88, 0x90, 0x98, 0xa0,
88 0xa8, 0xb0, 0xb8, 0xc0, 0xc8,
89};
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -020090
91#define SAD_LIMIT(reg) ((GET_BITFIELD(reg, 6, 25) << 26) | 0x3ffffff)
92#define DRAM_ATTR(reg) GET_BITFIELD(reg, 2, 3)
93#define INTERLEAVE_MODE(reg) GET_BITFIELD(reg, 1, 1)
94#define DRAM_RULE_ENABLE(reg) GET_BITFIELD(reg, 0, 0)
95
96static char *get_dram_attr(u32 reg)
97{
98 switch(DRAM_ATTR(reg)) {
99 case 0:
100 return "DRAM";
101 case 1:
102 return "MMCFG";
103 case 2:
104 return "NXM";
105 default:
106 return "unknown";
107 }
108}
109
110static const u32 interleave_list[] = {
111 0x84, 0x8c, 0x94, 0x9c, 0xa4,
112 0xac, 0xb4, 0xbc, 0xc4, 0xcc,
113};
114#define MAX_INTERLEAVE ARRAY_SIZE(interleave_list)
115
116#define SAD_PKG0(reg) GET_BITFIELD(reg, 0, 2)
117#define SAD_PKG1(reg) GET_BITFIELD(reg, 3, 5)
118#define SAD_PKG2(reg) GET_BITFIELD(reg, 8, 10)
119#define SAD_PKG3(reg) GET_BITFIELD(reg, 11, 13)
120#define SAD_PKG4(reg) GET_BITFIELD(reg, 16, 18)
121#define SAD_PKG5(reg) GET_BITFIELD(reg, 19, 21)
122#define SAD_PKG6(reg) GET_BITFIELD(reg, 24, 26)
123#define SAD_PKG7(reg) GET_BITFIELD(reg, 27, 29)
124
125static inline int sad_pkg(u32 reg, int interleave)
126{
127 switch (interleave) {
128 case 0:
129 return SAD_PKG0(reg);
130 case 1:
131 return SAD_PKG1(reg);
132 case 2:
133 return SAD_PKG2(reg);
134 case 3:
135 return SAD_PKG3(reg);
136 case 4:
137 return SAD_PKG4(reg);
138 case 5:
139 return SAD_PKG5(reg);
140 case 6:
141 return SAD_PKG6(reg);
142 case 7:
143 return SAD_PKG7(reg);
144 default:
145 return -EINVAL;
146 }
147}
148
149/* Devices 12 Function 7 */
150
151#define TOLM 0x80
152#define TOHM 0x84
153
154#define GET_TOLM(reg) ((GET_BITFIELD(reg, 0, 3) << 28) | 0x3ffffff)
155#define GET_TOHM(reg) ((GET_BITFIELD(reg, 0, 20) << 25) | 0x3ffffff)
156
157/* Device 13 Function 6 */
158
159#define SAD_TARGET 0xf0
160
161#define SOURCE_ID(reg) GET_BITFIELD(reg, 9, 11)
162
163#define SAD_CONTROL 0xf4
164
165#define NODE_ID(reg) GET_BITFIELD(reg, 0, 2)
166
167/* Device 14 function 0 */
168
169static const u32 tad_dram_rule[] = {
170 0x40, 0x44, 0x48, 0x4c,
171 0x50, 0x54, 0x58, 0x5c,
172 0x60, 0x64, 0x68, 0x6c,
173};
174#define MAX_TAD ARRAY_SIZE(tad_dram_rule)
175
176#define TAD_LIMIT(reg) ((GET_BITFIELD(reg, 12, 31) << 26) | 0x3ffffff)
177#define TAD_SOCK(reg) GET_BITFIELD(reg, 10, 11)
178#define TAD_CH(reg) GET_BITFIELD(reg, 8, 9)
179#define TAD_TGT3(reg) GET_BITFIELD(reg, 6, 7)
180#define TAD_TGT2(reg) GET_BITFIELD(reg, 4, 5)
181#define TAD_TGT1(reg) GET_BITFIELD(reg, 2, 3)
182#define TAD_TGT0(reg) GET_BITFIELD(reg, 0, 1)
183
184/* Device 15, function 0 */
185
186#define MCMTR 0x7c
187
188#define IS_ECC_ENABLED(mcmtr) GET_BITFIELD(mcmtr, 2, 2)
189#define IS_LOCKSTEP_ENABLED(mcmtr) GET_BITFIELD(mcmtr, 1, 1)
190#define IS_CLOSE_PG(mcmtr) GET_BITFIELD(mcmtr, 0, 0)
191
192/* Device 15, function 1 */
193
194#define RASENABLES 0xac
195#define IS_MIRROR_ENABLED(reg) GET_BITFIELD(reg, 0, 0)
196
197/* Device 15, functions 2-5 */
198
199static const int mtr_regs[] = {
200 0x80, 0x84, 0x88,
201};
202
203#define RANK_DISABLE(mtr) GET_BITFIELD(mtr, 16, 19)
204#define IS_DIMM_PRESENT(mtr) GET_BITFIELD(mtr, 14, 14)
205#define RANK_CNT_BITS(mtr) GET_BITFIELD(mtr, 12, 13)
206#define RANK_WIDTH_BITS(mtr) GET_BITFIELD(mtr, 2, 4)
207#define COL_WIDTH_BITS(mtr) GET_BITFIELD(mtr, 0, 1)
208
209static const u32 tad_ch_nilv_offset[] = {
210 0x90, 0x94, 0x98, 0x9c,
211 0xa0, 0xa4, 0xa8, 0xac,
212 0xb0, 0xb4, 0xb8, 0xbc,
213};
214#define CHN_IDX_OFFSET(reg) GET_BITFIELD(reg, 28, 29)
215#define TAD_OFFSET(reg) (GET_BITFIELD(reg, 6, 25) << 26)
216
217static const u32 rir_way_limit[] = {
218 0x108, 0x10c, 0x110, 0x114, 0x118,
219};
220#define MAX_RIR_RANGES ARRAY_SIZE(rir_way_limit)
221
222#define IS_RIR_VALID(reg) GET_BITFIELD(reg, 31, 31)
223#define RIR_WAY(reg) GET_BITFIELD(reg, 28, 29)
224#define RIR_LIMIT(reg) ((GET_BITFIELD(reg, 1, 10) << 29)| 0x1fffffff)
225
226#define MAX_RIR_WAY 8
227
228static const u32 rir_offset[MAX_RIR_RANGES][MAX_RIR_WAY] = {
229 { 0x120, 0x124, 0x128, 0x12c, 0x130, 0x134, 0x138, 0x13c },
230 { 0x140, 0x144, 0x148, 0x14c, 0x150, 0x154, 0x158, 0x15c },
231 { 0x160, 0x164, 0x168, 0x16c, 0x170, 0x174, 0x178, 0x17c },
232 { 0x180, 0x184, 0x188, 0x18c, 0x190, 0x194, 0x198, 0x19c },
233 { 0x1a0, 0x1a4, 0x1a8, 0x1ac, 0x1b0, 0x1b4, 0x1b8, 0x1bc },
234};
235
236#define RIR_RNK_TGT(reg) GET_BITFIELD(reg, 16, 19)
237#define RIR_OFFSET(reg) GET_BITFIELD(reg, 2, 14)
238
239/* Device 16, functions 2-7 */
240
241/*
242 * FIXME: Implement the error count reads directly
243 */
244
245static const u32 correrrcnt[] = {
246 0x104, 0x108, 0x10c, 0x110,
247};
248
249#define RANK_ODD_OV(reg) GET_BITFIELD(reg, 31, 31)
250#define RANK_ODD_ERR_CNT(reg) GET_BITFIELD(reg, 16, 30)
251#define RANK_EVEN_OV(reg) GET_BITFIELD(reg, 15, 15)
252#define RANK_EVEN_ERR_CNT(reg) GET_BITFIELD(reg, 0, 14)
253
254static const u32 correrrthrsld[] = {
255 0x11c, 0x120, 0x124, 0x128,
256};
257
258#define RANK_ODD_ERR_THRSLD(reg) GET_BITFIELD(reg, 16, 30)
259#define RANK_EVEN_ERR_THRSLD(reg) GET_BITFIELD(reg, 0, 14)
260
261
262/* Device 17, function 0 */
263
Aristeu Rozanskief1e8d02013-10-30 13:26:56 -0300264#define SB_RANK_CFG_A 0x0328
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200265
266#define IS_RDIMM_ENABLED(reg) GET_BITFIELD(reg, 11, 11)
267
268/*
269 * sbridge structs
270 */
271
272#define NUM_CHANNELS 4
273#define MAX_DIMMS 3 /* Max DIMMS per channel */
274
Aristeu Rozanskifb79a502013-10-30 13:26:57 -0300275struct sbridge_pvt;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200276struct sbridge_info {
Aristeu Rozanski464f1d82013-10-30 13:27:00 -0300277 u32 mcmtr;
278 u32 rankcfgr;
279 u64 (*get_tolm)(struct sbridge_pvt *pvt);
280 u64 (*get_tohm)(struct sbridge_pvt *pvt);
281 const u32 *dram_rule;
282 u8 max_sad;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200283};
284
285struct sbridge_channel {
286 u32 ranks;
287 u32 dimms;
288};
289
/*
 * Describes one PCI function the driver looks for; entries are built
 * with the PCI_DESCR() initializer macro.
 */
struct pci_id_descr {
	int	dev;		/* PCI device (slot) number */
	int	func;		/* PCI function number */
	int	dev_id;		/* PCI device ID */
	int	optional;	/* Set to 1 in the table for the DDRIO device only */
};
296
/* One array of PCI function descriptors plus its element count */
struct pci_id_table {
	const struct pci_id_descr	*descr;		/* first descriptor */
	int				n_devs;		/* number of descriptors */
};
301
302struct sbridge_dev {
303 struct list_head list;
304 u8 bus, mc;
305 u8 node_id, source_id;
306 struct pci_dev **pdev;
307 int n_devs;
308 struct mem_ctl_info *mci;
309};
310
311struct sbridge_pvt {
312 struct pci_dev *pci_ta, *pci_ddrio, *pci_ras;
313 struct pci_dev *pci_sad0, *pci_sad1, *pci_ha0;
Aristeu Rozanski5f8a1b82013-10-30 13:26:58 -0300314 struct pci_dev *pci_br0;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200315 struct pci_dev *pci_tad[NUM_CHANNELS];
316
317 struct sbridge_dev *sbridge_dev;
318
319 struct sbridge_info info;
320 struct sbridge_channel channel[NUM_CHANNELS];
321
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200322 /* Memory type detection */
323 bool is_mirrored, is_lockstep, is_close_pg;
324
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200325 /* Fifo double buffers */
326 struct mce mce_entry[MCE_LOG_LEN];
327 struct mce mce_outentry[MCE_LOG_LEN];
328
329 /* Fifo in/out counters */
330 unsigned mce_in, mce_out;
331
332 /* Count indicator to show errors not got */
333 unsigned mce_overrun;
334
335 /* Memory description */
336 u64 tolm, tohm;
337};
338
Luck, Tonyde4772c2013-03-28 09:59:15 -0700339#define PCI_DESCR(device, function, device_id, opt) \
340 .dev = (device), \
341 .func = (function), \
342 .dev_id = (device_id), \
343 .optional = opt
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200344
345static const struct pci_id_descr pci_dev_descr_sbridge[] = {
346 /* Processor Home Agent */
Luck, Tonyde4772c2013-03-28 09:59:15 -0700347 { PCI_DESCR(14, 0, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_HA0, 0) },
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200348
349 /* Memory controller */
Luck, Tonyde4772c2013-03-28 09:59:15 -0700350 { PCI_DESCR(15, 0, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA, 0) },
351 { PCI_DESCR(15, 1, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_RAS, 0) },
352 { PCI_DESCR(15, 2, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD0, 0) },
353 { PCI_DESCR(15, 3, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD1, 0) },
354 { PCI_DESCR(15, 4, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD2, 0) },
355 { PCI_DESCR(15, 5, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD3, 0) },
356 { PCI_DESCR(17, 0, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_DDRIO, 1) },
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200357
358 /* System Address Decoder */
Luck, Tonyde4772c2013-03-28 09:59:15 -0700359 { PCI_DESCR(12, 6, PCI_DEVICE_ID_INTEL_SBRIDGE_SAD0, 0) },
360 { PCI_DESCR(12, 7, PCI_DEVICE_ID_INTEL_SBRIDGE_SAD1, 0) },
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200361
362 /* Broadcast Registers */
Luck, Tonyde4772c2013-03-28 09:59:15 -0700363 { PCI_DESCR(13, 6, PCI_DEVICE_ID_INTEL_SBRIDGE_BR, 0) },
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200364};
365
366#define PCI_ID_TABLE_ENTRY(A) { .descr=A, .n_devs = ARRAY_SIZE(A) }
367static const struct pci_id_table pci_dev_descr_sbridge_table[] = {
368 PCI_ID_TABLE_ENTRY(pci_dev_descr_sbridge),
369 {0,} /* 0 terminated list. */
370};
371
372/*
373 * pci_device_id table for which devices we are looking for
374 */
Lionel Debroux36c46f32012-02-27 07:41:47 +0100375static DEFINE_PCI_DEVICE_TABLE(sbridge_pci_tbl) = {
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200376 {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA)},
377 {0,} /* 0 terminated list. */
378};
379
380
381/****************************************************************************
David Mackey15ed1032012-04-17 11:30:52 -0700382 Ancillary status routines
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200383 ****************************************************************************/
384
385static inline int numrank(u32 mtr)
386{
387 int ranks = (1 << RANK_CNT_BITS(mtr));
388
389 if (ranks > 4) {
Joe Perches956b9ba12012-04-29 17:08:39 -0300390 edac_dbg(0, "Invalid number of ranks: %d (max = 4) raw value = %x (%04x)\n",
391 ranks, (unsigned int)RANK_CNT_BITS(mtr), mtr);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200392 return -EINVAL;
393 }
394
395 return ranks;
396}
397
398static inline int numrow(u32 mtr)
399{
400 int rows = (RANK_WIDTH_BITS(mtr) + 12);
401
402 if (rows < 13 || rows > 18) {
Joe Perches956b9ba12012-04-29 17:08:39 -0300403 edac_dbg(0, "Invalid number of rows: %d (should be between 14 and 17) raw value = %x (%04x)\n",
404 rows, (unsigned int)RANK_WIDTH_BITS(mtr), mtr);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200405 return -EINVAL;
406 }
407
408 return 1 << rows;
409}
410
411static inline int numcol(u32 mtr)
412{
413 int cols = (COL_WIDTH_BITS(mtr) + 10);
414
415 if (cols > 12) {
Joe Perches956b9ba12012-04-29 17:08:39 -0300416 edac_dbg(0, "Invalid number of cols: %d (max = 4) raw value = %x (%04x)\n",
417 cols, (unsigned int)COL_WIDTH_BITS(mtr), mtr);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200418 return -EINVAL;
419 }
420
421 return 1 << cols;
422}
423
424static struct sbridge_dev *get_sbridge_dev(u8 bus)
425{
426 struct sbridge_dev *sbridge_dev;
427
428 list_for_each_entry(sbridge_dev, &sbridge_edac_list, list) {
429 if (sbridge_dev->bus == bus)
430 return sbridge_dev;
431 }
432
433 return NULL;
434}
435
436static struct sbridge_dev *alloc_sbridge_dev(u8 bus,
437 const struct pci_id_table *table)
438{
439 struct sbridge_dev *sbridge_dev;
440
441 sbridge_dev = kzalloc(sizeof(*sbridge_dev), GFP_KERNEL);
442 if (!sbridge_dev)
443 return NULL;
444
445 sbridge_dev->pdev = kzalloc(sizeof(*sbridge_dev->pdev) * table->n_devs,
446 GFP_KERNEL);
447 if (!sbridge_dev->pdev) {
448 kfree(sbridge_dev);
449 return NULL;
450 }
451
452 sbridge_dev->bus = bus;
453 sbridge_dev->n_devs = table->n_devs;
454 list_add_tail(&sbridge_dev->list, &sbridge_edac_list);
455
456 return sbridge_dev;
457}
458
459static void free_sbridge_dev(struct sbridge_dev *sbridge_dev)
460{
461 list_del(&sbridge_dev->list);
462 kfree(sbridge_dev->pdev);
463 kfree(sbridge_dev);
464}
465
Aristeu Rozanskifb79a502013-10-30 13:26:57 -0300466static u64 sbridge_get_tolm(struct sbridge_pvt *pvt)
467{
468 u32 reg;
469
470 /* Address range is 32:28 */
471 pci_read_config_dword(pvt->pci_sad1, TOLM, &reg);
472 return GET_TOLM(reg);
473}
474
Aristeu Rozanski8fd6a432013-10-30 13:26:59 -0300475static u64 sbridge_get_tohm(struct sbridge_pvt *pvt)
476{
477 u32 reg;
478
479 pci_read_config_dword(pvt->pci_sad1, TOHM, &reg);
480 return GET_TOHM(reg);
481}
482
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200483/****************************************************************************
484 Memory check routines
485 ****************************************************************************/
486static struct pci_dev *get_pdev_slot_func(u8 bus, unsigned slot,
487 unsigned func)
488{
489 struct sbridge_dev *sbridge_dev = get_sbridge_dev(bus);
490 int i;
491
492 if (!sbridge_dev)
493 return NULL;
494
495 for (i = 0; i < sbridge_dev->n_devs; i++) {
496 if (!sbridge_dev->pdev[i])
497 continue;
498
499 if (PCI_SLOT(sbridge_dev->pdev[i]->devfn) == slot &&
500 PCI_FUNC(sbridge_dev->pdev[i]->devfn) == func) {
Joe Perches956b9ba12012-04-29 17:08:39 -0300501 edac_dbg(1, "Associated %02x.%02x.%d with %p\n",
502 bus, slot, func, sbridge_dev->pdev[i]);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200503 return sbridge_dev->pdev[i];
504 }
505 }
506
507 return NULL;
508}
509
510/**
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -0300511 * check_if_ecc_is_active() - Checks if ECC is active
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200512 * bus: Device bus
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200513 */
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -0300514static int check_if_ecc_is_active(const u8 bus)
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200515{
516 struct pci_dev *pdev = NULL;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200517 u32 mcmtr;
518
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200519 pdev = get_pdev_slot_func(bus, 15, 0);
520 if (!pdev) {
521 sbridge_printk(KERN_ERR, "Couldn't find PCI device "
522 "%2x.%02d.%d!!!\n",
523 bus, 15, 0);
524 return -ENODEV;
525 }
526
527 pci_read_config_dword(pdev, MCMTR, &mcmtr);
528 if (!IS_ECC_ENABLED(mcmtr)) {
529 sbridge_printk(KERN_ERR, "ECC is disabled. Aborting\n");
530 return -ENODEV;
531 }
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200532 return 0;
533}
534
Mauro Carvalho Chehab084a4fc2012-01-27 18:38:08 -0300535static int get_dimm_config(struct mem_ctl_info *mci)
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200536{
537 struct sbridge_pvt *pvt = mci->pvt_info;
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -0300538 struct dimm_info *dimm;
Mauro Carvalho Chehabdeb09dd2012-09-20 12:09:30 -0300539 unsigned i, j, banks, ranks, rows, cols, npages;
540 u64 size;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200541 u32 reg;
542 enum edac_type mode;
Mark A. Grondonac6e13b52011-10-18 11:02:58 -0200543 enum mem_type mtype;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200544
Aristeu Rozanskief1e8d02013-10-30 13:26:56 -0300545 pvt->info.rankcfgr = SB_RANK_CFG_A;
546
Aristeu Rozanski5f8a1b82013-10-30 13:26:58 -0300547 pci_read_config_dword(pvt->pci_br0, SAD_TARGET, &reg);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200548 pvt->sbridge_dev->source_id = SOURCE_ID(reg);
549
Aristeu Rozanski5f8a1b82013-10-30 13:26:58 -0300550 pci_read_config_dword(pvt->pci_br0, SAD_CONTROL, &reg);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200551 pvt->sbridge_dev->node_id = NODE_ID(reg);
Joe Perches956b9ba12012-04-29 17:08:39 -0300552 edac_dbg(0, "mc#%d: Node ID: %d, source ID: %d\n",
553 pvt->sbridge_dev->mc,
554 pvt->sbridge_dev->node_id,
555 pvt->sbridge_dev->source_id);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200556
557 pci_read_config_dword(pvt->pci_ras, RASENABLES, &reg);
558 if (IS_MIRROR_ENABLED(reg)) {
Joe Perches956b9ba12012-04-29 17:08:39 -0300559 edac_dbg(0, "Memory mirror is enabled\n");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200560 pvt->is_mirrored = true;
561 } else {
Joe Perches956b9ba12012-04-29 17:08:39 -0300562 edac_dbg(0, "Memory mirror is disabled\n");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200563 pvt->is_mirrored = false;
564 }
565
566 pci_read_config_dword(pvt->pci_ta, MCMTR, &pvt->info.mcmtr);
567 if (IS_LOCKSTEP_ENABLED(pvt->info.mcmtr)) {
Joe Perches956b9ba12012-04-29 17:08:39 -0300568 edac_dbg(0, "Lockstep is enabled\n");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200569 mode = EDAC_S8ECD8ED;
570 pvt->is_lockstep = true;
571 } else {
Joe Perches956b9ba12012-04-29 17:08:39 -0300572 edac_dbg(0, "Lockstep is disabled\n");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200573 mode = EDAC_S4ECD4ED;
574 pvt->is_lockstep = false;
575 }
576 if (IS_CLOSE_PG(pvt->info.mcmtr)) {
Joe Perches956b9ba12012-04-29 17:08:39 -0300577 edac_dbg(0, "address map is on closed page mode\n");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200578 pvt->is_close_pg = true;
579 } else {
Joe Perches956b9ba12012-04-29 17:08:39 -0300580 edac_dbg(0, "address map is on open page mode\n");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200581 pvt->is_close_pg = false;
582 }
583
Luck, Tonyde4772c2013-03-28 09:59:15 -0700584 if (pvt->pci_ddrio) {
Aristeu Rozanskief1e8d02013-10-30 13:26:56 -0300585 pci_read_config_dword(pvt->pci_ddrio, pvt->info.rankcfgr,
586 &reg);
Luck, Tonyde4772c2013-03-28 09:59:15 -0700587 if (IS_RDIMM_ENABLED(reg)) {
588 /* FIXME: Can also be LRDIMM */
589 edac_dbg(0, "Memory is registered\n");
590 mtype = MEM_RDDR3;
591 } else {
592 edac_dbg(0, "Memory is unregistered\n");
593 mtype = MEM_DDR3;
594 }
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200595 } else {
Luck, Tonyde4772c2013-03-28 09:59:15 -0700596 edac_dbg(0, "Cannot determine memory type\n");
597 mtype = MEM_UNKNOWN;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200598 }
599
600 /* On all supported DDR3 DIMM types, there are 8 banks available */
601 banks = 8;
602
603 for (i = 0; i < NUM_CHANNELS; i++) {
604 u32 mtr;
605
606 for (j = 0; j < ARRAY_SIZE(mtr_regs); j++) {
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -0300607 dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers,
608 i, j, 0);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200609 pci_read_config_dword(pvt->pci_tad[i],
610 mtr_regs[j], &mtr);
Joe Perches956b9ba12012-04-29 17:08:39 -0300611 edac_dbg(4, "Channel #%d MTR%d = %x\n", i, j, mtr);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200612 if (IS_DIMM_PRESENT(mtr)) {
613 pvt->channel[i].dimms++;
614
615 ranks = numrank(mtr);
616 rows = numrow(mtr);
617 cols = numcol(mtr);
618
619 /* DDR3 has 8 I/O banks */
Mauro Carvalho Chehabdeb09dd2012-09-20 12:09:30 -0300620 size = ((u64)rows * cols * banks * ranks) >> (20 - 3);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200621 npages = MiB_TO_PAGES(size);
622
Mauro Carvalho Chehabdeb09dd2012-09-20 12:09:30 -0300623 edac_dbg(0, "mc#%d: channel %d, dimm %d, %Ld Mb (%d pages) bank: %d, rank: %d, row: %#x, col: %#x\n",
Joe Perches956b9ba12012-04-29 17:08:39 -0300624 pvt->sbridge_dev->mc, i, j,
625 size, npages,
626 banks, ranks, rows, cols);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200627
Mauro Carvalho Chehaba895bf82012-01-28 09:09:38 -0300628 dimm->nr_pages = npages;
Mauro Carvalho Chehab084a4fc2012-01-27 18:38:08 -0300629 dimm->grain = 32;
630 dimm->dtype = (banks == 8) ? DEV_X8 : DEV_X4;
631 dimm->mtype = mtype;
632 dimm->edac_mode = mode;
633 snprintf(dimm->label, sizeof(dimm->label),
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200634 "CPU_SrcID#%u_Channel#%u_DIMM#%u",
635 pvt->sbridge_dev->source_id, i, j);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200636 }
637 }
638 }
639
640 return 0;
641}
642
643static void get_memory_layout(const struct mem_ctl_info *mci)
644{
645 struct sbridge_pvt *pvt = mci->pvt_info;
646 int i, j, k, n_sads, n_tads, sad_interl;
647 u32 reg;
648 u64 limit, prv = 0;
649 u64 tmp_mb;
Mauro Carvalho Chehab5b889e32011-11-07 18:26:53 -0300650 u32 mb, kb;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200651 u32 rir_way;
652
653 /*
654 * Step 1) Get TOLM/TOHM ranges
655 */
656
Aristeu Rozanskifb79a502013-10-30 13:26:57 -0300657 pvt->tolm = pvt->info.get_tolm(pvt);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200658 tmp_mb = (1 + pvt->tolm) >> 20;
659
Mauro Carvalho Chehab5b889e32011-11-07 18:26:53 -0300660 mb = div_u64_rem(tmp_mb, 1000, &kb);
Joe Perches956b9ba12012-04-29 17:08:39 -0300661 edac_dbg(0, "TOLM: %u.%03u GB (0x%016Lx)\n", mb, kb, (u64)pvt->tolm);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200662
663 /* Address range is already 45:25 */
Aristeu Rozanski8fd6a432013-10-30 13:26:59 -0300664 pvt->tohm = pvt->info.get_tohm(pvt);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200665 tmp_mb = (1 + pvt->tohm) >> 20;
666
Mauro Carvalho Chehab5b889e32011-11-07 18:26:53 -0300667 mb = div_u64_rem(tmp_mb, 1000, &kb);
Mauro Carvalho Chehabda14d932012-10-25 09:07:21 -0200668 edac_dbg(0, "TOHM: %u.%03u GB (0x%016Lx)\n", mb, kb, (u64)pvt->tohm);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200669
670 /*
671 * Step 2) Get SAD range and SAD Interleave list
672 * TAD registers contain the interleave wayness. However, it
673 * seems simpler to just discover it indirectly, with the
674 * algorithm bellow.
675 */
676 prv = 0;
Aristeu Rozanski464f1d82013-10-30 13:27:00 -0300677 for (n_sads = 0; n_sads < pvt->info.max_sad; n_sads++) {
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200678 /* SAD_LIMIT Address range is 45:26 */
Aristeu Rozanski464f1d82013-10-30 13:27:00 -0300679 pci_read_config_dword(pvt->pci_sad0, pvt->info.dram_rule[n_sads],
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200680 &reg);
681 limit = SAD_LIMIT(reg);
682
683 if (!DRAM_RULE_ENABLE(reg))
684 continue;
685
686 if (limit <= prv)
687 break;
688
689 tmp_mb = (limit + 1) >> 20;
Mauro Carvalho Chehab5b889e32011-11-07 18:26:53 -0300690 mb = div_u64_rem(tmp_mb, 1000, &kb);
Joe Perches956b9ba12012-04-29 17:08:39 -0300691 edac_dbg(0, "SAD#%d %s up to %u.%03u GB (0x%016Lx) Interleave: %s reg=0x%08x\n",
692 n_sads,
693 get_dram_attr(reg),
694 mb, kb,
695 ((u64)tmp_mb) << 20L,
696 INTERLEAVE_MODE(reg) ? "8:6" : "[8:6]XOR[18:16]",
697 reg);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200698 prv = limit;
699
700 pci_read_config_dword(pvt->pci_sad0, interleave_list[n_sads],
701 &reg);
702 sad_interl = sad_pkg(reg, 0);
703 for (j = 0; j < 8; j++) {
704 if (j > 0 && sad_interl == sad_pkg(reg, j))
705 break;
706
Joe Perches956b9ba12012-04-29 17:08:39 -0300707 edac_dbg(0, "SAD#%d, interleave #%d: %d\n",
708 n_sads, j, sad_pkg(reg, j));
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200709 }
710 }
711
712 /*
713 * Step 3) Get TAD range
714 */
715 prv = 0;
716 for (n_tads = 0; n_tads < MAX_TAD; n_tads++) {
717 pci_read_config_dword(pvt->pci_ha0, tad_dram_rule[n_tads],
718 &reg);
719 limit = TAD_LIMIT(reg);
720 if (limit <= prv)
721 break;
722 tmp_mb = (limit + 1) >> 20;
723
Mauro Carvalho Chehab5b889e32011-11-07 18:26:53 -0300724 mb = div_u64_rem(tmp_mb, 1000, &kb);
Joe Perches956b9ba12012-04-29 17:08:39 -0300725 edac_dbg(0, "TAD#%d: up to %u.%03u GB (0x%016Lx), socket interleave %d, memory interleave %d, TGT: %d, %d, %d, %d, reg=0x%08x\n",
726 n_tads, mb, kb,
727 ((u64)tmp_mb) << 20L,
728 (u32)TAD_SOCK(reg),
729 (u32)TAD_CH(reg),
730 (u32)TAD_TGT0(reg),
731 (u32)TAD_TGT1(reg),
732 (u32)TAD_TGT2(reg),
733 (u32)TAD_TGT3(reg),
734 reg);
Hui Wang7fae0db2012-02-06 04:11:01 -0300735 prv = limit;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200736 }
737
738 /*
739 * Step 4) Get TAD offsets, per each channel
740 */
741 for (i = 0; i < NUM_CHANNELS; i++) {
742 if (!pvt->channel[i].dimms)
743 continue;
744 for (j = 0; j < n_tads; j++) {
745 pci_read_config_dword(pvt->pci_tad[i],
746 tad_ch_nilv_offset[j],
747 &reg);
748 tmp_mb = TAD_OFFSET(reg) >> 20;
Mauro Carvalho Chehab5b889e32011-11-07 18:26:53 -0300749 mb = div_u64_rem(tmp_mb, 1000, &kb);
Joe Perches956b9ba12012-04-29 17:08:39 -0300750 edac_dbg(0, "TAD CH#%d, offset #%d: %u.%03u GB (0x%016Lx), reg=0x%08x\n",
751 i, j,
752 mb, kb,
753 ((u64)tmp_mb) << 20L,
754 reg);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200755 }
756 }
757
758 /*
759 * Step 6) Get RIR Wayness/Limit, per each channel
760 */
761 for (i = 0; i < NUM_CHANNELS; i++) {
762 if (!pvt->channel[i].dimms)
763 continue;
764 for (j = 0; j < MAX_RIR_RANGES; j++) {
765 pci_read_config_dword(pvt->pci_tad[i],
766 rir_way_limit[j],
767 &reg);
768
769 if (!IS_RIR_VALID(reg))
770 continue;
771
772 tmp_mb = RIR_LIMIT(reg) >> 20;
773 rir_way = 1 << RIR_WAY(reg);
Mauro Carvalho Chehab5b889e32011-11-07 18:26:53 -0300774 mb = div_u64_rem(tmp_mb, 1000, &kb);
Joe Perches956b9ba12012-04-29 17:08:39 -0300775 edac_dbg(0, "CH#%d RIR#%d, limit: %u.%03u GB (0x%016Lx), way: %d, reg=0x%08x\n",
776 i, j,
777 mb, kb,
778 ((u64)tmp_mb) << 20L,
779 rir_way,
780 reg);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200781
782 for (k = 0; k < rir_way; k++) {
783 pci_read_config_dword(pvt->pci_tad[i],
784 rir_offset[j][k],
785 &reg);
786 tmp_mb = RIR_OFFSET(reg) << 6;
787
Mauro Carvalho Chehab5b889e32011-11-07 18:26:53 -0300788 mb = div_u64_rem(tmp_mb, 1000, &kb);
Joe Perches956b9ba12012-04-29 17:08:39 -0300789 edac_dbg(0, "CH#%d RIR#%d INTL#%d, offset %u.%03u GB (0x%016Lx), tgt: %d, reg=0x%08x\n",
790 i, j, k,
791 mb, kb,
792 ((u64)tmp_mb) << 20L,
793 (u32)RIR_RNK_TGT(reg),
794 reg);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200795 }
796 }
797 }
798}
799
800struct mem_ctl_info *get_mci_for_node_id(u8 node_id)
801{
802 struct sbridge_dev *sbridge_dev;
803
804 list_for_each_entry(sbridge_dev, &sbridge_edac_list, list) {
805 if (sbridge_dev->node_id == node_id)
806 return sbridge_dev->mci;
807 }
808 return NULL;
809}
810
811static int get_memory_error_data(struct mem_ctl_info *mci,
812 u64 addr,
813 u8 *socket,
814 long *channel_mask,
815 u8 *rank,
Mauro Carvalho Chehabe17a2f42a2012-05-11 11:41:45 -0300816 char **area_type, char *msg)
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200817{
818 struct mem_ctl_info *new_mci;
819 struct sbridge_pvt *pvt = mci->pvt_info;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200820 int n_rir, n_sads, n_tads, sad_way, sck_xch;
821 int sad_interl, idx, base_ch;
822 int interleave_mode;
823 unsigned sad_interleave[MAX_INTERLEAVE];
824 u32 reg;
825 u8 ch_way,sck_way;
826 u32 tad_offset;
827 u32 rir_way;
Mauro Carvalho Chehab5b889e32011-11-07 18:26:53 -0300828 u32 mb, kb;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200829 u64 ch_addr, offset, limit, prv = 0;
830
831
832 /*
833 * Step 0) Check if the address is at special memory ranges
834 * The check bellow is probably enough to fill all cases where
835 * the error is not inside a memory, except for the legacy
836 * range (e. g. VGA addresses). It is unlikely, however, that the
837 * memory controller would generate an error on that range.
838 */
Mauro Carvalho Chehab5b889e32011-11-07 18:26:53 -0300839 if ((addr > (u64) pvt->tolm) && (addr < (1LL << 32))) {
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200840 sprintf(msg, "Error at TOLM area, on addr 0x%08Lx", addr);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200841 return -EINVAL;
842 }
843 if (addr >= (u64)pvt->tohm) {
844 sprintf(msg, "Error at MMIOH area, on addr 0x%016Lx", addr);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200845 return -EINVAL;
846 }
847
848 /*
849 * Step 1) Get socket
850 */
Aristeu Rozanski464f1d82013-10-30 13:27:00 -0300851 for (n_sads = 0; n_sads < pvt->info.max_sad; n_sads++) {
852 pci_read_config_dword(pvt->pci_sad0, pvt->info.dram_rule[n_sads],
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200853 &reg);
854
855 if (!DRAM_RULE_ENABLE(reg))
856 continue;
857
858 limit = SAD_LIMIT(reg);
859 if (limit <= prv) {
860 sprintf(msg, "Can't discover the memory socket");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200861 return -EINVAL;
862 }
863 if (addr <= limit)
864 break;
865 prv = limit;
866 }
Aristeu Rozanski464f1d82013-10-30 13:27:00 -0300867 if (n_sads == pvt->info.max_sad) {
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200868 sprintf(msg, "Can't discover the memory socket");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200869 return -EINVAL;
870 }
Mauro Carvalho Chehabe17a2f42a2012-05-11 11:41:45 -0300871 *area_type = get_dram_attr(reg);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200872 interleave_mode = INTERLEAVE_MODE(reg);
873
874 pci_read_config_dword(pvt->pci_sad0, interleave_list[n_sads],
875 &reg);
876 sad_interl = sad_pkg(reg, 0);
877 for (sad_way = 0; sad_way < 8; sad_way++) {
878 if (sad_way > 0 && sad_interl == sad_pkg(reg, sad_way))
879 break;
880 sad_interleave[sad_way] = sad_pkg(reg, sad_way);
Joe Perches956b9ba12012-04-29 17:08:39 -0300881 edac_dbg(0, "SAD interleave #%d: %d\n",
882 sad_way, sad_interleave[sad_way]);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200883 }
Joe Perches956b9ba12012-04-29 17:08:39 -0300884 edac_dbg(0, "mc#%d: Error detected on SAD#%d: address 0x%016Lx < 0x%016Lx, Interleave [%d:6]%s\n",
885 pvt->sbridge_dev->mc,
886 n_sads,
887 addr,
888 limit,
889 sad_way + 7,
890 interleave_mode ? "" : "XOR[18:16]");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200891 if (interleave_mode)
892 idx = ((addr >> 6) ^ (addr >> 16)) & 7;
893 else
894 idx = (addr >> 6) & 7;
895 switch (sad_way) {
896 case 1:
897 idx = 0;
898 break;
899 case 2:
900 idx = idx & 1;
901 break;
902 case 4:
903 idx = idx & 3;
904 break;
905 case 8:
906 break;
907 default:
908 sprintf(msg, "Can't discover socket interleave");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200909 return -EINVAL;
910 }
911 *socket = sad_interleave[idx];
Joe Perches956b9ba12012-04-29 17:08:39 -0300912 edac_dbg(0, "SAD interleave index: %d (wayness %d) = CPU socket %d\n",
913 idx, sad_way, *socket);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200914
915 /*
916 * Move to the proper node structure, in order to access the
917 * right PCI registers
918 */
919 new_mci = get_mci_for_node_id(*socket);
920 if (!new_mci) {
921 sprintf(msg, "Struct for socket #%u wasn't initialized",
922 *socket);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200923 return -EINVAL;
924 }
925 mci = new_mci;
926 pvt = mci->pvt_info;
927
928 /*
929 * Step 2) Get memory channel
930 */
931 prv = 0;
932 for (n_tads = 0; n_tads < MAX_TAD; n_tads++) {
933 pci_read_config_dword(pvt->pci_ha0, tad_dram_rule[n_tads],
934 &reg);
935 limit = TAD_LIMIT(reg);
936 if (limit <= prv) {
937 sprintf(msg, "Can't discover the memory channel");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200938 return -EINVAL;
939 }
940 if (addr <= limit)
941 break;
942 prv = limit;
943 }
944 ch_way = TAD_CH(reg) + 1;
945 sck_way = TAD_SOCK(reg) + 1;
946 /*
947 * FIXME: Is it right to always use channel 0 for offsets?
948 */
949 pci_read_config_dword(pvt->pci_tad[0],
950 tad_ch_nilv_offset[n_tads],
951 &tad_offset);
952
953 if (ch_way == 3)
954 idx = addr >> 6;
955 else
956 idx = addr >> (6 + sck_way);
957 idx = idx % ch_way;
958
959 /*
960 * FIXME: Shouldn't we use CHN_IDX_OFFSET() here, when ch_way == 3 ???
961 */
962 switch (idx) {
963 case 0:
964 base_ch = TAD_TGT0(reg);
965 break;
966 case 1:
967 base_ch = TAD_TGT1(reg);
968 break;
969 case 2:
970 base_ch = TAD_TGT2(reg);
971 break;
972 case 3:
973 base_ch = TAD_TGT3(reg);
974 break;
975 default:
976 sprintf(msg, "Can't discover the TAD target");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200977 return -EINVAL;
978 }
979 *channel_mask = 1 << base_ch;
980
981 if (pvt->is_mirrored) {
982 *channel_mask |= 1 << ((base_ch + 2) % 4);
983 switch(ch_way) {
984 case 2:
985 case 4:
986 sck_xch = 1 << sck_way * (ch_way >> 1);
987 break;
988 default:
989 sprintf(msg, "Invalid mirror set. Can't decode addr");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200990 return -EINVAL;
991 }
992 } else
993 sck_xch = (1 << sck_way) * ch_way;
994
995 if (pvt->is_lockstep)
996 *channel_mask |= 1 << ((base_ch + 1) % 4);
997
998 offset = TAD_OFFSET(tad_offset);
999
Joe Perches956b9ba12012-04-29 17:08:39 -03001000 edac_dbg(0, "TAD#%d: address 0x%016Lx < 0x%016Lx, socket interleave %d, channel interleave %d (offset 0x%08Lx), index %d, base ch: %d, ch mask: 0x%02lx\n",
1001 n_tads,
1002 addr,
1003 limit,
1004 (u32)TAD_SOCK(reg),
1005 ch_way,
1006 offset,
1007 idx,
1008 base_ch,
1009 *channel_mask);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001010
1011 /* Calculate channel address */
1012 /* Remove the TAD offset */
1013
1014 if (offset > addr) {
1015 sprintf(msg, "Can't calculate ch addr: TAD offset 0x%08Lx is too high for addr 0x%08Lx!",
1016 offset, addr);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001017 return -EINVAL;
1018 }
1019 addr -= offset;
1020 /* Store the low bits [0:6] of the addr */
1021 ch_addr = addr & 0x7f;
1022 /* Remove socket wayness and remove 6 bits */
1023 addr >>= 6;
Mauro Carvalho Chehab5b889e32011-11-07 18:26:53 -03001024 addr = div_u64(addr, sck_xch);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001025#if 0
1026 /* Divide by channel way */
1027 addr = addr / ch_way;
1028#endif
1029 /* Recover the last 6 bits */
1030 ch_addr |= addr << 6;
1031
1032 /*
1033 * Step 3) Decode rank
1034 */
1035 for (n_rir = 0; n_rir < MAX_RIR_RANGES; n_rir++) {
1036 pci_read_config_dword(pvt->pci_tad[base_ch],
1037 rir_way_limit[n_rir],
1038 &reg);
1039
1040 if (!IS_RIR_VALID(reg))
1041 continue;
1042
1043 limit = RIR_LIMIT(reg);
Mauro Carvalho Chehab5b889e32011-11-07 18:26:53 -03001044 mb = div_u64_rem(limit >> 20, 1000, &kb);
Joe Perches956b9ba12012-04-29 17:08:39 -03001045 edac_dbg(0, "RIR#%d, limit: %u.%03u GB (0x%016Lx), way: %d\n",
1046 n_rir,
1047 mb, kb,
1048 limit,
1049 1 << RIR_WAY(reg));
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001050 if (ch_addr <= limit)
1051 break;
1052 }
1053 if (n_rir == MAX_RIR_RANGES) {
1054 sprintf(msg, "Can't discover the memory rank for ch addr 0x%08Lx",
1055 ch_addr);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001056 return -EINVAL;
1057 }
1058 rir_way = RIR_WAY(reg);
1059 if (pvt->is_close_pg)
1060 idx = (ch_addr >> 6);
1061 else
1062 idx = (ch_addr >> 13); /* FIXME: Datasheet says to shift by 15 */
1063 idx %= 1 << rir_way;
1064
1065 pci_read_config_dword(pvt->pci_tad[base_ch],
1066 rir_offset[n_rir][idx],
1067 &reg);
1068 *rank = RIR_RNK_TGT(reg);
1069
Joe Perches956b9ba12012-04-29 17:08:39 -03001070 edac_dbg(0, "RIR#%d: channel address 0x%08Lx < 0x%08Lx, RIR interleave %d, index %d\n",
1071 n_rir,
1072 ch_addr,
1073 limit,
1074 rir_way,
1075 idx);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001076
1077 return 0;
1078}
1079
1080/****************************************************************************
1081 Device initialization routines: put/get, init/exit
1082 ****************************************************************************/
1083
1084/*
1085 * sbridge_put_all_devices 'put' all the devices that we have
1086 * reserved via 'get'
1087 */
1088static void sbridge_put_devices(struct sbridge_dev *sbridge_dev)
1089{
1090 int i;
1091
Joe Perches956b9ba12012-04-29 17:08:39 -03001092 edac_dbg(0, "\n");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001093 for (i = 0; i < sbridge_dev->n_devs; i++) {
1094 struct pci_dev *pdev = sbridge_dev->pdev[i];
1095 if (!pdev)
1096 continue;
Joe Perches956b9ba12012-04-29 17:08:39 -03001097 edac_dbg(0, "Removing dev %02x:%02x.%d\n",
1098 pdev->bus->number,
1099 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001100 pci_dev_put(pdev);
1101 }
1102}
1103
1104static void sbridge_put_all_devices(void)
1105{
1106 struct sbridge_dev *sbridge_dev, *tmp;
1107
1108 list_for_each_entry_safe(sbridge_dev, tmp, &sbridge_edac_list, list) {
1109 sbridge_put_devices(sbridge_dev);
1110 free_sbridge_dev(sbridge_dev);
1111 }
1112}
1113
1114/*
1115 * sbridge_get_all_devices Find and perform 'get' operation on the MCH's
1116 * device/functions we want to reference for this driver
1117 *
1118 * Need to 'get' device 16 func 1 and func 2
1119 */
1120static int sbridge_get_onedevice(struct pci_dev **prev,
1121 u8 *num_mc,
1122 const struct pci_id_table *table,
1123 const unsigned devno)
1124{
1125 struct sbridge_dev *sbridge_dev;
1126 const struct pci_id_descr *dev_descr = &table->descr[devno];
1127
1128 struct pci_dev *pdev = NULL;
1129 u8 bus = 0;
1130
1131 sbridge_printk(KERN_INFO,
1132 "Seeking for: dev %02x.%d PCI ID %04x:%04x\n",
1133 dev_descr->dev, dev_descr->func,
1134 PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1135
1136 pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1137 dev_descr->dev_id, *prev);
1138
1139 if (!pdev) {
1140 if (*prev) {
1141 *prev = pdev;
1142 return 0;
1143 }
1144
1145 if (dev_descr->optional)
1146 return 0;
1147
1148 if (devno == 0)
1149 return -ENODEV;
1150
1151 sbridge_printk(KERN_INFO,
1152 "Device not found: dev %02x.%d PCI ID %04x:%04x\n",
1153 dev_descr->dev, dev_descr->func,
1154 PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1155
1156 /* End of list, leave */
1157 return -ENODEV;
1158 }
1159 bus = pdev->bus->number;
1160
1161 sbridge_dev = get_sbridge_dev(bus);
1162 if (!sbridge_dev) {
1163 sbridge_dev = alloc_sbridge_dev(bus, table);
1164 if (!sbridge_dev) {
1165 pci_dev_put(pdev);
1166 return -ENOMEM;
1167 }
1168 (*num_mc)++;
1169 }
1170
1171 if (sbridge_dev->pdev[devno]) {
1172 sbridge_printk(KERN_ERR,
1173 "Duplicated device for "
1174 "dev %02x:%d.%d PCI ID %04x:%04x\n",
1175 bus, dev_descr->dev, dev_descr->func,
1176 PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1177 pci_dev_put(pdev);
1178 return -ENODEV;
1179 }
1180
1181 sbridge_dev->pdev[devno] = pdev;
1182
1183 /* Sanity check */
1184 if (unlikely(PCI_SLOT(pdev->devfn) != dev_descr->dev ||
1185 PCI_FUNC(pdev->devfn) != dev_descr->func)) {
1186 sbridge_printk(KERN_ERR,
1187 "Device PCI ID %04x:%04x "
1188 "has dev %02x:%d.%d instead of dev %02x:%02x.%d\n",
1189 PCI_VENDOR_ID_INTEL, dev_descr->dev_id,
1190 bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
1191 bus, dev_descr->dev, dev_descr->func);
1192 return -ENODEV;
1193 }
1194
1195 /* Be sure that the device is enabled */
1196 if (unlikely(pci_enable_device(pdev) < 0)) {
1197 sbridge_printk(KERN_ERR,
1198 "Couldn't enable "
1199 "dev %02x:%d.%d PCI ID %04x:%04x\n",
1200 bus, dev_descr->dev, dev_descr->func,
1201 PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1202 return -ENODEV;
1203 }
1204
Joe Perches956b9ba12012-04-29 17:08:39 -03001205 edac_dbg(0, "Detected dev %02x:%d.%d PCI ID %04x:%04x\n",
1206 bus, dev_descr->dev, dev_descr->func,
1207 PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001208
1209 /*
1210 * As stated on drivers/pci/search.c, the reference count for
1211 * @from is always decremented if it is not %NULL. So, as we need
1212 * to get all devices up to null, we need to do a get for the device
1213 */
1214 pci_dev_get(pdev);
1215
1216 *prev = pdev;
1217
1218 return 0;
1219}
1220
1221static int sbridge_get_all_devices(u8 *num_mc)
1222{
1223 int i, rc;
1224 struct pci_dev *pdev = NULL;
1225 const struct pci_id_table *table = pci_dev_descr_sbridge_table;
1226
1227 while (table && table->descr) {
1228 for (i = 0; i < table->n_devs; i++) {
1229 pdev = NULL;
1230 do {
1231 rc = sbridge_get_onedevice(&pdev, num_mc,
1232 table, i);
1233 if (rc < 0) {
1234 if (i == 0) {
1235 i = table->n_devs;
1236 break;
1237 }
1238 sbridge_put_all_devices();
1239 return -ENODEV;
1240 }
1241 } while (pdev);
1242 }
1243 table++;
1244 }
1245
1246 return 0;
1247}
1248
1249static int mci_bind_devs(struct mem_ctl_info *mci,
1250 struct sbridge_dev *sbridge_dev)
1251{
1252 struct sbridge_pvt *pvt = mci->pvt_info;
1253 struct pci_dev *pdev;
1254 int i, func, slot;
1255
1256 for (i = 0; i < sbridge_dev->n_devs; i++) {
1257 pdev = sbridge_dev->pdev[i];
1258 if (!pdev)
1259 continue;
1260 slot = PCI_SLOT(pdev->devfn);
1261 func = PCI_FUNC(pdev->devfn);
1262 switch (slot) {
1263 case 12:
1264 switch (func) {
1265 case 6:
1266 pvt->pci_sad0 = pdev;
1267 break;
1268 case 7:
1269 pvt->pci_sad1 = pdev;
1270 break;
1271 default:
1272 goto error;
1273 }
1274 break;
1275 case 13:
1276 switch (func) {
1277 case 6:
Aristeu Rozanski5f8a1b82013-10-30 13:26:58 -03001278 pvt->pci_br0 = pdev;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001279 break;
1280 default:
1281 goto error;
1282 }
1283 break;
1284 case 14:
1285 switch (func) {
1286 case 0:
1287 pvt->pci_ha0 = pdev;
1288 break;
1289 default:
1290 goto error;
1291 }
1292 break;
1293 case 15:
1294 switch (func) {
1295 case 0:
1296 pvt->pci_ta = pdev;
1297 break;
1298 case 1:
1299 pvt->pci_ras = pdev;
1300 break;
1301 case 2:
1302 case 3:
1303 case 4:
1304 case 5:
1305 pvt->pci_tad[func - 2] = pdev;
1306 break;
1307 default:
1308 goto error;
1309 }
1310 break;
1311 case 17:
1312 switch (func) {
1313 case 0:
1314 pvt->pci_ddrio = pdev;
1315 break;
1316 default:
1317 goto error;
1318 }
1319 break;
1320 default:
1321 goto error;
1322 }
1323
Joe Perches956b9ba12012-04-29 17:08:39 -03001324 edac_dbg(0, "Associated PCI %02x.%02d.%d with dev = %p\n",
1325 sbridge_dev->bus,
1326 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
1327 pdev);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001328 }
1329
1330 /* Check if everything were registered */
1331 if (!pvt->pci_sad0 || !pvt->pci_sad1 || !pvt->pci_ha0 ||
Luck, Tonyde4772c2013-03-28 09:59:15 -07001332 !pvt-> pci_tad || !pvt->pci_ras || !pvt->pci_ta)
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001333 goto enodev;
1334
1335 for (i = 0; i < NUM_CHANNELS; i++) {
1336 if (!pvt->pci_tad[i])
1337 goto enodev;
1338 }
1339 return 0;
1340
1341enodev:
1342 sbridge_printk(KERN_ERR, "Some needed devices are missing\n");
1343 return -ENODEV;
1344
1345error:
1346 sbridge_printk(KERN_ERR, "Device %d, function %d "
1347 "is out of the expected range\n",
1348 slot, func);
1349 return -EINVAL;
1350}
1351
1352/****************************************************************************
1353 Error check routines
1354 ****************************************************************************/
1355
1356/*
1357 * While Sandy Bridge has error count registers, SMI BIOS read values from
1358 * and resets the counters. So, they are not reliable for the OS to read
1359 * from them. So, we have no option but to just trust on whatever MCE is
1360 * telling us about the errors.
1361 */
1362static void sbridge_mce_output_error(struct mem_ctl_info *mci,
1363 const struct mce *m)
1364{
1365 struct mem_ctl_info *new_mci;
1366 struct sbridge_pvt *pvt = mci->pvt_info;
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001367 enum hw_event_mc_err_type tp_event;
Mauro Carvalho Chehabe17a2f42a2012-05-11 11:41:45 -03001368 char *type, *optype, msg[256];
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001369 bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0);
1370 bool overflow = GET_BITFIELD(m->status, 62, 62);
1371 bool uncorrected_error = GET_BITFIELD(m->status, 61, 61);
1372 bool recoverable = GET_BITFIELD(m->status, 56, 56);
1373 u32 core_err_cnt = GET_BITFIELD(m->status, 38, 52);
1374 u32 mscod = GET_BITFIELD(m->status, 16, 31);
1375 u32 errcode = GET_BITFIELD(m->status, 0, 15);
1376 u32 channel = GET_BITFIELD(m->status, 0, 3);
1377 u32 optypenum = GET_BITFIELD(m->status, 4, 6);
1378 long channel_mask, first_channel;
1379 u8 rank, socket;
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001380 int rc, dimm;
Mauro Carvalho Chehabe17a2f42a2012-05-11 11:41:45 -03001381 char *area_type = NULL;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001382
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001383 if (uncorrected_error) {
1384 if (ripv) {
1385 type = "FATAL";
1386 tp_event = HW_EVENT_ERR_FATAL;
1387 } else {
1388 type = "NON_FATAL";
1389 tp_event = HW_EVENT_ERR_UNCORRECTED;
1390 }
1391 } else {
1392 type = "CORRECTED";
1393 tp_event = HW_EVENT_ERR_CORRECTED;
1394 }
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001395
1396 /*
David Mackey15ed1032012-04-17 11:30:52 -07001397 * According with Table 15-9 of the Intel Architecture spec vol 3A,
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001398 * memory errors should fit in this mask:
1399 * 000f 0000 1mmm cccc (binary)
1400 * where:
1401 * f = Correction Report Filtering Bit. If 1, subsequent errors
1402 * won't be shown
1403 * mmm = error type
1404 * cccc = channel
1405 * If the mask doesn't match, report an error to the parsing logic
1406 */
1407 if (! ((errcode & 0xef80) == 0x80)) {
1408 optype = "Can't parse: it is not a mem";
1409 } else {
1410 switch (optypenum) {
1411 case 0:
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001412 optype = "generic undef request error";
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001413 break;
1414 case 1:
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001415 optype = "memory read error";
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001416 break;
1417 case 2:
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001418 optype = "memory write error";
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001419 break;
1420 case 3:
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001421 optype = "addr/cmd error";
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001422 break;
1423 case 4:
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001424 optype = "memory scrubbing error";
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001425 break;
1426 default:
1427 optype = "reserved";
1428 break;
1429 }
1430 }
1431
1432 rc = get_memory_error_data(mci, m->addr, &socket,
Mauro Carvalho Chehabe17a2f42a2012-05-11 11:41:45 -03001433 &channel_mask, &rank, &area_type, msg);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001434 if (rc < 0)
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001435 goto err_parsing;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001436 new_mci = get_mci_for_node_id(socket);
1437 if (!new_mci) {
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001438 strcpy(msg, "Error: socket got corrupted!");
1439 goto err_parsing;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001440 }
1441 mci = new_mci;
1442 pvt = mci->pvt_info;
1443
1444 first_channel = find_first_bit(&channel_mask, NUM_CHANNELS);
1445
1446 if (rank < 4)
1447 dimm = 0;
1448 else if (rank < 8)
1449 dimm = 1;
1450 else
1451 dimm = 2;
1452
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001453
1454 /*
Mauro Carvalho Chehabe17a2f42a2012-05-11 11:41:45 -03001455 * FIXME: On some memory configurations (mirror, lockstep), the
1456 * Memory Controller can't point the error to a single DIMM. The
1457 * EDAC core should be handling the channel mask, in order to point
1458 * to the group of dimm's where the error may be happening.
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001459 */
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001460 snprintf(msg, sizeof(msg),
Mauro Carvalho Chehabc1053832012-06-04 13:40:05 -03001461 "%s%s area:%s err_code:%04x:%04x socket:%d channel_mask:%ld rank:%d",
Mauro Carvalho Chehabe17a2f42a2012-05-11 11:41:45 -03001462 overflow ? " OVERFLOW" : "",
1463 (uncorrected_error && recoverable) ? " recoverable" : "",
1464 area_type,
1465 mscod, errcode,
1466 socket,
1467 channel_mask,
1468 rank);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001469
Joe Perches956b9ba12012-04-29 17:08:39 -03001470 edac_dbg(0, "%s\n", msg);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001471
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001472 /* FIXME: need support for channel mask */
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001473
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001474 /* Call the helper to output message */
Mauro Carvalho Chehabc1053832012-06-04 13:40:05 -03001475 edac_mc_handle_error(tp_event, mci, core_err_cnt,
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001476 m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0,
1477 channel, dimm, -1,
Mauro Carvalho Chehab03f7eae2012-06-04 11:29:25 -03001478 optype, msg);
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001479 return;
1480err_parsing:
Mauro Carvalho Chehabc1053832012-06-04 13:40:05 -03001481 edac_mc_handle_error(tp_event, mci, core_err_cnt, 0, 0, 0,
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001482 -1, -1, -1,
Mauro Carvalho Chehab03f7eae2012-06-04 11:29:25 -03001483 msg, "");
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001484
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001485}
1486
1487/*
1488 * sbridge_check_error Retrieve and process errors reported by the
1489 * hardware. Called by the Core module.
1490 */
1491static void sbridge_check_error(struct mem_ctl_info *mci)
1492{
1493 struct sbridge_pvt *pvt = mci->pvt_info;
1494 int i;
1495 unsigned count = 0;
1496 struct mce *m;
1497
1498 /*
1499 * MCE first step: Copy all mce errors into a temporary buffer
1500 * We use a double buffering here, to reduce the risk of
1501 * loosing an error.
1502 */
1503 smp_rmb();
1504 count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in)
1505 % MCE_LOG_LEN;
1506 if (!count)
1507 return;
1508
1509 m = pvt->mce_outentry;
1510 if (pvt->mce_in + count > MCE_LOG_LEN) {
1511 unsigned l = MCE_LOG_LEN - pvt->mce_in;
1512
1513 memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l);
1514 smp_wmb();
1515 pvt->mce_in = 0;
1516 count -= l;
1517 m += l;
1518 }
1519 memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count);
1520 smp_wmb();
1521 pvt->mce_in += count;
1522
1523 smp_rmb();
1524 if (pvt->mce_overrun) {
1525 sbridge_printk(KERN_ERR, "Lost %d memory errors\n",
1526 pvt->mce_overrun);
1527 smp_wmb();
1528 pvt->mce_overrun = 0;
1529 }
1530
1531 /*
1532 * MCE second step: parse errors and display
1533 */
1534 for (i = 0; i < count; i++)
1535 sbridge_mce_output_error(mci, &pvt->mce_outentry[i]);
1536}
1537
1538/*
1539 * sbridge_mce_check_error Replicates mcelog routine to get errors
1540 * This routine simply queues mcelog errors, and
1541 * return. The error itself should be handled later
1542 * by sbridge_check_error.
1543 * WARNING: As this routine should be called at NMI time, extra care should
1544 * be taken to avoid deadlocks, and to be as fast as possible.
1545 */
Mauro Carvalho Chehab3d78c9a2011-10-20 19:33:46 -02001546static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val,
1547 void *data)
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001548{
Mauro Carvalho Chehab3d78c9a2011-10-20 19:33:46 -02001549 struct mce *mce = (struct mce *)data;
1550 struct mem_ctl_info *mci;
1551 struct sbridge_pvt *pvt;
1552
1553 mci = get_mci_for_node_id(mce->socketid);
1554 if (!mci)
1555 return NOTIFY_BAD;
1556 pvt = mci->pvt_info;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001557
1558 /*
1559 * Just let mcelog handle it if the error is
1560 * outside the memory controller. A memory error
1561 * is indicated by bit 7 = 1 and bits = 8-11,13-15 = 0.
1562 * bit 12 has an special meaning.
1563 */
1564 if ((mce->status & 0xefff) >> 7 != 1)
Mauro Carvalho Chehab3d78c9a2011-10-20 19:33:46 -02001565 return NOTIFY_DONE;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001566
1567 printk("sbridge: HANDLING MCE MEMORY ERROR\n");
1568
1569 printk("CPU %d: Machine Check Exception: %Lx Bank %d: %016Lx\n",
1570 mce->extcpu, mce->mcgstatus, mce->bank, mce->status);
1571 printk("TSC %llx ", mce->tsc);
1572 printk("ADDR %llx ", mce->addr);
1573 printk("MISC %llx ", mce->misc);
1574
1575 printk("PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n",
1576 mce->cpuvendor, mce->cpuid, mce->time,
1577 mce->socketid, mce->apicid);
1578
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001579 /* Only handle if it is the right mc controller */
1580 if (cpu_data(mce->cpu).phys_proc_id != pvt->sbridge_dev->mc)
Mauro Carvalho Chehab3d78c9a2011-10-20 19:33:46 -02001581 return NOTIFY_DONE;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001582
1583 smp_rmb();
1584 if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
1585 smp_wmb();
1586 pvt->mce_overrun++;
Mauro Carvalho Chehab3d78c9a2011-10-20 19:33:46 -02001587 return NOTIFY_DONE;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001588 }
1589
1590 /* Copy memory error at the ringbuffer */
1591 memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce));
1592 smp_wmb();
1593 pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN;
1594
1595 /* Handle fatal errors immediately */
1596 if (mce->mcgstatus & 1)
1597 sbridge_check_error(mci);
1598
1599 /* Advice mcelog that the error were handled */
Mauro Carvalho Chehab3d78c9a2011-10-20 19:33:46 -02001600 return NOTIFY_STOP;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001601}
1602
Mauro Carvalho Chehab3d78c9a2011-10-20 19:33:46 -02001603static struct notifier_block sbridge_mce_dec = {
1604 .notifier_call = sbridge_mce_check_error,
1605};
1606
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001607/****************************************************************************
1608 EDAC register/unregister logic
1609 ****************************************************************************/
1610
1611static void sbridge_unregister_mci(struct sbridge_dev *sbridge_dev)
1612{
1613 struct mem_ctl_info *mci = sbridge_dev->mci;
1614 struct sbridge_pvt *pvt;
1615
1616 if (unlikely(!mci || !mci->pvt_info)) {
Joe Perches956b9ba12012-04-29 17:08:39 -03001617 edac_dbg(0, "MC: dev = %p\n", &sbridge_dev->pdev[0]->dev);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001618
1619 sbridge_printk(KERN_ERR, "Couldn't find mci handler\n");
1620 return;
1621 }
1622
1623 pvt = mci->pvt_info;
1624
Joe Perches956b9ba12012-04-29 17:08:39 -03001625 edac_dbg(0, "MC: mci = %p, dev = %p\n",
1626 mci, &sbridge_dev->pdev[0]->dev);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001627
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001628 /* Remove MC sysfs nodes */
Mauro Carvalho Chehabfd687502012-03-16 07:44:18 -03001629 edac_mc_del_mc(mci->pdev);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001630
Joe Perches956b9ba12012-04-29 17:08:39 -03001631 edac_dbg(1, "%s: free mci struct\n", mci->ctl_name);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001632 kfree(mci->ctl_name);
1633 edac_mc_free(mci);
1634 sbridge_dev->mci = NULL;
1635}
1636
1637static int sbridge_register_mci(struct sbridge_dev *sbridge_dev)
1638{
1639 struct mem_ctl_info *mci;
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001640 struct edac_mc_layer layers[2];
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001641 struct sbridge_pvt *pvt;
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001642 int rc;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001643
1644 /* Check the number of active and not disabled channels */
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001645 rc = check_if_ecc_is_active(sbridge_dev->bus);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001646 if (unlikely(rc < 0))
1647 return rc;
1648
1649 /* allocate a new MC control structure */
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001650 layers[0].type = EDAC_MC_LAYER_CHANNEL;
1651 layers[0].size = NUM_CHANNELS;
1652 layers[0].is_virt_csrow = false;
1653 layers[1].type = EDAC_MC_LAYER_SLOT;
1654 layers[1].size = MAX_DIMMS;
1655 layers[1].is_virt_csrow = true;
Mauro Carvalho Chehabca0907b2012-05-02 14:37:00 -03001656 mci = edac_mc_alloc(sbridge_dev->mc, ARRAY_SIZE(layers), layers,
Mauro Carvalho Chehabc36e3e72012-04-16 15:12:22 -03001657 sizeof(*pvt));
1658
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001659 if (unlikely(!mci))
1660 return -ENOMEM;
1661
Joe Perches956b9ba12012-04-29 17:08:39 -03001662 edac_dbg(0, "MC: mci = %p, dev = %p\n",
1663 mci, &sbridge_dev->pdev[0]->dev);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001664
1665 pvt = mci->pvt_info;
1666 memset(pvt, 0, sizeof(*pvt));
1667
1668 /* Associate sbridge_dev and mci for future usage */
1669 pvt->sbridge_dev = sbridge_dev;
1670 sbridge_dev->mci = mci;
1671
1672 mci->mtype_cap = MEM_FLAG_DDR3;
1673 mci->edac_ctl_cap = EDAC_FLAG_NONE;
1674 mci->edac_cap = EDAC_FLAG_NONE;
1675 mci->mod_name = "sbridge_edac.c";
1676 mci->mod_ver = SBRIDGE_REVISION;
1677 mci->ctl_name = kasprintf(GFP_KERNEL, "Sandy Bridge Socket#%d", mci->mc_idx);
1678 mci->dev_name = pci_name(sbridge_dev->pdev[0]);
1679 mci->ctl_page_to_phys = NULL;
Aristeu Rozanskifb79a502013-10-30 13:26:57 -03001680 pvt->info.get_tolm = sbridge_get_tolm;
Aristeu Rozanski8fd6a432013-10-30 13:26:59 -03001681 pvt->info.get_tohm = sbridge_get_tohm;
Aristeu Rozanski464f1d82013-10-30 13:27:00 -03001682 pvt->info.dram_rule = sbridge_dram_rule;
1683 pvt->info.max_sad = ARRAY_SIZE(sbridge_dram_rule);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001684
1685 /* Set the function pointer to an actual operation function */
1686 mci->edac_check = sbridge_check_error;
1687
1688 /* Store pci devices at mci for faster access */
1689 rc = mci_bind_devs(mci, sbridge_dev);
1690 if (unlikely(rc < 0))
1691 goto fail0;
1692
1693 /* Get dimm basic config and the memory layout */
1694 get_dimm_config(mci);
1695 get_memory_layout(mci);
1696
1697 /* record ptr to the generic device */
Mauro Carvalho Chehabfd687502012-03-16 07:44:18 -03001698 mci->pdev = &sbridge_dev->pdev[0]->dev;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001699
1700 /* add this new MC control structure to EDAC's list of MCs */
1701 if (unlikely(edac_mc_add_mc(mci))) {
Joe Perches956b9ba12012-04-29 17:08:39 -03001702 edac_dbg(0, "MC: failed edac_mc_add_mc()\n");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001703 rc = -EINVAL;
1704 goto fail0;
1705 }
1706
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001707 return 0;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001708
1709fail0:
1710 kfree(mci->ctl_name);
1711 edac_mc_free(mci);
1712 sbridge_dev->mci = NULL;
1713 return rc;
1714}
1715
1716/*
1717 * sbridge_probe Probe for ONE instance of device to see if it is
1718 * present.
1719 * return:
1720 * 0 for FOUND a device
1721 * < 0 for error code
1722 */
1723
Greg Kroah-Hartman9b3c6e82012-12-21 13:23:51 -08001724static int sbridge_probe(struct pci_dev *pdev, const struct pci_device_id *id)
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001725{
1726 int rc;
1727 u8 mc, num_mc = 0;
1728 struct sbridge_dev *sbridge_dev;
1729
1730 /* get the pci devices we want to reserve for our use */
1731 mutex_lock(&sbridge_edac_lock);
1732
1733 /*
1734 * All memory controllers are allocated at the first pass.
1735 */
1736 if (unlikely(probed >= 1)) {
1737 mutex_unlock(&sbridge_edac_lock);
1738 return -ENODEV;
1739 }
1740 probed++;
1741
1742 rc = sbridge_get_all_devices(&num_mc);
1743 if (unlikely(rc < 0))
1744 goto fail0;
1745 mc = 0;
1746
1747 list_for_each_entry(sbridge_dev, &sbridge_edac_list, list) {
Joe Perches956b9ba12012-04-29 17:08:39 -03001748 edac_dbg(0, "Registering MC#%d (%d of %d)\n",
1749 mc, mc + 1, num_mc);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001750 sbridge_dev->mc = mc++;
1751 rc = sbridge_register_mci(sbridge_dev);
1752 if (unlikely(rc < 0))
1753 goto fail1;
1754 }
1755
1756 sbridge_printk(KERN_INFO, "Driver loaded.\n");
1757
1758 mutex_unlock(&sbridge_edac_lock);
1759 return 0;
1760
1761fail1:
1762 list_for_each_entry(sbridge_dev, &sbridge_edac_list, list)
1763 sbridge_unregister_mci(sbridge_dev);
1764
1765 sbridge_put_all_devices();
1766fail0:
1767 mutex_unlock(&sbridge_edac_lock);
1768 return rc;
1769}
1770
1771/*
1772 * sbridge_remove destructor for one instance of device
1773 *
1774 */
Greg Kroah-Hartman9b3c6e82012-12-21 13:23:51 -08001775static void sbridge_remove(struct pci_dev *pdev)
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001776{
1777 struct sbridge_dev *sbridge_dev;
1778
Joe Perches956b9ba12012-04-29 17:08:39 -03001779 edac_dbg(0, "\n");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001780
1781 /*
1782 * we have a trouble here: pdev value for removal will be wrong, since
1783 * it will point to the X58 register used to detect that the machine
1784 * is a Nehalem or upper design. However, due to the way several PCI
1785 * devices are grouped together to provide MC functionality, we need
1786 * to use a different method for releasing the devices
1787 */
1788
1789 mutex_lock(&sbridge_edac_lock);
1790
1791 if (unlikely(!probed)) {
1792 mutex_unlock(&sbridge_edac_lock);
1793 return;
1794 }
1795
1796 list_for_each_entry(sbridge_dev, &sbridge_edac_list, list)
1797 sbridge_unregister_mci(sbridge_dev);
1798
1799 /* Release PCI resources */
1800 sbridge_put_all_devices();
1801
1802 probed--;
1803
1804 mutex_unlock(&sbridge_edac_lock);
1805}
1806
1807MODULE_DEVICE_TABLE(pci, sbridge_pci_tbl);
1808
1809/*
1810 * sbridge_driver pci_driver structure for this module
1811 *
1812 */
1813static struct pci_driver sbridge_driver = {
1814 .name = "sbridge_edac",
1815 .probe = sbridge_probe,
Greg Kroah-Hartman9b3c6e82012-12-21 13:23:51 -08001816 .remove = sbridge_remove,
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001817 .id_table = sbridge_pci_tbl,
1818};
1819
1820/*
1821 * sbridge_init Module entry function
1822 * Try to initialize this module for its devices
1823 */
1824static int __init sbridge_init(void)
1825{
1826 int pci_rc;
1827
Joe Perches956b9ba12012-04-29 17:08:39 -03001828 edac_dbg(2, "\n");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001829
1830 /* Ensure that the OPSTATE is set correctly for POLL or NMI */
1831 opstate_init();
1832
1833 pci_rc = pci_register_driver(&sbridge_driver);
1834
Chen Gonge35fca42012-05-08 20:40:12 -03001835 if (pci_rc >= 0) {
1836 mce_register_decode_chain(&sbridge_mce_dec);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001837 return 0;
Chen Gonge35fca42012-05-08 20:40:12 -03001838 }
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001839
1840 sbridge_printk(KERN_ERR, "Failed to register device with error %d.\n",
1841 pci_rc);
1842
1843 return pci_rc;
1844}
1845
1846/*
1847 * sbridge_exit() Module exit function
1848 * Unregister the driver
1849 */
1850static void __exit sbridge_exit(void)
1851{
Joe Perches956b9ba12012-04-29 17:08:39 -03001852 edac_dbg(2, "\n");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001853 pci_unregister_driver(&sbridge_driver);
Chen Gonge35fca42012-05-08 20:40:12 -03001854 mce_unregister_decode_chain(&sbridge_mce_dec);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001855}
1856
1857module_init(sbridge_init);
1858module_exit(sbridge_exit);
1859
1860module_param(edac_op_state, int, 0444);
1861MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
1862
1863MODULE_LICENSE("GPL");
1864MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
1865MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
1866MODULE_DESCRIPTION("MC Driver for Intel Sandy Bridge memory controllers - "
1867 SBRIDGE_REVISION);