/*
 * edac_mc kernel module
 * (C) 2005, 2006 Linux Networx (http://lnxi.com)
 * This file may be distributed under the terms of the
 * GNU General Public License.
 *
 * Written by Thayne Harbaugh
 * Based on work by Dan Hollis <goemon at anime dot net> and others.
 *	http://www.anime.net/~goemon/linux-ecc/
 *
 * Modified by Dave Peterson and Doug Thompson
 *
 */

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/sysctl.h>
#include <linux/highmem.h>
#include <linux/timer.h>
#include <linux/slab.h>
#include <linux/jiffies.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/ctype.h>
#include <linux/edac.h>
#include <linux/bitops.h>
#include <asm/uaccess.h>
#include <asm/page.h>
#include <asm/edac.h>
#include "edac_core.h"
#include "edac_module.h"

#define CREATE_TRACE_POINTS
#define TRACE_INCLUDE_PATH ../../include/ras
#include <ras/ras_event.h>

/* lock to memory controller's control array */
static DEFINE_MUTEX(mem_ctls_mutex);
static LIST_HEAD(mc_devices);

unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf,
				 unsigned len)
{
	struct mem_ctl_info *mci = dimm->mci;
	int i, n, count = 0;
	char *p = buf;

	for (i = 0; i < mci->n_layers; i++) {
		n = snprintf(p, len, "%s %d ",
			     edac_layer_name[mci->layers[i].type],
			     dimm->location[i]);
		p += n;
		len -= n;
		count += n;
		if (!len)
			break;
	}

	return count;
}

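/*
 * A minimal usage sketch for edac_dimm_info_location() (illustrative only;
 * "mci" is assumed to be an already filled-in struct mem_ctl_info):
 *
 *	char location[80];
 *	int i;
 *
 *	for (i = 0; i < mci->tot_dimms; i++) {
 *		edac_dimm_info_location(mci->dimms[i], location,
 *					sizeof(location));
 *		edac_dbg(4, "DIMM %d location: %s\n", i, location);
 *	}
 */
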
#ifdef CONFIG_EDAC_DEBUG

static void edac_mc_dump_channel(struct rank_info *chan)
{
	edac_dbg(4, "  channel->chan_idx = %d\n", chan->chan_idx);
	edac_dbg(4, "    channel = %p\n", chan);
	edac_dbg(4, "    channel->csrow = %p\n", chan->csrow);
	edac_dbg(4, "    channel->dimm = %p\n", chan->dimm);
}

static void edac_mc_dump_dimm(struct dimm_info *dimm, int number)
{
	char location[80];

	edac_dimm_info_location(dimm, location, sizeof(location));

	edac_dbg(4, "%s%i: %smapped as virtual row %d, chan %d\n",
		 dimm->mci->mem_is_per_rank ? "rank" : "dimm",
		 number, location, dimm->csrow, dimm->cschannel);
	edac_dbg(4, "  dimm = %p\n", dimm);
	edac_dbg(4, "  dimm->label = '%s'\n", dimm->label);
	edac_dbg(4, "  dimm->nr_pages = 0x%x\n", dimm->nr_pages);
	edac_dbg(4, "  dimm->grain = %d\n", dimm->grain);
	edac_dbg(4, "  dimm->nr_pages = 0x%x\n", dimm->nr_pages);
}

static void edac_mc_dump_csrow(struct csrow_info *csrow)
{
	edac_dbg(4, "csrow->csrow_idx = %d\n", csrow->csrow_idx);
	edac_dbg(4, "  csrow = %p\n", csrow);
	edac_dbg(4, "  csrow->first_page = 0x%lx\n", csrow->first_page);
	edac_dbg(4, "  csrow->last_page = 0x%lx\n", csrow->last_page);
	edac_dbg(4, "  csrow->page_mask = 0x%lx\n", csrow->page_mask);
	edac_dbg(4, "  csrow->nr_channels = %d\n", csrow->nr_channels);
	edac_dbg(4, "  csrow->channels = %p\n", csrow->channels);
	edac_dbg(4, "  csrow->mci = %p\n", csrow->mci);
}

static void edac_mc_dump_mci(struct mem_ctl_info *mci)
{
	edac_dbg(3, "\tmci = %p\n", mci);
	edac_dbg(3, "\tmci->mtype_cap = %lx\n", mci->mtype_cap);
	edac_dbg(3, "\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
	edac_dbg(3, "\tmci->edac_cap = %lx\n", mci->edac_cap);
	edac_dbg(4, "\tmci->edac_check = %p\n", mci->edac_check);
	edac_dbg(3, "\tmci->nr_csrows = %d, csrows = %p\n",
		 mci->nr_csrows, mci->csrows);
	edac_dbg(3, "\tmci->nr_dimms = %d, dimms = %p\n",
		 mci->tot_dimms, mci->dimms);
	edac_dbg(3, "\tdev = %p\n", mci->pdev);
	edac_dbg(3, "\tmod_name:ctl_name = %s:%s\n",
		 mci->mod_name, mci->ctl_name);
	edac_dbg(3, "\tpvt_info = %p\n\n", mci->pvt_info);
}

#endif				/* CONFIG_EDAC_DEBUG */

/*
 * keep those in sync with the enum mem_type
 */
const char *edac_mem_types[] = {
	"Empty csrow",
	"Reserved csrow type",
	"Unknown csrow type",
	"Fast page mode RAM",
	"Extended data out RAM",
	"Burst Extended data out RAM",
	"Single data rate SDRAM",
	"Registered single data rate SDRAM",
	"Double data rate SDRAM",
	"Registered Double data rate SDRAM",
	"Rambus DRAM",
	"Unbuffered DDR2 RAM",
	"Fully buffered DDR2",
	"Registered DDR2 RAM",
	"Rambus XDR",
	"Unbuffered DDR3 RAM",
	"Registered DDR3 RAM",
};
EXPORT_SYMBOL_GPL(edac_mem_types);

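/*
 * A minimal sketch (not from this file) of how a driver could use the table
 * above to log a human-readable memory type, indexing it with an
 * enum mem_type value such as MEM_DDR3:
 *
 *	edac_printk(KERN_INFO, EDAC_MC, "memory type: %s\n",
 *		    edac_mem_types[MEM_DDR3]);
 */
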
/**
 * edac_align_ptr - Prepares the pointer offsets for a single-shot allocation
 * @p:		pointer to a pointer with the memory offset to be used. On
 *		return, this will be incremented to point to the next offset
 * @size:	Size of the data structure to be reserved
 * @n_elems:	Number of elements that should be reserved
 *
 * If 'size' is a constant, the compiler will optimize this whole function
 * down to either a no-op or the addition of a constant to the value of '*p'.
 *
 * The 'p' pointer is needed so that consecutive calls keep advancing to the
 * proper offsets when allocating a struct along with its embedded structs,
 * as edac_device_alloc_ctl_info() does, for example.
 *
 * On return, the pointer 'p' will have been incremented, ready to be used on
 * the next call to this function.
 */
void *edac_align_ptr(void **p, unsigned size, int n_elems)
{
	unsigned align, r;
	void *ptr = *p;

	*p += size * n_elems;

	/*
	 * 'p' can possibly be an unaligned item X such that sizeof(X) is
	 * 'size'.  Adjust 'p' so that its alignment is at least as
	 * stringent as what the compiler would provide for X and return
	 * the aligned result.
	 * Here we assume that the alignment of a "long long" is the most
	 * stringent alignment that the compiler will ever provide by default.
	 * As far as I know, this is a reasonable assumption.
	 */
	if (size > sizeof(long))
		align = sizeof(long long);
	else if (size > sizeof(int))
		align = sizeof(long);
	else if (size > sizeof(short))
		align = sizeof(int);
	else if (size > sizeof(char))
		align = sizeof(short);
	else
		return (char *)ptr;

	r = size % align;

	if (r == 0)
		return (char *)ptr;

	*p += align - r;

	return (void *)(((unsigned long)ptr) + align - r);
}

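/*
 * A minimal sketch of the single-shot allocation pattern this helper
 * supports, mirroring what edac_mc_alloc() does below.  The names "foo",
 * "counters" and "n" are hypothetical and not part of this file:
 *
 *	void *ptr = NULL, *mem;
 *	struct foo *foo;
 *	u32 *counters;
 *	unsigned size;
 *
 *	foo      = edac_align_ptr(&ptr, sizeof(*foo), 1);
 *	counters = edac_align_ptr(&ptr, sizeof(u32), n);
 *	size     = (unsigned long)ptr;
 *
 *	mem      = kzalloc(size, GFP_KERNEL);
 *	foo      = (struct foo *)((char *)mem + (unsigned long)foo);
 *	counters = (u32 *)((char *)mem + (unsigned long)counters);
 */
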
/**
 * edac_mc_alloc: Allocate and partially fill a struct mem_ctl_info structure
 * @mc_num:	Memory controller number
 * @n_layers:	Number of MC hierarchy layers
 * @layers:	Describes each layer as seen by the Memory Controller
 * @sz_pvt:	size of private storage needed
 *
 * Everything is kmalloc'ed as one big chunk - more efficient.
 * It can only be used if all structures have the same lifetime - otherwise
 * you have to allocate and initialize your own structures.
 *
 * Use edac_mc_free() to free mc structures allocated by this function.
 *
 * NOTE: drivers handle multi-rank memories in different ways: in some
 * drivers, one multi-rank memory stick is mapped as one entry, while, in
 * others, a single multi-rank memory stick would be mapped into several
 * entries. Currently, this function will allocate multiple struct dimm_info
 * in such scenarios, as grouping the multiple ranks would require changes to
 * the drivers.
 *
 * Returns:
 *	On failure: NULL
 *	On success: struct mem_ctl_info pointer
 */
struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
				   unsigned n_layers,
				   struct edac_mc_layer *layers,
				   unsigned sz_pvt)
{
	struct mem_ctl_info *mci;
	struct edac_mc_layer *layer;
	struct csrow_info *csr;
	struct rank_info *chan;
	struct dimm_info *dimm;
	u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
	unsigned pos[EDAC_MAX_LAYERS];
	unsigned size, tot_dimms = 1, count = 1;
	unsigned tot_csrows = 1, tot_channels = 1, tot_errcount = 0;
	void *pvt, *p, *ptr = NULL;
	int i, j, row, chn, n, len, off;
	bool per_rank = false;

	BUG_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0);
	/*
	 * Calculate the total amount of dimms and csrows/cschannels while
	 * in the old API emulation mode
	 */
	for (i = 0; i < n_layers; i++) {
		tot_dimms *= layers[i].size;
		if (layers[i].is_virt_csrow)
			tot_csrows *= layers[i].size;
		else
			tot_channels *= layers[i].size;

		if (layers[i].type == EDAC_MC_LAYER_CHIP_SELECT)
			per_rank = true;
	}

	/* Figure out the offsets of the various items from the start of an mc
	 * structure.  We want the alignment of each item to be at least as
	 * stringent as what the compiler would provide if we could simply
	 * hardcode everything into a single struct.
	 */
	mci = edac_align_ptr(&ptr, sizeof(*mci), 1);
	layer = edac_align_ptr(&ptr, sizeof(*layer), n_layers);
	for (i = 0; i < n_layers; i++) {
		count *= layers[i].size;
		edac_dbg(4, "errcount layer %d size %d\n", i, count);
		ce_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
		ue_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
		tot_errcount += 2 * count;
	}

	edac_dbg(4, "allocating %d error counters\n", tot_errcount);
	pvt = edac_align_ptr(&ptr, sz_pvt, 1);
	size = ((unsigned long)pvt) + sz_pvt;

	edac_dbg(1, "allocating %u bytes for mci data (%d %s, %d csrows/channels)\n",
		 size,
		 tot_dimms,
		 per_rank ? "ranks" : "dimms",
		 tot_csrows * tot_channels);

	mci = kzalloc(size, GFP_KERNEL);
	if (mci == NULL)
		return NULL;

	/* Adjust pointers so they point within the memory we just allocated
	 * rather than an imaginary chunk of memory located at address 0.
	 */
	layer = (struct edac_mc_layer *)(((char *)mci) + ((unsigned long)layer));
	for (i = 0; i < n_layers; i++) {
		mci->ce_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ce_per_layer[i]));
		mci->ue_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ue_per_layer[i]));
	}
	pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;

	/* setup index and various internal pointers */
	mci->mc_idx = mc_num;
	mci->tot_dimms = tot_dimms;
	mci->pvt_info = pvt;
	mci->n_layers = n_layers;
	mci->layers = layer;
	memcpy(mci->layers, layers, sizeof(*layer) * n_layers);
	mci->nr_csrows = tot_csrows;
	mci->num_cschannel = tot_channels;
	mci->mem_is_per_rank = per_rank;

	/*
	 * Allocate and fill the csrow/channels structs
	 */
	mci->csrows = kcalloc(sizeof(*mci->csrows), tot_csrows, GFP_KERNEL);
	if (!mci->csrows)
		goto error;
	for (row = 0; row < tot_csrows; row++) {
		csr = kzalloc(sizeof(**mci->csrows), GFP_KERNEL);
		if (!csr)
			goto error;
		mci->csrows[row] = csr;
		csr->csrow_idx = row;
		csr->mci = mci;
		csr->nr_channels = tot_channels;
		csr->channels = kcalloc(sizeof(*csr->channels), tot_channels,
					GFP_KERNEL);
		if (!csr->channels)
			goto error;

		for (chn = 0; chn < tot_channels; chn++) {
			chan = kzalloc(sizeof(**csr->channels), GFP_KERNEL);
			if (!chan)
				goto error;
			csr->channels[chn] = chan;
			chan->chan_idx = chn;
			chan->csrow = csr;
		}
	}

	/*
	 * Allocate and fill the dimm structs
	 */
	mci->dimms = kcalloc(sizeof(*mci->dimms), tot_dimms, GFP_KERNEL);
	if (!mci->dimms)
		goto error;

	memset(&pos, 0, sizeof(pos));
	row = 0;
	chn = 0;
	for (i = 0; i < tot_dimms; i++) {
		chan = mci->csrows[row]->channels[chn];
		off = EDAC_DIMM_OFF(layer, n_layers, pos[0], pos[1], pos[2]);
		if (off < 0 || off >= tot_dimms) {
			edac_mc_printk(mci, KERN_ERR, "EDAC core bug: EDAC_DIMM_OFF is trying to do an illegal data access\n");
			goto error;
		}

		dimm = kzalloc(sizeof(**mci->dimms), GFP_KERNEL);
		if (!dimm)
			goto error;
		mci->dimms[off] = dimm;
		dimm->mci = mci;

		/*
		 * Copy DIMM location and initialize it.
		 */
		len = sizeof(dimm->label);
		p = dimm->label;
		n = snprintf(p, len, "mc#%u", mc_num);
		p += n;
		len -= n;
		for (j = 0; j < n_layers; j++) {
			n = snprintf(p, len, "%s#%u",
				     edac_layer_name[layers[j].type],
				     pos[j]);
			p += n;
			len -= n;
			dimm->location[j] = pos[j];

			if (len <= 0)
				break;
		}

		/* Link it to the csrows old API data */
		chan->dimm = dimm;
		dimm->csrow = row;
		dimm->cschannel = chn;

		/* Increment csrow location */
		row++;
		if (row == tot_csrows) {
			row = 0;
			chn++;
		}

		/* Increment dimm location */
		for (j = n_layers - 1; j >= 0; j--) {
			pos[j]++;
			if (pos[j] < layers[j].size)
				break;
			pos[j] = 0;
		}
	}

	mci->op_state = OP_ALLOC;

	/* at this point, the root kobj is valid, and in order to
	 * 'free' the object, then the function:
	 *	edac_mc_unregister_sysfs_main_kobj() must be called
	 * which will perform kobj unregistration and the actual free
	 * will occur during the kobject callback operation
	 */

	return mci;

error:
	if (mci->dimms) {
		for (i = 0; i < tot_dimms; i++)
			kfree(mci->dimms[i]);
		kfree(mci->dimms);
	}
	if (mci->csrows) {
		for (row = 0; row < tot_csrows; row++) {
			csr = mci->csrows[row];
			if (csr) {
				if (csr->channels) {
					for (chn = 0; chn < tot_channels; chn++)
						kfree(csr->channels[chn]);
					kfree(csr->channels);
				}
				kfree(csr);
			}
		}
		kfree(mci->csrows);
	}
	kfree(mci);

	return NULL;
}
EXPORT_SYMBOL_GPL(edac_mc_alloc);
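
/*
 * A driver-side sketch of calling edac_mc_alloc() (illustrative only; the
 * two-layer csrow/channel layout and "struct my_pvt" are assumptions, not
 * something this file mandates):
 *
 *	struct edac_mc_layer layers[2];
 *	struct mem_ctl_info *mci;
 *
 *	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
 *	layers[0].size = nr_csrows;
 *	layers[0].is_virt_csrow = true;
 *	layers[1].type = EDAC_MC_LAYER_CHANNEL;
 *	layers[1].size = nr_channels;
 *	layers[1].is_virt_csrow = false;
 *
 *	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
 *			    sizeof(struct my_pvt));
 *	if (!mci)
 *		return -ENOMEM;
 */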

/**
 * edac_mc_free
 *	'Free' a previously allocated 'mci' structure
 * @mci: pointer to a struct mem_ctl_info structure
 */
void edac_mc_free(struct mem_ctl_info *mci)
{
	edac_dbg(1, "\n");

	/* the mci instance is freed here, when the sysfs object is dropped */
	edac_unregister_sysfs(mci);
}
EXPORT_SYMBOL_GPL(edac_mc_free);

/**
 * find_mci_by_dev
 *
 *	scan list of controllers looking for the one that manages
 *	the 'dev' device
 * @dev: pointer to a struct device related with the MCI
 */
struct mem_ctl_info *find_mci_by_dev(struct device *dev)
{
	struct mem_ctl_info *mci;
	struct list_head *item;

	edac_dbg(3, "\n");

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->pdev == dev)
			return mci;
	}

	return NULL;
}
EXPORT_SYMBOL_GPL(find_mci_by_dev);

/*
 * handler for EDAC to check if NMI type handler has asserted interrupt
 */
static int edac_mc_assert_error_check_and_clear(void)
{
	int old_state;

	if (edac_op_state == EDAC_OPSTATE_POLL)
		return 1;

	old_state = edac_err_assert;
	edac_err_assert = 0;

	return old_state;
}

/*
 * edac_mc_workq_function
 *	performs the operation scheduled by a workq request
 */
static void edac_mc_workq_function(struct work_struct *work_req)
{
	struct delayed_work *d_work = to_delayed_work(work_req);
	struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work);

	mutex_lock(&mem_ctls_mutex);

	/* if this control struct has moved to offline state, we are done */
	if (mci->op_state == OP_OFFLINE) {
		mutex_unlock(&mem_ctls_mutex);
		return;
	}

	/* Only poll controllers that are running polled and have a check */
	if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL))
		mci->edac_check(mci);

	mutex_unlock(&mem_ctls_mutex);

	/* Reschedule */
	queue_delayed_work(edac_workqueue, &mci->work,
			   msecs_to_jiffies(edac_mc_get_poll_msec()));
}

/*
 * edac_mc_workq_setup
 *	initialize a workq item for this mci
 *	passing in the new delay period in msec
 *
 * locking model:
 *
 *	called with the mem_ctls_mutex held
 */
static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
{
	edac_dbg(0, "\n");

	/* if this instance is not in the POLL state, then simply return */
	if (mci->op_state != OP_RUNNING_POLL)
		return;

	INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
	queue_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec));
}

/*
 * edac_mc_workq_teardown
 *	stop the workq processing on this mci
 *
 * locking model:
 *
 *	called WITHOUT lock held
 */
static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
{
	int status;

	if (mci->op_state != OP_RUNNING_POLL)
		return;

	status = cancel_delayed_work(&mci->work);
	if (status == 0) {
		edac_dbg(0, "not canceled, flush the queue\n");

		/* workq instance might be running, wait for it */
		flush_workqueue(edac_workqueue);
	}
}

/*
 * edac_mc_reset_delay_period(int value)
 *
 *	user space has updated our poll period value, need to
 *	reset our workq delays
 */
void edac_mc_reset_delay_period(int value)
{
	struct mem_ctl_info *mci;
	struct list_head *item;

	mutex_lock(&mem_ctls_mutex);

	/* scan the list and turn off all workq timers, doing so under lock
	 */
	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->op_state == OP_RUNNING_POLL)
			cancel_delayed_work(&mci->work);
	}

	mutex_unlock(&mem_ctls_mutex);

	/* re-walk the list, and reset the poll delay */
	mutex_lock(&mem_ctls_mutex);

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		edac_mc_workq_setup(mci, (unsigned long) value);
	}

	mutex_unlock(&mem_ctls_mutex);
}

/* Return 0 on success, 1 on failure.
 * Before calling this function, caller must
 * assign a unique value to mci->mc_idx.
 *
 * locking model:
 *
 *	called with the mem_ctls_mutex lock held
 */
static int add_mc_to_global_list(struct mem_ctl_info *mci)
{
	struct list_head *item, *insert_before;
	struct mem_ctl_info *p;

	insert_before = &mc_devices;

	p = find_mci_by_dev(mci->pdev);
	if (unlikely(p != NULL))
		goto fail0;

	list_for_each(item, &mc_devices) {
		p = list_entry(item, struct mem_ctl_info, link);

		if (p->mc_idx >= mci->mc_idx) {
			if (unlikely(p->mc_idx == mci->mc_idx))
				goto fail1;

			insert_before = item;
			break;
		}
	}

	list_add_tail_rcu(&mci->link, insert_before);
	atomic_inc(&edac_handlers);
	return 0;

fail0:
	edac_printk(KERN_WARNING, EDAC_MC,
		    "%s (%s) %s %s already assigned %d\n", dev_name(p->pdev),
		    edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
	return 1;

fail1:
	edac_printk(KERN_WARNING, EDAC_MC,
		    "bug in low-level driver: attempt to assign\n"
		    "    duplicate mc_idx %d in %s()\n", p->mc_idx, __func__);
	return 1;
}

static void del_mc_from_global_list(struct mem_ctl_info *mci)
{
	atomic_dec(&edac_handlers);
	list_del_rcu(&mci->link);

	/* these are for safe removal of devices from global list while
	 * NMI handlers may be traversing list
	 */
	synchronize_rcu();
	INIT_LIST_HEAD(&mci->link);
}

/**
 * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'.
 *
 * If found, return a pointer to the structure.
 * Else return NULL.
 *
 * Caller must hold mem_ctls_mutex.
 */
struct mem_ctl_info *edac_mc_find(int idx)
{
	struct list_head *item;
	struct mem_ctl_info *mci;

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->mc_idx >= idx) {
			if (mci->mc_idx == idx)
				return mci;

			break;
		}
	}

	return NULL;
}
EXPORT_SYMBOL(edac_mc_find);

/**
 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
 *	create sysfs entries associated with mci structure
 * @mci: pointer to the mci structure to be added to the list
 *
 * Return:
 *	0	Success
 *	!0	Failure
 */

/* FIXME - should a warning be printed if no error detection? correction? */
int edac_mc_add_mc(struct mem_ctl_info *mci)
{
	edac_dbg(0, "\n");

#ifdef CONFIG_EDAC_DEBUG
	if (edac_debug_level >= 3)
		edac_mc_dump_mci(mci);

	if (edac_debug_level >= 4) {
		int i;

		for (i = 0; i < mci->nr_csrows; i++) {
			struct csrow_info *csrow = mci->csrows[i];
			u32 nr_pages = 0;
			int j;

			for (j = 0; j < csrow->nr_channels; j++)
				nr_pages += csrow->channels[j]->dimm->nr_pages;
			if (!nr_pages)
				continue;
			edac_mc_dump_csrow(csrow);
			for (j = 0; j < csrow->nr_channels; j++)
				if (csrow->channels[j]->dimm->nr_pages)
					edac_mc_dump_channel(csrow->channels[j]);
		}
		for (i = 0; i < mci->tot_dimms; i++)
			if (mci->dimms[i]->nr_pages)
				edac_mc_dump_dimm(mci->dimms[i], i);
	}
#endif
	mutex_lock(&mem_ctls_mutex);

	if (add_mc_to_global_list(mci))
		goto fail0;

	/* set load time so that error rate can be tracked */
	mci->start_time = jiffies;

	if (edac_create_sysfs_mci_device(mci)) {
		edac_mc_printk(mci, KERN_WARNING,
			       "failed to create sysfs device\n");
		goto fail1;
	}

	/* If there IS a check routine, then we are running POLLED */
	if (mci->edac_check != NULL) {
		/* This instance is NOW RUNNING */
		mci->op_state = OP_RUNNING_POLL;

		edac_mc_workq_setup(mci, edac_mc_get_poll_msec());
	} else {
		mci->op_state = OP_RUNNING_INTERRUPT;
	}

	/* Report action taken */
	edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':"
		       " DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci));

	mutex_unlock(&mem_ctls_mutex);
	return 0;

fail1:
	del_mc_from_global_list(mci);

fail0:
	mutex_unlock(&mem_ctls_mutex);
	return 1;
}
EXPORT_SYMBOL_GPL(edac_mc_add_mc);
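
/*
 * A rough sketch of the driver probe/remove flow around the calls above and
 * below (illustrative only; field values such as MEM_FLAG_DDR3 and
 * EDAC_FLAG_SECDED depend on the actual hardware and driver):
 *
 *	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(*pvt));
 *	if (!mci)
 *		return -ENOMEM;
 *	mci->pdev = &pdev->dev;
 *	mci->mtype_cap = MEM_FLAG_DDR3;
 *	mci->edac_ctl_cap = EDAC_FLAG_SECDED;
 *	...
 *	if (edac_mc_add_mc(mci))
 *		goto err_free;
 *
 * and on removal:
 *
 *	mci = edac_mc_del_mc(&pdev->dev);
 *	if (mci)
 *		edac_mc_free(mci);
 */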

/**
 * edac_mc_del_mc: Remove sysfs entries for specified mci structure and
 *	remove mci structure from global list
 * @dev: Pointer to 'struct device' representing mci structure to remove.
 *
 * Return pointer to removed mci structure, or NULL if device not found.
 */
struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
{
	struct mem_ctl_info *mci;

	edac_dbg(0, "\n");

	mutex_lock(&mem_ctls_mutex);

	/* find the requested mci struct in the global list */
	mci = find_mci_by_dev(dev);
	if (mci == NULL) {
		mutex_unlock(&mem_ctls_mutex);
		return NULL;
	}

	del_mc_from_global_list(mci);
	mutex_unlock(&mem_ctls_mutex);

	/* flush workq processes */
	edac_mc_workq_teardown(mci);

	/* marking MCI offline */
	mci->op_state = OP_OFFLINE;

	/* remove from sysfs */
	edac_remove_sysfs_mci_device(mci);

	edac_printk(KERN_INFO, EDAC_MC,
		    "Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
		    mci->mod_name, mci->ctl_name, edac_dev_name(mci));

	return mci;
}
EXPORT_SYMBOL_GPL(edac_mc_del_mc);

static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
				u32 size)
{
	struct page *pg;
	void *virt_addr;
	unsigned long flags = 0;

	edac_dbg(3, "\n");

	/* ECC error page was not in our memory. Ignore it. */
	if (!pfn_valid(page))
		return;

	/* Find the actual page structure then map it and fix */
	pg = pfn_to_page(page);

	if (PageHighMem(pg))
		local_irq_save(flags);

	virt_addr = kmap_atomic(pg);

	/* Perform architecture specific atomic scrub operation */
	atomic_scrub(virt_addr + offset, size);

	/* Unmap and complete */
	kunmap_atomic(virt_addr);

	if (PageHighMem(pg))
		local_irq_restore(flags);
}

/* FIXME - should return -1 */
int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
{
	struct csrow_info **csrows = mci->csrows;
	int row, i, j, n;

	edac_dbg(1, "MC%d: 0x%lx\n", mci->mc_idx, page);
	row = -1;

	for (i = 0; i < mci->nr_csrows; i++) {
		struct csrow_info *csrow = csrows[i];
		n = 0;
		for (j = 0; j < csrow->nr_channels; j++) {
			struct dimm_info *dimm = csrow->channels[j]->dimm;
			n += dimm->nr_pages;
		}
		if (n == 0)
			continue;

		edac_dbg(3, "MC%d: first(0x%lx) page(0x%lx) last(0x%lx) mask(0x%lx)\n",
			 mci->mc_idx,
			 csrow->first_page, page, csrow->last_page,
			 csrow->page_mask);

		if ((page >= csrow->first_page) &&
		    (page <= csrow->last_page) &&
		    ((page & csrow->page_mask) ==
		     (csrow->first_page & csrow->page_mask))) {
			row = i;
			break;
		}
	}

	if (row == -1)
		edac_mc_printk(mci, KERN_ERR,
			       "could not look up page error address %lx\n",
			       (unsigned long)page);

	return row;
}
EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);

const char *edac_layer_name[] = {
	[EDAC_MC_LAYER_BRANCH] = "branch",
	[EDAC_MC_LAYER_CHANNEL] = "channel",
	[EDAC_MC_LAYER_SLOT] = "slot",
	[EDAC_MC_LAYER_CHIP_SELECT] = "csrow",
};
EXPORT_SYMBOL_GPL(edac_layer_name);

static void edac_inc_ce_error(struct mem_ctl_info *mci,
			      bool enable_per_layer_report,
			      const int pos[EDAC_MAX_LAYERS])
{
	int i, index = 0;

	mci->ce_mc++;

	if (!enable_per_layer_report) {
		mci->ce_noinfo_count++;
		return;
	}

	for (i = 0; i < mci->n_layers; i++) {
		if (pos[i] < 0)
			break;
		index += pos[i];
		mci->ce_per_layer[i][index]++;

		if (i < mci->n_layers - 1)
			index *= mci->layers[i + 1].size;
	}
}

static void edac_inc_ue_error(struct mem_ctl_info *mci,
			      bool enable_per_layer_report,
			      const int pos[EDAC_MAX_LAYERS])
{
	int i, index = 0;

	mci->ue_mc++;

	if (!enable_per_layer_report) {
		mci->ue_noinfo_count++;
		return;
	}

	for (i = 0; i < mci->n_layers; i++) {
		if (pos[i] < 0)
			break;
		index += pos[i];
		mci->ue_per_layer[i][index]++;

		if (i < mci->n_layers - 1)
			index *= mci->layers[i + 1].size;
	}
}

static void edac_ce_error(struct mem_ctl_info *mci,
			  const int pos[EDAC_MAX_LAYERS],
			  const char *msg,
			  const char *location,
			  const char *label,
			  const char *detail,
			  const char *other_detail,
			  const bool enable_per_layer_report,
			  const unsigned long page_frame_number,
			  const unsigned long offset_in_page,
			  long grain)
{
	unsigned long remapped_page;

	if (edac_mc_get_log_ce()) {
		if (other_detail && *other_detail)
			edac_mc_printk(mci, KERN_WARNING,
				       "CE %s on %s (%s %s - %s)\n",
				       msg, label, location,
				       detail, other_detail);
		else
			edac_mc_printk(mci, KERN_WARNING,
				       "CE %s on %s (%s %s)\n",
				       msg, label, location,
				       detail);
	}
	edac_inc_ce_error(mci, enable_per_layer_report, pos);

	if (mci->scrub_mode & SCRUB_SW_SRC) {
		/*
		 * Some memory controllers (called MCs below) can remap
		 * memory so that it is still available at a different
		 * address when PCI devices map into memory.
		 * MC's that can't do this, lose the memory where PCI
		 * devices are mapped.  This mapping is MC-dependent
		 * and so we call back into the MC driver for it to
		 * map the MC page to a physical (CPU) page which can
		 * then be mapped to a virtual page - which can then
		 * be scrubbed.
		 */
		remapped_page = mci->ctl_page_to_phys ?
			mci->ctl_page_to_phys(mci, page_frame_number) :
			page_frame_number;

		edac_mc_scrub_block(remapped_page,
				    offset_in_page, grain);
	}
}

static void edac_ue_error(struct mem_ctl_info *mci,
			  const int pos[EDAC_MAX_LAYERS],
			  const char *msg,
			  const char *location,
			  const char *label,
			  const char *detail,
			  const char *other_detail,
			  const bool enable_per_layer_report)
{
	if (edac_mc_get_log_ue()) {
		if (other_detail && *other_detail)
			edac_mc_printk(mci, KERN_WARNING,
				       "UE %s on %s (%s %s - %s)\n",
				       msg, label, location, detail,
				       other_detail);
		else
			edac_mc_printk(mci, KERN_WARNING,
				       "UE %s on %s (%s %s)\n",
				       msg, label, location, detail);
	}

	if (edac_mc_get_panic_on_ue()) {
		if (other_detail && *other_detail)
			panic("UE %s on %s (%s%s - %s)\n",
			      msg, label, location, detail, other_detail);
		else
			panic("UE %s on %s (%s%s)\n",
			      msg, label, location, detail);
	}

	edac_inc_ue_error(mci, enable_per_layer_report, pos);
}

#define OTHER_LABEL " or "

/**
 * edac_mc_handle_error - reports a memory event to userspace
 *
 * @type:			severity of the error (CE/UE/Fatal)
 * @mci:			a struct mem_ctl_info pointer
 * @page_frame_number:		mem page where the error occurred
 * @offset_in_page:		offset of the error inside the page
 * @syndrome:			ECC syndrome
 * @top_layer:			Memory layer[0] position
 * @mid_layer:			Memory layer[1] position
 * @low_layer:			Memory layer[2] position
 * @msg:			Message meaningful to the end users that
 *				explains the event
 * @other_detail:		Technical details about the event that
 *				may help hardware manufacturers and
 *				EDAC developers to analyse the event
 * @arch_log:			Architecture-specific struct that can
 *				be used to add extended information to the
 *				tracepoint, like dumping MCE registers.
 */
void edac_mc_handle_error(const enum hw_event_mc_err_type type,
			  struct mem_ctl_info *mci,
			  const unsigned long page_frame_number,
			  const unsigned long offset_in_page,
			  const unsigned long syndrome,
			  const int top_layer,
			  const int mid_layer,
			  const int low_layer,
			  const char *msg,
			  const char *other_detail,
			  const void *arch_log)
{
	/* FIXME: too much for stack: move it to some pre-allocated area */
	char detail[80], location[80];
	char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * mci->tot_dimms];
	char *p;
	int row = -1, chan = -1;
	int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer };
	int i;
	long grain;
	bool enable_per_layer_report = false;
	u16 error_count;	/* FIXME: make it a parameter */
	u8 grain_bits;

	edac_dbg(3, "MC%d\n", mci->mc_idx);

	/*
	 * Check if the event report is consistent and if the memory
	 * location is known. If it is known, enable_per_layer_report will be
	 * true, the DIMM(s) label info will be filled and the per-layer
	 * error counters will be incremented.
	 */
	for (i = 0; i < mci->n_layers; i++) {
		if (pos[i] >= (int)mci->layers[i].size) {
			if (type == HW_EVENT_ERR_CORRECTED)
				p = "CE";
			else
				p = "UE";

			edac_mc_printk(mci, KERN_ERR,
				       "INTERNAL ERROR: %s value is out of range (%d >= %d)\n",
				       edac_layer_name[mci->layers[i].type],
				       pos[i], mci->layers[i].size);
			/*
			 * Instead of just returning it, let's use what's
			 * known about the error. The increment routines and
			 * the DIMM filter logic will do the right thing by
			 * pointing the likely damaged DIMMs.
			 */
			pos[i] = -1;
		}
		if (pos[i] >= 0)
			enable_per_layer_report = true;
	}

	/*
	 * Get the dimm label/grain that applies to the match criteria.
	 * As the error algorithm may not be able to point to just one memory
	 * stick, the logic here will get all possible labels that could
	 * potentially be affected by the error.
	 * On FB-DIMM memory controllers, for uncorrected errors, it is common
	 * to have only the MC channel and the MC dimm (also called "branch")
	 * but the channel is not known, as the memory is arranged in pairs,
	 * where each memory belongs to a separate channel within the same
	 * branch.
	 */
	grain = 0;
	p = label;
	*p = '\0';
	for (i = 0; i < mci->tot_dimms; i++) {
		struct dimm_info *dimm = mci->dimms[i];

		if (top_layer >= 0 && top_layer != dimm->location[0])
			continue;
		if (mid_layer >= 0 && mid_layer != dimm->location[1])
			continue;
		if (low_layer >= 0 && low_layer != dimm->location[2])
			continue;

		/* get the max grain, over the error match range */
		if (dimm->grain > grain)
			grain = dimm->grain;

		/*
		 * If the error is memory-controller wide, there's no need to
		 * seek for the affected DIMMs because the whole
		 * channel/memory controller/... may be affected.
		 * Also, don't show errors for empty DIMM slots.
		 */
		if (enable_per_layer_report && dimm->nr_pages) {
			if (p != label) {
				strcpy(p, OTHER_LABEL);
				p += strlen(OTHER_LABEL);
			}
			strcpy(p, dimm->label);
			p += strlen(p);
			*p = '\0';

			/*
			 * get csrow/channel of the DIMM, in order to allow
			 * incrementing the compat API counters
			 */
			edac_dbg(4, "%s csrows map: (%d,%d)\n",
				 mci->mem_is_per_rank ? "rank" : "dimm",
				 dimm->csrow, dimm->cschannel);
			if (row == -1)
				row = dimm->csrow;
			else if (row >= 0 && row != dimm->csrow)
				row = -2;

			if (chan == -1)
				chan = dimm->cschannel;
			else if (chan >= 0 && chan != dimm->cschannel)
				chan = -2;
		}
	}

	if (!enable_per_layer_report) {
		strcpy(label, "any memory");
	} else {
		edac_dbg(4, "csrow/channel to increment: (%d,%d)\n", row, chan);
		if (p == label)
			strcpy(label, "unknown memory");
		if (type == HW_EVENT_ERR_CORRECTED) {
			if (row >= 0) {
				mci->csrows[row]->ce_count++;
				if (chan >= 0)
					mci->csrows[row]->channels[chan]->ce_count++;
			}
		} else
			if (row >= 0)
				mci->csrows[row]->ue_count++;
	}

	/* Fill the RAM location data */
	p = location;
	for (i = 0; i < mci->n_layers; i++) {
		if (pos[i] < 0)
			continue;

		p += sprintf(p, "%s:%d ",
			     edac_layer_name[mci->layers[i].type],
			     pos[i]);
	}
	if (p > location)
		*(p - 1) = '\0';

	/* Report the error via the trace interface */

	error_count = 1;	/* FIXME: allow change it */
	grain_bits = fls_long(grain) + 1;
	trace_mc_event(type, msg, label, error_count,
		       mci->mc_idx, top_layer, mid_layer, low_layer,
		       PAGES_TO_MiB(page_frame_number) | offset_in_page,
		       grain_bits, syndrome, other_detail);

	/* Memory type dependent details about the error */
	if (type == HW_EVENT_ERR_CORRECTED) {
		snprintf(detail, sizeof(detail),
			 "page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx",
			 page_frame_number, offset_in_page,
			 grain, syndrome);
		edac_ce_error(mci, pos, msg, location, label, detail,
			      other_detail, enable_per_layer_report,
			      page_frame_number, offset_in_page, grain);
	} else {
		snprintf(detail, sizeof(detail),
			 "page:0x%lx offset:0x%lx grain:%ld",
			 page_frame_number, offset_in_page, grain);

		edac_ue_error(mci, pos, msg, location, label, detail,
			      other_detail, enable_per_layer_report);
	}
}
EXPORT_SYMBOL_GPL(edac_mc_handle_error);
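
/*
 * A minimal sketch of how a driver might report a corrected error through
 * the routine above (illustrative only; "pfn", "offset", "syndrome" and the
 * layer positions are assumed to come from the driver's error decoding):
 *
 *	edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
 *			     pfn, offset, syndrome,
 *			     csrow, channel, -1,
 *			     "single-bit ECC error", "", NULL);
 */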