blob: 881b00eadf7a5f87552fcf9e07913395eab56b80 [file] [log] [blame]
/*
 * Defines, structures, APIs for edac_mc module
 *
 * (C) 2007 Linux Networx (http://lnxi.com)
 * This file may be distributed under the terms of the
 * GNU General Public License.
 *
 * Written by Thayne Harbaugh
 * Based on work by Dan Hollis <goemon at anime dot net> and others.
 *	http://www.anime.net/~goemon/linux-ecc/
 *
 * NMI handling support added by
 *     Dave Peterson <dsp@llnl.gov> <dave_peterson@pobox.com>
 *
 * Refactored for multi-source files:
 *	Doug Thompson <norsk5@xmission.com>
 *
 * Please look at Documentation/driver-api/edac.rst for more info about
 * EDAC core structs and functions.
 */
21
Mauro Carvalho Chehab78d88e82016-10-29 15:16:34 -020022#ifndef _EDAC_MC_H_
23#define _EDAC_MC_H_
Douglas Thompson7c9281d2007-07-19 01:49:33 -070024
25#include <linux/kernel.h>
26#include <linux/types.h>
27#include <linux/module.h>
28#include <linux/spinlock.h>
29#include <linux/smp.h>
30#include <linux/pci.h>
31#include <linux/time.h>
32#include <linux/nmi.h>
33#include <linux/rcupdate.h>
34#include <linux/completion.h>
35#include <linux/kobject.h>
36#include <linux/platform_device.h>
Douglas Thompsone27e3da2007-07-19 01:49:36 -070037#include <linux/workqueue.h>
Mauro Carvalho Chehabddeb3542011-03-04 15:11:29 -030038#include <linux/edac.h>
Douglas Thompson7c9281d2007-07-19 01:49:33 -070039
/*
 * Convert between a number of pages and a number of MiB (1 MiB == 1 << 20
 * bytes). The shift distance is the difference between PAGE_SHIFT and 20;
 * the shift direction depends on whether a page is smaller than 1 MiB.
 * When PAGE_SHIFT == 20 the second branch is taken and the shift is by 0.
 */
#if PAGE_SHIFT < 20
#define PAGES_TO_MiB(pages)	((pages) >> (20 - PAGE_SHIFT))
#define MiB_TO_PAGES(mb)	((mb) << (20 - PAGE_SHIFT))
#else				/* PAGE_SHIFT >= 20 */
#define PAGES_TO_MiB(pages)	((pages) << (PAGE_SHIFT - 20))
#define MiB_TO_PAGES(mb)	((mb) >> (PAGE_SHIFT - 20))
#endif
47
/*
 * edac_printk() - printk() wrapper that tags a message as coming from EDAC.
 *
 * @level, @prefix and @fmt must be string literals: they are concatenated
 * with the "EDAC " tag at compile time to build the final format string.
 */
#define edac_printk(level, prefix, fmt, arg...) \
	printk(level "EDAC " prefix ": " fmt, ##arg)
50
/*
 * edac_mc_printk() - printk() wrapper that prefixes the message with
 * "EDAC MC<n>: ", where <n> is read from @mci->mc_idx.
 *
 * @mci is parenthesized in the expansion so that any pointer expression
 * (for example "&obj") can be passed safely.
 * @level and @fmt must be string literals.
 */
#define edac_mc_printk(mci, level, fmt, arg...) \
	printk(level "EDAC MC%d: " fmt, (mci)->mc_idx, ##arg)
53
/*
 * edac_mc_chipset_printk() - printk() wrapper that prefixes the message
 * with "EDAC <prefix> MC<n>: ", where <n> is read from @mci->mc_idx.
 *
 * @mci is parenthesized in the expansion so that any pointer expression
 * can be passed safely.
 * @level, @prefix and @fmt must be string literals.
 */
#define edac_mc_chipset_printk(mci, level, prefix, fmt, arg...) \
	printk(level "EDAC " prefix " MC%d: " fmt, (mci)->mc_idx, ##arg)
56
/*
 * edac_device_printk() - printk() wrapper that prefixes the message with
 * "EDAC DEVICE<n>: ", where <n> is read from @ctl->dev_idx.
 *
 * @ctl is parenthesized in the expansion so that any pointer expression
 * can be passed safely.
 * @level and @fmt must be string literals.
 */
#define edac_device_printk(ctl, level, fmt, arg...) \
	printk(level "EDAC DEVICE%d: " fmt, (ctl)->dev_idx, ##arg)
59
/*
 * edac_pci_printk() - printk() wrapper that prefixes the message with
 * "EDAC PCI<n>: ", where <n> is read from @ctl->pci_idx.
 *
 * @ctl is parenthesized in the expansion so that any pointer expression
 * can be passed safely.
 * @level and @fmt must be string literals.
 */
#define edac_pci_printk(ctl, level, fmt, arg...) \
	printk(level "EDAC PCI%d: " fmt, (ctl)->pci_idx, ##arg)
62
/*
 * Prefix strings for the "prefix" argument of edac_printk() and
 * edac_mc_chipset_printk().
 */
#define EDAC_MC "MC"
#define EDAC_PCI "PCI"
#define EDAC_DEBUG "DEBUG"
67
/*
 * Read-only table of constant strings, defined elsewhere.
 * NOTE(review): presumably indexed by memory type to get a printable
 * name - confirm against the definition.
 */
extern const char * const edac_mem_types[];
Borislav Petkov24f9a7f2010-10-07 18:29:15 +020069
/*
 * edac_dbg() - emit a KERN_DEBUG message tagged with the calling function.
 *
 * With CONFIG_EDAC_DEBUG the message is printed only when @level is less
 * than or equal to edac_debug_level. Without it, the call sits behind
 * "if (0)": nothing is printed, but the format string and arguments are
 * still seen by the compiler.
 *
 * @level is parenthesized so that an expression such as "a ? 1 : 2" is
 * compared as a whole against edac_debug_level.
 */
#ifdef CONFIG_EDAC_DEBUG
extern int edac_debug_level;

#define edac_dbg(level, fmt, ...) \
do { \
	if ((level) <= edac_debug_level) \
		edac_printk(KERN_DEBUG, EDAC_DEBUG, \
			    "%s: " fmt, __func__, ##__VA_ARGS__); \
} while (0)

#else				/* !CONFIG_EDAC_DEBUG */

#define edac_dbg(level, fmt, ...) \
do { \
	if (0) \
		edac_printk(KERN_DEBUG, EDAC_DEBUG, \
			    "%s: " fmt, __func__, ##__VA_ARGS__); \
} while (0)

#endif				/* !CONFIG_EDAC_DEBUG */
Douglas Thompson7c9281d2007-07-19 01:49:33 -070090
/*
 * PCI_VEND_DEV() - expand to the comma-separated pair
 * PCI_VENDOR_ID_<vend>, PCI_DEVICE_ID_<vend>_<dev>, built by token pasting.
 */
#define PCI_VEND_DEV(vend, dev) PCI_VENDOR_ID_ ## vend, \
	PCI_DEVICE_ID_ ## vend ## _ ## dev
93
/*
 * edac_dev_name() - access the dev_name member of the object @dev points to.
 * The whole expansion is parenthesized so it behaves as a single operand in
 * any surrounding expression; it remains usable as an lvalue.
 */
#define edac_dev_name(dev) ((dev)->dev_name)
Douglas Thompson7c9281d2007-07-19 01:49:33 -070095
/*
 * to_mci() - given @k, a pointer to the "dev" member embedded in a
 * struct mem_ctl_info, return a pointer to that enclosing structure.
 */
#define to_mci(k) container_of(k, struct mem_ctl_info, dev)
97
/**
 * edac_mc_alloc() - Allocate and partially fill a struct &mem_ctl_info.
 *
 * @mc_num:		Memory controller number
 * @n_layers:		Number of MC hierarchy layers
 * @layers:		Describes each layer as seen by the Memory Controller
 * @sz_pvt:		size of private storage needed
 *
 * Everything is kmalloc'ed as one big chunk - more efficient.
 * It can only be used if all structures have the same lifetime - otherwise
 * you have to allocate and initialize your own structures.
 *
 * Use edac_mc_free() to free mc structures allocated by this function.
 *
 * .. note::
 *
 *   drivers handle multi-rank memories in different ways: in some
 *   drivers, one multi-rank memory stick is mapped as one entry, while, in
 *   others, a single multi-rank memory stick would be mapped into several
 *   entries. Currently, this function will allocate multiple struct dimm_info
 *   in such scenarios, as grouping the multiple ranks requires driver changes.
 *
 * Returns:
 *	On success, a pointer to the allocated struct mem_ctl_info;
 *	%NULL otherwise
 */
struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num,
				   unsigned int n_layers,
				   struct edac_mc_layer *layers,
				   unsigned int sz_pvt);
Mauro Carvalho Chehabe01aa142016-10-26 15:47:55 -0200129
/**
 * edac_get_owner() - Return the owner's mod_name of EDAC MC
 *
 * Returns:
 *	Pointer to mod_name string when EDAC MC is owned. NULL otherwise.
 */
extern const char *edac_get_owner(void);
137
/**
 * edac_mc_add_mc_with_groups() - Insert the @mci structure into the mci
 *	global list and create sysfs entries associated with @mci structure.
 *
 * @mci: pointer to the mci structure to be added to the list
 * @groups: optional attribute groups for the driver-specific sysfs entries
 *
 * Returns:
 *	0 on Success, or an error code on failure
 */
extern int edac_mc_add_mc_with_groups(struct mem_ctl_info *mci,
				      const struct attribute_group **groups);

/* Convenience form of edac_mc_add_mc_with_groups() that passes no groups. */
#define edac_mc_add_mc(mci)	edac_mc_add_mc_with_groups(mci, NULL)
Mauro Carvalho Chehabe01aa142016-10-26 15:47:55 -0200151
/**
 * edac_mc_free() - Frees a previously allocated @mci structure
 *
 * @mci: pointer to a struct mem_ctl_info structure
 */
extern void edac_mc_free(struct mem_ctl_info *mci);
Mauro Carvalho Chehabe01aa142016-10-26 15:47:55 -0200158
/**
 * edac_has_mcs() - Check if any MCs have been allocated.
 *
 * Returns:
 *	True if MC instances have been registered successfully.
 *	False otherwise.
 */
extern bool edac_has_mcs(void);
167
/**
 * edac_mc_find() - Search for a mem_ctl_info structure whose index is @idx.
 *
 * @idx: index to look for
 *
 * Returns:
 *	If found, a pointer to the structure; NULL otherwise.
 */
extern struct mem_ctl_info *edac_mc_find(int idx);
Mauro Carvalho Chehabe01aa142016-10-26 15:47:55 -0200177
/**
 * find_mci_by_dev() - Scan list of controllers looking for the one that
 *	manages the @dev device.
 *
 * @dev: pointer to a struct device related with the MCI
 *
 * Returns: on success, returns a pointer to struct &mem_ctl_info;
 * %NULL otherwise.
 */
extern struct mem_ctl_info *find_mci_by_dev(struct device *dev);
Mauro Carvalho Chehabe01aa142016-10-26 15:47:55 -0200188
/**
 * edac_mc_del_mc() - Remove sysfs entries for mci structure associated with
 *	@dev and remove mci structure from global list.
 *
 * @dev: Pointer to struct &device representing mci structure to remove.
 *
 * Returns: pointer to removed mci structure, or %NULL if device not found.
 */
extern struct mem_ctl_info *edac_mc_del_mc(struct device *dev);
Mauro Carvalho Chehab66c222a2016-10-29 10:35:23 -0200198
/**
 * edac_mc_find_csrow_by_page() - Ancillary routine to identify what csrow
 *	contains a memory page.
 *
 * @mci: pointer to a struct mem_ctl_info structure
 * @page: memory page to find
 *
 * Returns: on success, returns the csrow. -1 if not found.
 */
extern int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci,
				      unsigned long page);
Mauro Carvalho Chehabe7e24832012-10-31 13:46:11 -0300210
/**
 * edac_raw_mc_handle_error() - Reports a memory event to userspace without
 *	doing anything to discover the error location.
 *
 * @e:			error description
 *
 * This raw function is used internally by edac_mc_handle_error(). It should
 * only be called directly when the hardware error comes directly from BIOS,
 * like in the case of APEI GHES driver.
 */
void edac_raw_mc_handle_error(struct edac_raw_error_desc *e);
Mauro Carvalho Chehabe7e24832012-10-31 13:46:11 -0300222
Mauro Carvalho Chehabe01aa142016-10-26 15:47:55 -0200223/**
Mauro Carvalho Chehab66c222a2016-10-29 10:35:23 -0200224 * edac_mc_handle_error() - Reports a memory event to userspace.
Mauro Carvalho Chehabe01aa142016-10-26 15:47:55 -0200225 *
226 * @type: severity of the error (CE/UE/Fatal)
227 * @mci: a struct mem_ctl_info pointer
228 * @error_count: Number of errors of the same type
229 * @page_frame_number: mem page where the error occurred
230 * @offset_in_page: offset of the error inside the page
231 * @syndrome: ECC syndrome
232 * @top_layer: Memory layer[0] position
233 * @mid_layer: Memory layer[1] position
234 * @low_layer: Memory layer[2] position
235 * @msg: Message meaningful to the end users that
236 * explains the event
237 * @other_detail: Technical details about the event that
238 * may help hardware manufacturers and
239 * EDAC developers to analyse the event
240 */
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -0300241void edac_mc_handle_error(const enum hw_event_mc_err_type type,
242 struct mem_ctl_info *mci,
Mauro Carvalho Chehab9eb07a72012-06-04 13:27:43 -0300243 const u16 error_count,
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -0300244 const unsigned long page_frame_number,
245 const unsigned long offset_in_page,
246 const unsigned long syndrome,
Mauro Carvalho Chehab53f2d022012-02-23 08:10:34 -0300247 const int top_layer,
248 const int mid_layer,
249 const int low_layer,
Mauro Carvalho Chehab4275be62012-04-18 15:20:50 -0300250 const char *msg,
Mauro Carvalho Chehab03f7eae2012-06-04 11:29:25 -0300251 const char *other_detail);
Douglas Thompson7c9281d2007-07-19 01:49:33 -0700252
/*
 * edac misc APIs
 */

/*
 * NOTE(review): judging by the name, this presumably maps @op_state to a
 * printable string; the mapping and the lifetime of the returned pointer
 * are not visible in this header - confirm against the definition.
 */
extern char *edac_op_state_to_string(int op_state);
Douglas Thompson7c9281d2007-07-19 01:49:33 -0700257
Mauro Carvalho Chehab78d88e82016-10-29 15:16:34 -0200258#endif /* _EDAC_MC_H_ */