Mauro Carvalho Chehab | 6d8ef24 | 2016-10-29 10:01:41 -0200 | [diff] [blame] | 1 | /* |
| 2 | * Defines, structures, APIs for edac_device |
| 3 | * |
| 4 | * (C) 2007 Linux Networx (http://lnxi.com) |
| 5 | * This file may be distributed under the terms of the |
| 6 | * GNU General Public License. |
| 7 | * |
| 8 | * Written by Thayne Harbaugh |
| 9 | * Based on work by Dan Hollis <goemon at anime dot net> and others. |
| 10 | * http://www.anime.net/~goemon/linux-ecc/ |
| 11 | * |
| 12 | * NMI handling support added by |
| 13 | * Dave Peterson <dsp@llnl.gov> <dave_peterson@pobox.com> |
| 14 | * |
| 15 | * Refactored for multi-source files: |
| 16 | * Doug Thompson <norsk5@xmission.com> |
| 17 | * |
| 18 | * Please look at Documentation/driver-api/edac.rst for more info about |
| 19 | * EDAC core structs and functions. |
| 20 | */ |
| 21 | |
| 22 | #ifndef _EDAC_DEVICE_H_ |
| 23 | #define _EDAC_DEVICE_H_ |
| 24 | |
| 25 | #include <linux/completion.h> |
| 26 | #include <linux/device.h> |
| 27 | #include <linux/edac.h> |
| 28 | #include <linux/kobject.h> |
| 29 | #include <linux/list.h> |
| 30 | #include <linux/types.h> |
| 31 | #include <linux/sysfs.h> |
| 32 | #include <linux/workqueue.h> |
| 33 | |
| 34 | |
/*
 * The following structures provide a generic, or abstract,
 * 'edac_device'. Together with the code that implements their APIs,
 * they allow registering EDAC type devices which are NOT standard
 * memory, for example:
 *
 *	CPU caches (L1 and L2)
 *	DMA engines
 *	Core CPU switches
 *	Fabric switch units
 *	PCIe interface controllers
 *	other EDAC/ECC type devices that can be monitored for
 *		errors, etc.
 *
 * A two-level hierarchy is supported. For example:
 *
 * A cache could be composed of L1, L2 and L3 levels of cache.
 * Each CPU core would have its own L1 cache, while sharing
 * L2 and maybe L3 caches.
 *
 * View them arranged, via the sysfs presentation:
 *	/sys/devices/system/edac/..
 *
 *	mc/		<existing memory device directory>
 *	cpu/cpu0/..	<L1 and L2 block directory>
 *		/L1-cache/ce_count
 *			 /ue_count
 *		/L2-cache/ce_count
 *			 /ue_count
 *	cpu/cpu1/..	<L1 and L2 block directory>
 *		/L1-cache/ce_count
 *			 /ue_count
 *		/L2-cache/ce_count
 *			 /ue_count
 *	...
 *
 * The L1 and L2 directories would each be an 'edac_device_block'.
 */
| 73 | |
| 74 | struct edac_device_counter { |
| 75 | u32 ue_count; |
| 76 | u32 ce_count; |
| 77 | }; |
| 78 | |
/* Forward declarations of types that are referenced before their definition */
struct edac_device_block;
struct edac_device_ctl_info;
| 82 | |
| 83 | /* edac_dev_sysfs_attribute structure |
| 84 | * used for driver sysfs attributes in mem_ctl_info |
| 85 | * for extra controls and attributes: |
| 86 | * like high level error Injection controls |
| 87 | */ |
| 88 | struct edac_dev_sysfs_attribute { |
| 89 | struct attribute attr; |
| 90 | ssize_t (*show)(struct edac_device_ctl_info *, char *); |
| 91 | ssize_t (*store)(struct edac_device_ctl_info *, const char *, size_t); |
| 92 | }; |
| 93 | |
| 94 | /* edac_dev_sysfs_block_attribute structure |
| 95 | * |
| 96 | * used in leaf 'block' nodes for adding controls/attributes |
| 97 | * |
| 98 | * each block in each instance of the containing control structure |
| 99 | * can have an array of the following. The show and store functions |
| 100 | * will be filled in with the show/store function in the |
| 101 | * low level driver. |
| 102 | * |
| 103 | * The 'value' field will be the actual value field used for |
| 104 | * counting |
| 105 | */ |
| 106 | struct edac_dev_sysfs_block_attribute { |
| 107 | struct attribute attr; |
| 108 | ssize_t (*show)(struct kobject *, struct attribute *, char *); |
| 109 | ssize_t (*store)(struct kobject *, struct attribute *, |
| 110 | const char *, size_t); |
| 111 | struct edac_device_block *block; |
| 112 | |
| 113 | unsigned int value; |
| 114 | }; |
| 115 | |
| 116 | /* device block control structure */ |
| 117 | struct edac_device_block { |
| 118 | struct edac_device_instance *instance; /* Up Pointer */ |
| 119 | char name[EDAC_DEVICE_NAME_LEN + 1]; |
| 120 | |
| 121 | struct edac_device_counter counters; /* basic UE and CE counters */ |
| 122 | |
| 123 | int nr_attribs; /* how many attributes */ |
| 124 | |
| 125 | /* this block's attributes, could be NULL */ |
| 126 | struct edac_dev_sysfs_block_attribute *block_attributes; |
| 127 | |
| 128 | /* edac sysfs device control */ |
| 129 | struct kobject kobj; |
| 130 | }; |
| 131 | |
| 132 | /* device instance control structure */ |
| 133 | struct edac_device_instance { |
| 134 | struct edac_device_ctl_info *ctl; /* Up pointer */ |
| 135 | char name[EDAC_DEVICE_NAME_LEN + 4]; |
| 136 | |
| 137 | struct edac_device_counter counters; /* instance counters */ |
| 138 | |
| 139 | u32 nr_blocks; /* how many blocks */ |
| 140 | struct edac_device_block *blocks; /* block array */ |
| 141 | |
| 142 | /* edac sysfs device control */ |
| 143 | struct kobject kobj; |
| 144 | }; |
| 145 | |
| 146 | |
| 147 | /* |
| 148 | * Abstract edac_device control info structure |
| 149 | * |
| 150 | */ |
| 151 | struct edac_device_ctl_info { |
| 152 | /* for global list of edac_device_ctl_info structs */ |
| 153 | struct list_head link; |
| 154 | |
| 155 | struct module *owner; /* Module owner of this control struct */ |
| 156 | |
| 157 | int dev_idx; |
| 158 | |
| 159 | /* Per instance controls for this edac_device */ |
| 160 | int log_ue; /* boolean for logging UEs */ |
| 161 | int log_ce; /* boolean for logging CEs */ |
| 162 | int panic_on_ue; /* boolean for panic'ing on an UE */ |
| 163 | unsigned poll_msec; /* number of milliseconds to poll interval */ |
| 164 | unsigned long delay; /* number of jiffies for poll_msec */ |
| 165 | |
| 166 | /* Additional top controller level attributes, but specified |
| 167 | * by the low level driver. |
| 168 | * |
| 169 | * Set by the low level driver to provide attributes at the |
| 170 | * controller level, same level as 'ue_count' and 'ce_count' above. |
| 171 | * An array of structures, NULL terminated |
| 172 | * |
| 173 | * If attributes are desired, then set to array of attributes |
| 174 | * If no attributes are desired, leave NULL |
| 175 | */ |
| 176 | struct edac_dev_sysfs_attribute *sysfs_attributes; |
| 177 | |
| 178 | /* pointer to main 'edac' subsys in sysfs */ |
| 179 | struct bus_type *edac_subsys; |
| 180 | |
| 181 | /* the internal state of this controller instance */ |
| 182 | int op_state; |
| 183 | /* work struct for this instance */ |
| 184 | struct delayed_work work; |
| 185 | |
| 186 | /* pointer to edac polling checking routine: |
| 187 | * If NOT NULL: points to polling check routine |
| 188 | * If NULL: Then assumes INTERRUPT operation, where |
| 189 | * MC driver will receive events |
| 190 | */ |
| 191 | void (*edac_check) (struct edac_device_ctl_info * edac_dev); |
| 192 | |
| 193 | struct device *dev; /* pointer to device structure */ |
| 194 | |
| 195 | const char *mod_name; /* module name */ |
| 196 | const char *ctl_name; /* edac controller name */ |
| 197 | const char *dev_name; /* pci/platform/etc... name */ |
| 198 | |
| 199 | void *pvt_info; /* pointer to 'private driver' info */ |
| 200 | |
| 201 | unsigned long start_time; /* edac_device load start time (jiffies) */ |
| 202 | |
| 203 | struct completion removal_complete; |
| 204 | |
| 205 | /* sysfs top name under 'edac' directory |
| 206 | * and instance name: |
| 207 | * cpu/cpu0/... |
| 208 | * cpu/cpu1/... |
| 209 | * cpu/cpu2/... |
| 210 | * ... |
| 211 | */ |
| 212 | char name[EDAC_DEVICE_NAME_LEN + 1]; |
| 213 | |
| 214 | /* Number of instances supported on this control structure |
| 215 | * and the array of those instances |
| 216 | */ |
| 217 | u32 nr_instances; |
| 218 | struct edac_device_instance *instances; |
| 219 | |
| 220 | /* Event counters for the this whole EDAC Device */ |
| 221 | struct edac_device_counter counters; |
| 222 | |
| 223 | /* edac sysfs device control for the 'name' |
| 224 | * device this structure controls |
| 225 | */ |
| 226 | struct kobject kobj; |
| 227 | }; |
| 228 | |
/*
 * Map a pointer to the embedded 'work' member back to the start of the
 * control structure that contains it.
 */
#define to_edac_mem_ctl_work(w) \
	container_of(w, struct mem_ctl_info, work)
| 232 | |
/* Map a 'work' member pointer back to its struct edac_device_ctl_info */
#define to_edac_device_ctl_work(w) \
	container_of(w, struct edac_device_ctl_info, work)
| 235 | |
/*
 * Allocation half of the alloc()/free() pair for the 'edac_device'
 * control info structure. A MC driver will allocate one of these for
 * each edac_device it is going to control/register with the EDAC CORE.
 */
struct edac_device_ctl_info *
edac_device_alloc_ctl_info(unsigned int sizeof_private,
			   char *edac_device_name, unsigned int nr_instances,
			   char *edac_block_name, unsigned int nr_blocks,
			   unsigned int offset_value,
			   struct edac_dev_sysfs_block_attribute *block_attributes,
			   unsigned int nr_attribs,
			   int device_index);
| 249 | |
/*
 * Possible offset values:
 *	-1	no offset value
 *	 0	zero-based block numbers
 *	 1	1-based block numbers
 *	other	other-based block numbers
 */
#define BLOCK_OFFSET_VALUE_OFF	((unsigned int)-1)
| 257 | |
/*
 * free() counterpart of edac_device_alloc_ctl_info() for the
 * 'edac_device' control info structure.
 */
void edac_device_free_ctl_info(struct edac_device_ctl_info *ctl_info);
| 259 | |
/**
 * edac_device_add_device - Insert the 'edac_dev' structure into the
 *	edac_device global list and create the sysfs entries associated
 *	with the edac_device structure.
 *
 * @edac_dev: pointer to the edac_device structure to be added to the list
 *
 * Returns:
 *	0 on success, or an error code on failure
 */
int edac_device_add_device(struct edac_device_ctl_info *edac_dev);
Mauro Carvalho Chehab | 5336f75 | 2016-10-26 16:01:47 -0200 | [diff] [blame] | 272 | |
/**
 * edac_device_del_device - Remove the sysfs entries of the specified
 *	edac_device structure, then remove the edac_device structure
 *	from the global list.
 *
 * @dev: pointer to the struct &device representing the edac device
 *	structure to remove
 *
 * Returns:
 *	Pointer to the removed edac_device structure,
 *	or %NULL if the device was not found.
 */
struct edac_device_ctl_info *edac_device_del_device(struct device *dev);
Mauro Carvalho Chehab | 5336f75 | 2016-10-26 16:01:47 -0200 | [diff] [blame] | 287 | |
/**
 * edac_device_handle_ce_count - Log correctable errors.
 *
 * @edac_dev: pointer to struct &edac_device_ctl_info
 * @count: number of errors to log
 * @inst_nr: number of the instance where the CE error happened
 * @block_nr: number of the block where the CE error happened
 * @msg: message to be printed
 */
void edac_device_handle_ce_count(struct edac_device_ctl_info *edac_dev,
				 unsigned int count, int inst_nr, int block_nr,
				 const char *msg);
| 300 | |
/**
 * edac_device_handle_ue_count - Log uncorrectable errors.
 *
 * @edac_dev: pointer to struct &edac_device_ctl_info
 * @count: number of errors to log
 * @inst_nr: number of the instance where the UE error happened
 * @block_nr: number of the block where the UE error happened
 * @msg: message to be printed
 */
void edac_device_handle_ue_count(struct edac_device_ctl_info *edac_dev,
				 unsigned int count, int inst_nr, int block_nr,
				 const char *msg);
| 313 | |
/**
 * edac_device_handle_ce - Log a single correctable error
 *
 * @edac_dev: pointer to struct &edac_device_ctl_info
 * @inst_nr: number of the instance where the CE error happened
 * @block_nr: number of the block where the CE error happened
 * @msg: message to be printed
 */
static inline void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
					 int inst_nr, int block_nr,
					 const char *msg)
{
	/* One CE is the counted variant with a count of 1 */
	edac_device_handle_ce_count(edac_dev, 1, inst_nr, block_nr, msg);
}
| 328 | |
/**
 * edac_device_handle_ue - Log a single uncorrectable error
 *
 * @edac_dev: pointer to struct &edac_device_ctl_info
 * @inst_nr: number of the instance where the UE error happened
 * @block_nr: number of the block where the UE error happened
 * @msg: message to be printed
 */
static inline void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev,
					 int inst_nr, int block_nr,
					 const char *msg)
{
	/* One UE is the counted variant with a count of 1 */
	edac_device_handle_ue_count(edac_dev, 1, inst_nr, block_nr, msg);
}
Mauro Carvalho Chehab | 5336f75 | 2016-10-26 16:01:47 -0200 | [diff] [blame] | 343 | |
/**
 * edac_device_alloc_index - Allocate a unique device index number
 *
 * Returns:
 *	the allocated index number
 */
int edac_device_alloc_index(void);
/*
 * Array of name strings; only declared here, the definition is not in
 * this header.
 * NOTE(review): nothing else in this header refers to it - confirm
 * that this declaration belongs here.
 */
extern const char *edac_layer_name[];
Mauro Carvalho Chehab | 6d8ef24 | 2016-10-29 10:01:41 -0200 | [diff] [blame] | 352 | #endif |