Shravan Kumar Ramani | 82413e5 | 2019-06-25 15:13:59 -0400 | [diff] [blame] | 1 | // SPDX-License-Identifier: GPL-2.0 |
| 2 | /* |
| 3 | * Bluefield-specific EDAC driver. |
| 4 | * |
| 5 | * Copyright (c) 2019 Mellanox Technologies. |
| 6 | */ |
| 7 | |
| 8 | #include <linux/acpi.h> |
| 9 | #include <linux/arm-smccc.h> |
| 10 | #include <linux/bitfield.h> |
| 11 | #include <linux/edac.h> |
| 12 | #include <linux/io.h> |
| 13 | #include <linux/module.h> |
| 14 | #include <linux/platform_device.h> |
| 15 | |
| 16 | #include "edac_module.h" |
| 17 | |
/* Used as the platform driver name and as the EDAC mod_name. */
#define DRIVER_NAME			"bluefield-edac"

/*
 * Mellanox BlueField EMI (External Memory Interface) register definitions.
 *
 * Names without a double underscore are byte offsets from the mapped EMI
 * base; names of the form REG__FIELD are bit masks inside register REG.
 */

/* ECC error counters: single-bit count and double-bit count. */
#define MLXBF_ECC_CNT			0x340
#define MLXBF_ECC_CNT__SERR_CNT		GENMASK(15, 0)
#define MLXBF_ECC_CNT__DERR_CNT		GENMASK(31, 16)

/* Written by the driver to clear the errors it has just reported. */
#define MLXBF_ECC_ERR			0x348
#define MLXBF_ECC_ERR__SECC		BIT(0)
#define MLXBF_ECC_ERR__DECC		BIT(16)

/* Writing START asks the EMI to latch details of the last ECC error. */
#define MLXBF_ECC_LATCH_SEL		0x354
#define MLXBF_ECC_LATCH_SEL__START	BIT(24)

/* Low and high 32-bit halves of the latched error address. */
#define MLXBF_ERR_ADDR_0		0x358
#define MLXBF_ERR_ADDR_1		0x37c

/* Latched error type flags and syndrome value. */
#define MLXBF_SYNDROM			0x35c
#define MLXBF_SYNDROM__DERR		BIT(0)
#define MLXBF_SYNDROM__SERR		BIT(1)
#define MLXBF_SYNDROM__SYN		GENMASK(25, 16)

/* Extra error details; ERR_PRANK is presumably the physical rank (confirm). */
#define MLXBF_ADD_INFO			0x364
#define MLXBF_ADD_INFO__ERR_PRANK	GENMASK(9, 8)

/* Upper bound accepted for the "dimm_per_mc" firmware property. */
#define MLXBF_EDAC_MAX_DIMM_PER_MC	2
/* Value stored in dimm_info.grain for every populated DIMM. */
#define MLXBF_EDAC_ERROR_GRAIN		8
| 49 | |
/*
 * SMC request MLNX_SIP_GET_DIMM_INFO
 *
 * Asks the firmware to describe the DIMM sitting in a given slot.
 *
 * Call register usage:
 *	a0:	MLNX_SIP_GET_DIMM_INFO
 *	a1:	(Memory controller index) << 16 | (DIMM index in memory controller)
 *	a2-7:	not used.
 *
 * Return status:
 *	a0:	MLXBF_DIMM_INFO word (fields defined below) describing the DIMM.
 *	a1-3:	not used.
 */
#define MLNX_SIP_GET_DIMM_INFO		0x82000008

/*
 * Layout of the MLXBF_DIMM_INFO word returned in a0.
 * A SIZE_GB of zero is treated by the driver as an empty slot.
 */
#define MLXBF_DIMM_INFO__SIZE_GB	GENMASK_ULL(15, 0)
#define MLXBF_DIMM_INFO__IS_RDIMM	BIT(16)
#define MLXBF_DIMM_INFO__IS_LRDIMM	BIT(17)
#define MLXBF_DIMM_INFO__IS_NVDIMM	BIT(18)
#define MLXBF_DIMM_INFO__RANKS		GENMASK_ULL(23, 21)
#define MLXBF_DIMM_INFO__PACKAGE_X	GENMASK_ULL(31, 24)
| 73 | |
| 74 | struct bluefield_edac_priv { |
| 75 | int dimm_ranks[MLXBF_EDAC_MAX_DIMM_PER_MC]; |
| 76 | void __iomem *emi_base; |
| 77 | int dimm_per_mc; |
| 78 | }; |
| 79 | |
| 80 | static u64 smc_call1(u64 smc_op, u64 smc_arg) |
| 81 | { |
| 82 | struct arm_smccc_res res; |
| 83 | |
| 84 | arm_smccc_smc(smc_op, smc_arg, 0, 0, 0, 0, 0, 0, &res); |
| 85 | |
| 86 | return res.a0; |
| 87 | } |
| 88 | |
| 89 | /* |
| 90 | * Gather the ECC information from the External Memory Interface registers |
| 91 | * and report it to the edac handler. |
| 92 | */ |
| 93 | static void bluefield_gather_report_ecc(struct mem_ctl_info *mci, |
| 94 | int error_cnt, |
| 95 | int is_single_ecc) |
| 96 | { |
| 97 | struct bluefield_edac_priv *priv = mci->pvt_info; |
| 98 | u32 dram_additional_info, err_prank, edea0, edea1; |
| 99 | u32 ecc_latch_select, dram_syndrom, serr, derr, syndrom; |
| 100 | enum hw_event_mc_err_type ecc_type; |
| 101 | u64 ecc_dimm_addr; |
| 102 | int ecc_dimm; |
| 103 | |
| 104 | ecc_type = is_single_ecc ? HW_EVENT_ERR_CORRECTED : |
| 105 | HW_EVENT_ERR_UNCORRECTED; |
| 106 | |
| 107 | /* |
| 108 | * Tell the External Memory Interface to populate the relevant |
| 109 | * registers with information about the last ECC error occurrence. |
| 110 | */ |
| 111 | ecc_latch_select = MLXBF_ECC_LATCH_SEL__START; |
| 112 | writel(ecc_latch_select, priv->emi_base + MLXBF_ECC_LATCH_SEL); |
| 113 | |
| 114 | /* |
| 115 | * Verify that the ECC reported info in the registers is of the |
| 116 | * same type as the one asked to report. If not, just report the |
| 117 | * error without the detailed information. |
| 118 | */ |
| 119 | dram_syndrom = readl(priv->emi_base + MLXBF_SYNDROM); |
| 120 | serr = FIELD_GET(MLXBF_SYNDROM__SERR, dram_syndrom); |
| 121 | derr = FIELD_GET(MLXBF_SYNDROM__DERR, dram_syndrom); |
| 122 | syndrom = FIELD_GET(MLXBF_SYNDROM__SYN, dram_syndrom); |
| 123 | |
| 124 | if ((is_single_ecc && !serr) || (!is_single_ecc && !derr)) { |
| 125 | edac_mc_handle_error(ecc_type, mci, error_cnt, 0, 0, 0, |
| 126 | 0, 0, -1, mci->ctl_name, ""); |
| 127 | return; |
| 128 | } |
| 129 | |
| 130 | dram_additional_info = readl(priv->emi_base + MLXBF_ADD_INFO); |
| 131 | err_prank = FIELD_GET(MLXBF_ADD_INFO__ERR_PRANK, dram_additional_info); |
| 132 | |
| 133 | ecc_dimm = (err_prank >= 2 && priv->dimm_ranks[0] <= 2) ? 1 : 0; |
| 134 | |
| 135 | edea0 = readl(priv->emi_base + MLXBF_ERR_ADDR_0); |
| 136 | edea1 = readl(priv->emi_base + MLXBF_ERR_ADDR_1); |
| 137 | |
| 138 | ecc_dimm_addr = ((u64)edea1 << 32) | edea0; |
| 139 | |
| 140 | edac_mc_handle_error(ecc_type, mci, error_cnt, |
| 141 | PFN_DOWN(ecc_dimm_addr), |
| 142 | offset_in_page(ecc_dimm_addr), |
| 143 | syndrom, ecc_dimm, 0, 0, mci->ctl_name, ""); |
| 144 | } |
| 145 | |
| 146 | static void bluefield_edac_check(struct mem_ctl_info *mci) |
| 147 | { |
| 148 | struct bluefield_edac_priv *priv = mci->pvt_info; |
| 149 | u32 ecc_count, single_error_count, double_error_count, ecc_error = 0; |
| 150 | |
| 151 | /* |
| 152 | * The memory controller might not be initialized by the firmware |
| 153 | * when there isn't memory, which may lead to bad register readings. |
| 154 | */ |
| 155 | if (mci->edac_cap == EDAC_FLAG_NONE) |
| 156 | return; |
| 157 | |
| 158 | ecc_count = readl(priv->emi_base + MLXBF_ECC_CNT); |
| 159 | single_error_count = FIELD_GET(MLXBF_ECC_CNT__SERR_CNT, ecc_count); |
| 160 | double_error_count = FIELD_GET(MLXBF_ECC_CNT__DERR_CNT, ecc_count); |
| 161 | |
| 162 | if (single_error_count) { |
| 163 | ecc_error |= MLXBF_ECC_ERR__SECC; |
| 164 | |
| 165 | bluefield_gather_report_ecc(mci, single_error_count, 1); |
| 166 | } |
| 167 | |
| 168 | if (double_error_count) { |
| 169 | ecc_error |= MLXBF_ECC_ERR__DECC; |
| 170 | |
| 171 | bluefield_gather_report_ecc(mci, double_error_count, 0); |
| 172 | } |
| 173 | |
| 174 | /* Write to clear reported errors. */ |
| 175 | if (ecc_count) |
| 176 | writel(ecc_error, priv->emi_base + MLXBF_ECC_ERR); |
| 177 | } |
| 178 | |
| 179 | /* Initialize the DIMMs information for the given memory controller. */ |
| 180 | static void bluefield_edac_init_dimms(struct mem_ctl_info *mci) |
| 181 | { |
| 182 | struct bluefield_edac_priv *priv = mci->pvt_info; |
| 183 | int mem_ctrl_idx = mci->mc_idx; |
| 184 | struct dimm_info *dimm; |
| 185 | u64 smc_info, smc_arg; |
| 186 | int is_empty = 1, i; |
| 187 | |
| 188 | for (i = 0; i < priv->dimm_per_mc; i++) { |
| 189 | dimm = mci->dimms[i]; |
| 190 | |
| 191 | smc_arg = mem_ctrl_idx << 16 | i; |
| 192 | smc_info = smc_call1(MLNX_SIP_GET_DIMM_INFO, smc_arg); |
| 193 | |
| 194 | if (!FIELD_GET(MLXBF_DIMM_INFO__SIZE_GB, smc_info)) { |
| 195 | dimm->mtype = MEM_EMPTY; |
| 196 | continue; |
| 197 | } |
| 198 | |
| 199 | is_empty = 0; |
| 200 | |
| 201 | dimm->edac_mode = EDAC_SECDED; |
| 202 | |
| 203 | if (FIELD_GET(MLXBF_DIMM_INFO__IS_NVDIMM, smc_info)) |
| 204 | dimm->mtype = MEM_NVDIMM; |
| 205 | else if (FIELD_GET(MLXBF_DIMM_INFO__IS_LRDIMM, smc_info)) |
| 206 | dimm->mtype = MEM_LRDDR4; |
| 207 | else if (FIELD_GET(MLXBF_DIMM_INFO__IS_RDIMM, smc_info)) |
| 208 | dimm->mtype = MEM_RDDR4; |
| 209 | else |
| 210 | dimm->mtype = MEM_DDR4; |
| 211 | |
| 212 | dimm->nr_pages = |
| 213 | FIELD_GET(MLXBF_DIMM_INFO__SIZE_GB, smc_info) * |
| 214 | (SZ_1G / PAGE_SIZE); |
| 215 | dimm->grain = MLXBF_EDAC_ERROR_GRAIN; |
| 216 | |
| 217 | /* Mem controller for BlueField only supports x4, x8 and x16 */ |
| 218 | switch (FIELD_GET(MLXBF_DIMM_INFO__PACKAGE_X, smc_info)) { |
| 219 | case 4: |
| 220 | dimm->dtype = DEV_X4; |
| 221 | break; |
| 222 | case 8: |
| 223 | dimm->dtype = DEV_X8; |
| 224 | break; |
| 225 | case 16: |
| 226 | dimm->dtype = DEV_X16; |
| 227 | break; |
| 228 | default: |
| 229 | dimm->dtype = DEV_UNKNOWN; |
| 230 | } |
| 231 | |
| 232 | priv->dimm_ranks[i] = |
| 233 | FIELD_GET(MLXBF_DIMM_INFO__RANKS, smc_info); |
| 234 | } |
| 235 | |
| 236 | if (is_empty) |
| 237 | mci->edac_cap = EDAC_FLAG_NONE; |
| 238 | else |
| 239 | mci->edac_cap = EDAC_FLAG_SECDED; |
| 240 | } |
| 241 | |
| 242 | static int bluefield_edac_mc_probe(struct platform_device *pdev) |
| 243 | { |
| 244 | struct bluefield_edac_priv *priv; |
| 245 | struct device *dev = &pdev->dev; |
| 246 | struct edac_mc_layer layers[1]; |
| 247 | struct mem_ctl_info *mci; |
| 248 | struct resource *emi_res; |
| 249 | unsigned int mc_idx, dimm_count; |
| 250 | int rc, ret; |
| 251 | |
| 252 | /* Read the MSS (Memory SubSystem) index from ACPI table. */ |
| 253 | if (device_property_read_u32(dev, "mss_number", &mc_idx)) { |
| 254 | dev_warn(dev, "bf_edac: MSS number unknown\n"); |
| 255 | return -EINVAL; |
| 256 | } |
| 257 | |
| 258 | /* Read the DIMMs per MC from ACPI table. */ |
| 259 | if (device_property_read_u32(dev, "dimm_per_mc", &dimm_count)) { |
| 260 | dev_warn(dev, "bf_edac: DIMMs per MC unknown\n"); |
| 261 | return -EINVAL; |
| 262 | } |
| 263 | |
| 264 | if (dimm_count > MLXBF_EDAC_MAX_DIMM_PER_MC) { |
| 265 | dev_warn(dev, "bf_edac: DIMMs per MC not valid\n"); |
| 266 | return -EINVAL; |
| 267 | } |
| 268 | |
| 269 | emi_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); |
| 270 | if (!emi_res) |
| 271 | return -EINVAL; |
| 272 | |
| 273 | layers[0].type = EDAC_MC_LAYER_SLOT; |
| 274 | layers[0].size = dimm_count; |
| 275 | layers[0].is_virt_csrow = true; |
| 276 | |
| 277 | mci = edac_mc_alloc(mc_idx, ARRAY_SIZE(layers), layers, sizeof(*priv)); |
| 278 | if (!mci) |
| 279 | return -ENOMEM; |
| 280 | |
| 281 | priv = mci->pvt_info; |
| 282 | |
| 283 | priv->dimm_per_mc = dimm_count; |
| 284 | priv->emi_base = devm_ioremap_resource(dev, emi_res); |
| 285 | if (IS_ERR(priv->emi_base)) { |
| 286 | dev_err(dev, "failed to map EMI IO resource\n"); |
| 287 | ret = PTR_ERR(priv->emi_base); |
| 288 | goto err; |
| 289 | } |
| 290 | |
| 291 | mci->pdev = dev; |
| 292 | mci->mtype_cap = MEM_FLAG_DDR4 | MEM_FLAG_RDDR4 | |
| 293 | MEM_FLAG_LRDDR4 | MEM_FLAG_NVDIMM; |
| 294 | mci->edac_ctl_cap = EDAC_FLAG_SECDED; |
| 295 | |
| 296 | mci->mod_name = DRIVER_NAME; |
| 297 | mci->ctl_name = "BlueField_Memory_Controller"; |
| 298 | mci->dev_name = dev_name(dev); |
| 299 | mci->edac_check = bluefield_edac_check; |
| 300 | |
| 301 | /* Initialize mci with the actual populated DIMM information. */ |
| 302 | bluefield_edac_init_dimms(mci); |
| 303 | |
| 304 | platform_set_drvdata(pdev, mci); |
| 305 | |
| 306 | /* Register with EDAC core */ |
| 307 | rc = edac_mc_add_mc(mci); |
| 308 | if (rc) { |
| 309 | dev_err(dev, "failed to register with EDAC core\n"); |
| 310 | ret = rc; |
| 311 | goto err; |
| 312 | } |
| 313 | |
| 314 | /* Only POLL mode supported so far. */ |
| 315 | edac_op_state = EDAC_OPSTATE_POLL; |
| 316 | |
| 317 | return 0; |
| 318 | |
| 319 | err: |
| 320 | edac_mc_free(mci); |
| 321 | |
| 322 | return ret; |
| 323 | |
| 324 | } |
| 325 | |
| 326 | static int bluefield_edac_mc_remove(struct platform_device *pdev) |
| 327 | { |
| 328 | struct mem_ctl_info *mci = platform_get_drvdata(pdev); |
| 329 | |
| 330 | edac_mc_del_mc(&pdev->dev); |
| 331 | edac_mc_free(mci); |
| 332 | |
| 333 | return 0; |
| 334 | } |
| 335 | |
| 336 | static const struct acpi_device_id bluefield_mc_acpi_ids[] = { |
| 337 | {"MLNXBF08", 0}, |
| 338 | {} |
| 339 | }; |
| 340 | |
| 341 | MODULE_DEVICE_TABLE(acpi, bluefield_mc_acpi_ids); |
| 342 | |
| 343 | static struct platform_driver bluefield_edac_mc_driver = { |
| 344 | .driver = { |
| 345 | .name = DRIVER_NAME, |
| 346 | .acpi_match_table = bluefield_mc_acpi_ids, |
| 347 | }, |
| 348 | .probe = bluefield_edac_mc_probe, |
| 349 | .remove = bluefield_edac_mc_remove, |
| 350 | }; |
| 351 | |
| 352 | module_platform_driver(bluefield_edac_mc_driver); |
| 353 | |
| 354 | MODULE_DESCRIPTION("Mellanox BlueField memory edac driver"); |
| 355 | MODULE_AUTHOR("Mellanox Technologies"); |
| 356 | MODULE_LICENSE("GPL v2"); |