|
| 1 | +// SPDX-License-Identifier: GPL-2.0 |
| 2 | +/* |
| 3 | + * Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. |
| 4 | + */ |
| 5 | +#include <linux/bitfield.h> |
| 6 | +#include <linux/bitops.h> |
| 7 | +#include <linux/edac.h> |
| 8 | +#include <linux/of_irq.h> |
| 9 | +#include <linux/platform_device.h> |
| 10 | +#include <linux/spinlock.h> |
| 11 | +#include "edac_module.h" |
| 12 | + |
/* Register offsets, added to the controller's mapped MMIO base */
#define AL_MC_ECC_CFG			0x70
#define AL_MC_ECC_CLEAR			0x7c
#define AL_MC_ECC_ERR_COUNT		0x80

/* Correctable error (CE) address and syndrome registers */
#define AL_MC_ECC_CE_ADDR0		0x84
#define AL_MC_ECC_CE_ADDR1		0x88
#define AL_MC_ECC_CE_SYND0		0x8c
#define AL_MC_ECC_CE_SYND1		0x90
#define AL_MC_ECC_CE_SYND2		0x94

/* Uncorrectable error (UE) address and syndrome registers */
#define AL_MC_ECC_UE_ADDR0		0xa4
#define AL_MC_ECC_UE_ADDR1		0xa8
#define AL_MC_ECC_UE_SYND0		0xac
#define AL_MC_ECC_UE_SYND1		0xb0
#define AL_MC_ECC_UE_SYND2		0xb4

/* AL_MC_ECC_CFG fields */
#define AL_MC_ECC_CFG_SCRUB_DISABLED	BIT(4)

/* AL_MC_ECC_CLEAR fields */
#define AL_MC_ECC_CLEAR_CE_ERR		BIT(0)
#define AL_MC_ECC_CLEAR_UE_ERR		BIT(1)
#define AL_MC_ECC_CLEAR_CE_COUNT	BIT(2)
#define AL_MC_ECC_CLEAR_UE_COUNT	BIT(3)

/* AL_MC_ECC_ERR_COUNT fields */
#define AL_MC_ECC_ERR_COUNT_CE		GENMASK(15, 0)
#define AL_MC_ECC_ERR_COUNT_UE		GENMASK(31, 16)

/* AL_MC_ECC_CE_ADDR0 fields */
#define AL_MC_ECC_CE_ADDR0_ROW		GENMASK(17, 0)
#define AL_MC_ECC_CE_ADDR0_RANK		GENMASK(25, 24)

/* AL_MC_ECC_CE_ADDR1 fields */
#define AL_MC_ECC_CE_ADDR1_COLUMN	GENMASK(11, 0)
#define AL_MC_ECC_CE_ADDR1_BANK		GENMASK(18, 16)
#define AL_MC_ECC_CE_ADDR1_BG		GENMASK(25, 24)

/* AL_MC_ECC_UE_ADDR0 fields */
#define AL_MC_ECC_UE_ADDR0_ROW		GENMASK(17, 0)
#define AL_MC_ECC_UE_ADDR0_RANK		GENMASK(25, 24)

/* AL_MC_ECC_UE_ADDR1 fields */
#define AL_MC_ECC_UE_ADDR1_COLUMN	GENMASK(11, 0)
#define AL_MC_ECC_UE_ADDR1_BANK		GENMASK(18, 16)
#define AL_MC_ECC_UE_ADDR1_BG		GENMASK(25, 24)

#define DRV_NAME			"al_mc_edac"
/* Size of the on-stack buffer used to format an error report */
#define AL_MC_EDAC_MSG_MAX		256
| 55 | + |
| 56 | +struct al_mc_edac { |
| 57 | + void __iomem *mmio_base; |
| 58 | + spinlock_t lock; |
| 59 | + int irq_ce; |
| 60 | + int irq_ue; |
| 61 | +}; |
| 62 | + |
| 63 | +static void prepare_msg(char *message, size_t buffer_size, |
| 64 | + enum hw_event_mc_err_type type, |
| 65 | + u8 rank, u32 row, u8 bg, u8 bank, u16 column, |
| 66 | + u32 syn0, u32 syn1, u32 syn2) |
| 67 | +{ |
| 68 | + snprintf(message, buffer_size, |
| 69 | + "%s rank=0x%x row=0x%x bg=0x%x bank=0x%x col=0x%x syn0: 0x%x syn1: 0x%x syn2: 0x%x", |
| 70 | + type == HW_EVENT_ERR_UNCORRECTED ? "UE" : "CE", |
| 71 | + rank, row, bg, bank, column, syn0, syn1, syn2); |
| 72 | +} |
| 73 | + |
| 74 | +static int handle_ce(struct mem_ctl_info *mci) |
| 75 | +{ |
| 76 | + u32 eccerrcnt, ecccaddr0, ecccaddr1, ecccsyn0, ecccsyn1, ecccsyn2, row; |
| 77 | + struct al_mc_edac *al_mc = mci->pvt_info; |
| 78 | + char msg[AL_MC_EDAC_MSG_MAX]; |
| 79 | + u16 ce_count, column; |
| 80 | + unsigned long flags; |
| 81 | + u8 rank, bg, bank; |
| 82 | + |
| 83 | + eccerrcnt = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_ERR_COUNT); |
| 84 | + ce_count = FIELD_GET(AL_MC_ECC_ERR_COUNT_CE, eccerrcnt); |
| 85 | + if (!ce_count) |
| 86 | + return 0; |
| 87 | + |
| 88 | + ecccaddr0 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_CE_ADDR0); |
| 89 | + ecccaddr1 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_CE_ADDR1); |
| 90 | + ecccsyn0 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_CE_SYND0); |
| 91 | + ecccsyn1 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_CE_SYND1); |
| 92 | + ecccsyn2 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_CE_SYND2); |
| 93 | + |
| 94 | + writel_relaxed(AL_MC_ECC_CLEAR_CE_COUNT | AL_MC_ECC_CLEAR_CE_ERR, |
| 95 | + al_mc->mmio_base + AL_MC_ECC_CLEAR); |
| 96 | + |
| 97 | + dev_dbg(mci->pdev, "eccuaddr0=0x%08x eccuaddr1=0x%08x\n", |
| 98 | + ecccaddr0, ecccaddr1); |
| 99 | + |
| 100 | + rank = FIELD_GET(AL_MC_ECC_CE_ADDR0_RANK, ecccaddr0); |
| 101 | + row = FIELD_GET(AL_MC_ECC_CE_ADDR0_ROW, ecccaddr0); |
| 102 | + |
| 103 | + bg = FIELD_GET(AL_MC_ECC_CE_ADDR1_BG, ecccaddr1); |
| 104 | + bank = FIELD_GET(AL_MC_ECC_CE_ADDR1_BANK, ecccaddr1); |
| 105 | + column = FIELD_GET(AL_MC_ECC_CE_ADDR1_COLUMN, ecccaddr1); |
| 106 | + |
| 107 | + prepare_msg(msg, sizeof(msg), HW_EVENT_ERR_CORRECTED, |
| 108 | + rank, row, bg, bank, column, |
| 109 | + ecccsyn0, ecccsyn1, ecccsyn2); |
| 110 | + |
| 111 | + spin_lock_irqsave(&al_mc->lock, flags); |
| 112 | + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, |
| 113 | + ce_count, 0, 0, 0, 0, 0, -1, mci->ctl_name, msg); |
| 114 | + spin_unlock_irqrestore(&al_mc->lock, flags); |
| 115 | + |
| 116 | + return ce_count; |
| 117 | +} |
| 118 | + |
| 119 | +static int handle_ue(struct mem_ctl_info *mci) |
| 120 | +{ |
| 121 | + u32 eccerrcnt, eccuaddr0, eccuaddr1, eccusyn0, eccusyn1, eccusyn2, row; |
| 122 | + struct al_mc_edac *al_mc = mci->pvt_info; |
| 123 | + char msg[AL_MC_EDAC_MSG_MAX]; |
| 124 | + u16 ue_count, column; |
| 125 | + unsigned long flags; |
| 126 | + u8 rank, bg, bank; |
| 127 | + |
| 128 | + eccerrcnt = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_ERR_COUNT); |
| 129 | + ue_count = FIELD_GET(AL_MC_ECC_ERR_COUNT_UE, eccerrcnt); |
| 130 | + if (!ue_count) |
| 131 | + return 0; |
| 132 | + |
| 133 | + eccuaddr0 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_ADDR0); |
| 134 | + eccuaddr1 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_ADDR1); |
| 135 | + eccusyn0 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_SYND0); |
| 136 | + eccusyn1 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_SYND1); |
| 137 | + eccusyn2 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_SYND2); |
| 138 | + |
| 139 | + writel_relaxed(AL_MC_ECC_CLEAR_UE_COUNT | AL_MC_ECC_CLEAR_UE_ERR, |
| 140 | + al_mc->mmio_base + AL_MC_ECC_CLEAR); |
| 141 | + |
| 142 | + dev_dbg(mci->pdev, "eccuaddr0=0x%08x eccuaddr1=0x%08x\n", |
| 143 | + eccuaddr0, eccuaddr1); |
| 144 | + |
| 145 | + rank = FIELD_GET(AL_MC_ECC_UE_ADDR0_RANK, eccuaddr0); |
| 146 | + row = FIELD_GET(AL_MC_ECC_UE_ADDR0_ROW, eccuaddr0); |
| 147 | + |
| 148 | + bg = FIELD_GET(AL_MC_ECC_UE_ADDR1_BG, eccuaddr1); |
| 149 | + bank = FIELD_GET(AL_MC_ECC_UE_ADDR1_BANK, eccuaddr1); |
| 150 | + column = FIELD_GET(AL_MC_ECC_UE_ADDR1_COLUMN, eccuaddr1); |
| 151 | + |
| 152 | + prepare_msg(msg, sizeof(msg), HW_EVENT_ERR_UNCORRECTED, |
| 153 | + rank, row, bg, bank, column, |
| 154 | + eccusyn0, eccusyn1, eccusyn2); |
| 155 | + |
| 156 | + spin_lock_irqsave(&al_mc->lock, flags); |
| 157 | + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, |
| 158 | + ue_count, 0, 0, 0, 0, 0, -1, mci->ctl_name, msg); |
| 159 | + spin_unlock_irqrestore(&al_mc->lock, flags); |
| 160 | + |
| 161 | + return ue_count; |
| 162 | +} |
| 163 | + |
| 164 | +static void al_mc_edac_check(struct mem_ctl_info *mci) |
| 165 | +{ |
| 166 | + struct al_mc_edac *al_mc = mci->pvt_info; |
| 167 | + |
| 168 | + if (al_mc->irq_ue <= 0) |
| 169 | + handle_ue(mci); |
| 170 | + |
| 171 | + if (al_mc->irq_ce <= 0) |
| 172 | + handle_ce(mci); |
| 173 | +} |
| 174 | + |
| 175 | +static irqreturn_t al_mc_edac_irq_handler_ue(int irq, void *info) |
| 176 | +{ |
| 177 | + struct platform_device *pdev = info; |
| 178 | + struct mem_ctl_info *mci = platform_get_drvdata(pdev); |
| 179 | + |
| 180 | + if (handle_ue(mci)) |
| 181 | + return IRQ_HANDLED; |
| 182 | + return IRQ_NONE; |
| 183 | +} |
| 184 | + |
| 185 | +static irqreturn_t al_mc_edac_irq_handler_ce(int irq, void *info) |
| 186 | +{ |
| 187 | + struct platform_device *pdev = info; |
| 188 | + struct mem_ctl_info *mci = platform_get_drvdata(pdev); |
| 189 | + |
| 190 | + if (handle_ce(mci)) |
| 191 | + return IRQ_HANDLED; |
| 192 | + return IRQ_NONE; |
| 193 | +} |
| 194 | + |
| 195 | +static enum scrub_type get_scrub_mode(void __iomem *mmio_base) |
| 196 | +{ |
| 197 | + u32 ecccfg0; |
| 198 | + |
| 199 | + ecccfg0 = readl(mmio_base + AL_MC_ECC_CFG); |
| 200 | + |
| 201 | + if (FIELD_GET(AL_MC_ECC_CFG_SCRUB_DISABLED, ecccfg0)) |
| 202 | + return SCRUB_NONE; |
| 203 | + else |
| 204 | + return SCRUB_HW_SRC; |
| 205 | +} |
| 206 | + |
/* devm action: release the mem_ctl_info that probe allocated; @data is the mci */
static void devm_al_mc_edac_free(void *data)
{
	struct mem_ctl_info *mci = data;

	edac_mc_free(mci);
}
| 211 | + |
/*
 * devm action: unregister the memory controller from the EDAC core; @data is
 * the struct device that was recorded in mci->pdev.
 */
static void devm_al_mc_edac_del(void *data)
{
	struct device *dev = data;

	edac_mc_del_mc(dev);
}
| 216 | + |
| 217 | +static int al_mc_edac_probe(struct platform_device *pdev) |
| 218 | +{ |
| 219 | + struct edac_mc_layer layers[1]; |
| 220 | + struct mem_ctl_info *mci; |
| 221 | + struct al_mc_edac *al_mc; |
| 222 | + void __iomem *mmio_base; |
| 223 | + struct dimm_info *dimm; |
| 224 | + int ret; |
| 225 | + |
| 226 | + mmio_base = devm_platform_ioremap_resource(pdev, 0); |
| 227 | + if (IS_ERR(mmio_base)) { |
| 228 | + dev_err(&pdev->dev, "failed to ioremap memory (%ld)\n", |
| 229 | + PTR_ERR(mmio_base)); |
| 230 | + return PTR_ERR(mmio_base); |
| 231 | + } |
| 232 | + |
| 233 | + layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; |
| 234 | + layers[0].size = 1; |
| 235 | + layers[0].is_virt_csrow = false; |
| 236 | + mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, |
| 237 | + sizeof(struct al_mc_edac)); |
| 238 | + if (!mci) |
| 239 | + return -ENOMEM; |
| 240 | + |
| 241 | + ret = devm_add_action(&pdev->dev, devm_al_mc_edac_free, mci); |
| 242 | + if (ret) { |
| 243 | + edac_mc_free(mci); |
| 244 | + return ret; |
| 245 | + } |
| 246 | + |
| 247 | + platform_set_drvdata(pdev, mci); |
| 248 | + al_mc = mci->pvt_info; |
| 249 | + |
| 250 | + al_mc->mmio_base = mmio_base; |
| 251 | + |
| 252 | + al_mc->irq_ue = of_irq_get_byname(pdev->dev.of_node, "ue"); |
| 253 | + if (al_mc->irq_ue <= 0) |
| 254 | + dev_dbg(&pdev->dev, |
| 255 | + "no IRQ defined for UE - falling back to polling\n"); |
| 256 | + |
| 257 | + al_mc->irq_ce = of_irq_get_byname(pdev->dev.of_node, "ce"); |
| 258 | + if (al_mc->irq_ce <= 0) |
| 259 | + dev_dbg(&pdev->dev, |
| 260 | + "no IRQ defined for CE - falling back to polling\n"); |
| 261 | + |
| 262 | + /* |
| 263 | + * In case both interrupts (ue/ce) are to be found, use interrupt mode. |
| 264 | + * In case none of the interrupt are foud, use polling mode. |
| 265 | + * In case only one interrupt is found, use interrupt mode for it but |
| 266 | + * keep polling mode enable for the other. |
| 267 | + */ |
| 268 | + if (al_mc->irq_ue <= 0 || al_mc->irq_ce <= 0) { |
| 269 | + edac_op_state = EDAC_OPSTATE_POLL; |
| 270 | + mci->edac_check = al_mc_edac_check; |
| 271 | + } else { |
| 272 | + edac_op_state = EDAC_OPSTATE_INT; |
| 273 | + } |
| 274 | + |
| 275 | + spin_lock_init(&al_mc->lock); |
| 276 | + |
| 277 | + mci->mtype_cap = MEM_FLAG_DDR3 | MEM_FLAG_DDR4; |
| 278 | + mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED; |
| 279 | + mci->edac_cap = EDAC_FLAG_SECDED; |
| 280 | + mci->mod_name = DRV_NAME; |
| 281 | + mci->ctl_name = "al_mc"; |
| 282 | + mci->pdev = &pdev->dev; |
| 283 | + mci->scrub_mode = get_scrub_mode(mmio_base); |
| 284 | + |
| 285 | + dimm = *mci->dimms; |
| 286 | + dimm->grain = 1; |
| 287 | + |
| 288 | + ret = edac_mc_add_mc(mci); |
| 289 | + if (ret < 0) { |
| 290 | + dev_err(&pdev->dev, |
| 291 | + "fail to add memory controller device (%d)\n", |
| 292 | + ret); |
| 293 | + return ret; |
| 294 | + } |
| 295 | + |
| 296 | + ret = devm_add_action(&pdev->dev, devm_al_mc_edac_del, &pdev->dev); |
| 297 | + if (ret) { |
| 298 | + edac_mc_del_mc(&pdev->dev); |
| 299 | + return ret; |
| 300 | + } |
| 301 | + |
| 302 | + if (al_mc->irq_ue > 0) { |
| 303 | + ret = devm_request_irq(&pdev->dev, |
| 304 | + al_mc->irq_ue, |
| 305 | + al_mc_edac_irq_handler_ue, |
| 306 | + IRQF_SHARED, |
| 307 | + pdev->name, |
| 308 | + pdev); |
| 309 | + if (ret != 0) { |
| 310 | + dev_err(&pdev->dev, |
| 311 | + "failed to request UE IRQ %d (%d)\n", |
| 312 | + al_mc->irq_ue, ret); |
| 313 | + return ret; |
| 314 | + } |
| 315 | + } |
| 316 | + |
| 317 | + if (al_mc->irq_ce > 0) { |
| 318 | + ret = devm_request_irq(&pdev->dev, |
| 319 | + al_mc->irq_ce, |
| 320 | + al_mc_edac_irq_handler_ce, |
| 321 | + IRQF_SHARED, |
| 322 | + pdev->name, |
| 323 | + pdev); |
| 324 | + if (ret != 0) { |
| 325 | + dev_err(&pdev->dev, |
| 326 | + "failed to request CE IRQ %d (%d)\n", |
| 327 | + al_mc->irq_ce, ret); |
| 328 | + return ret; |
| 329 | + } |
| 330 | + } |
| 331 | + |
| 332 | + return 0; |
| 333 | +} |
| 334 | + |
| 335 | +static const struct of_device_id al_mc_edac_of_match[] = { |
| 336 | + { .compatible = "amazon,al-mc-edac", }, |
| 337 | + {}, |
| 338 | +}; |
| 339 | + |
| 340 | +MODULE_DEVICE_TABLE(of, al_mc_edac_of_match); |
| 341 | + |
| 342 | +static struct platform_driver al_mc_edac_driver = { |
| 343 | + .probe = al_mc_edac_probe, |
| 344 | + .driver = { |
| 345 | + .name = DRV_NAME, |
| 346 | + .of_match_table = al_mc_edac_of_match, |
| 347 | + }, |
| 348 | +}; |
| 349 | + |
| 350 | +module_platform_driver(al_mc_edac_driver); |
| 351 | + |
| 352 | +MODULE_LICENSE("GPL v2"); |
| 353 | +MODULE_AUTHOR("Talel Shenhar"); |
| 354 | +MODULE_DESCRIPTION("Amazon's Annapurna Lab's Memory Controller EDAC Driver"); |
0 commit comments