@@ -441,28 +441,35 @@ static void ghes_kick_task_work(struct callback_head *head)
gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node, node_len);
}
-static bool ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata,
- int sev)
+static bool ghes_do_memory_failure(u64 physical_addr, int flags)
{
unsigned long pfn;
- int flags = -1;
- int sec_sev = ghes_severity(gdata->error_severity);
- struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);
if (!IS_ENABLED(CONFIG_ACPI_APEI_MEMORY_FAILURE))
return false;
- if (!(mem_err->validation_bits & CPER_MEM_VALID_PA))
- return false;
-
- pfn = mem_err->physical_addr >> PAGE_SHIFT;
+ pfn = PHYS_PFN(physical_addr);
if (!pfn_valid(pfn)) {
pr_warn_ratelimited(FW_WARN GHES_PFX
"Invalid address in generic error data: %#llx\n",
- mem_err->physical_addr);
+ physical_addr);
return false;
}
+ memory_failure_queue(pfn, flags);
+ return true;
+}
+
+static bool ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata,
+ int sev)
+{
+ int flags = -1;
+ int sec_sev = ghes_severity(gdata->error_severity);
+ struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);
+
+ if (!(mem_err->validation_bits & CPER_MEM_VALID_PA))
+ return false;
+
/* iff following two events can be handled properly by now */
if (sec_sev == GHES_SEV_CORRECTED &&
(gdata->flags & CPER_SEC_ERROR_THRESHOLD_EXCEEDED))
@@ -470,14 +477,56 @@ static bool ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata,
if (sev == GHES_SEV_RECOVERABLE && sec_sev == GHES_SEV_RECOVERABLE)
flags = 0;
- if (flags != -1) {
- memory_failure_queue(pfn, flags);
- return true;
- }
+ if (flags != -1)
+ return ghes_do_memory_failure(mem_err->physical_addr, flags);
return false;
}
+/*
+ * Hand an ARM processor error section to log_arm_hw_error(), then queue memory
+ * failure handling for its cache errors that carry a valid physical address.
+ * Returns true if at least one of them was queued.
+ */
+static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata, int sev)
+{
+	struct cper_sec_proc_arm *err = acpi_hest_get_payload(gdata);
+	char *p = (char *)(err + 1);	/* first entry follows the section header */
+	bool queued = false;
+	int sec_sev, i;
+
+	log_arm_hw_error(err);
+	sec_sev = ghes_severity(gdata->error_severity);
+	if (sev != GHES_SEV_RECOVERABLE || sec_sev != GHES_SEV_RECOVERABLE)
+		return false;
+
+	for (i = 0; i < err->err_info_num; i++) {
+		struct cper_arm_err_info *err_info = (struct cper_arm_err_info *)p;
+		bool is_cache = (err_info->type == CPER_ARM_CACHE_ERROR);
+		bool has_pa = (err_info->validation_bits & CPER_ARM_INFO_VALID_PHYSICAL_ADDR);
+		const char *error_type = "unknown error";
+
+		/*
+		 * Bit 26 of err_info->error_info is stuck at 1 in some old
+		 * HiSilicon Kunpeng920 firmware, so 'corrected' entries are kept;
+		 * firmware is assumed not to mix them into an uncorrected section.
+		 * OR the results: a later failure must not hide a queued entry.
+		 */
+		if (is_cache && has_pa) {
+			queued |= ghes_do_memory_failure(err_info->physical_fault_addr, 0);
+			p += err_info->length;
+			continue;
+		}
+		if (err_info->type < ARRAY_SIZE(cper_proc_error_type_strs))
+			error_type = cper_proc_error_type_strs[err_info->type];
+		pr_warn_ratelimited(FW_WARN GHES_PFX
+				    "Unhandled processor error type: %s\n",
+				    error_type);
+		p += err_info->length;
+	}
+	return queued;
+}
+
/*
* PCIe AER errors need to be sent to the AER driver for reporting and
* recovery. The GHES severities map to the following AER severities and
@@ -605,9 +654,7 @@ static bool ghes_do_proc(struct ghes *ghes,
ghes_handle_aer(gdata);
}
else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) {
- struct cper_sec_proc_arm *err = acpi_hest_get_payload(gdata);
-
- log_arm_hw_error(err);
+ queued = ghes_handle_arm_hw_error(gdata, sev);
} else {
void *err = acpi_hest_get_payload(gdata);