Message ID | eed2c4a4fbbb71226ca1944bc7e319bfa9f8aec0.1719471257.git.mchehab+huawei@kernel.org |
---|---|
State | Superseded |
Headers | show |
Series | Add other fields to ARM trace event | expand |
Hi Mauro,

kernel test robot noticed the following build errors:

[auto build test ERROR on rafael-pm/linux-next]
[also build test ERROR on rafael-pm/bleeding-edge linus/master v6.10-rc5 next-20240627]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url: https://github.com/intel-lab-lkp/linux/commits/Mauro-Carvalho-Chehab/RAS-ACPI-APEI-add-conditional-compilation-to-ARM-error-report-functions/20240627-225843
base: https://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git linux-next
patch link: https://lore.kernel.org/r/eed2c4a4fbbb71226ca1944bc7e319bfa9f8aec0.1719471257.git.mchehab%2Bhuawei%40kernel.org
patch subject: [PATCH 2/2] RAS: Report all ARM processor CPER information to userspace
config: arm64-randconfig-002-20240628 (https://download.01.org/0day-ci/archive/20240628/202406281339.b9yJADtu-lkp@intel.com/config)
compiler: aarch64-linux-gcc (GCC) 13.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240628/202406281339.b9yJADtu-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202406281339.b9yJADtu-lkp@intel.com/

All errors (new ones prefixed by >>):

```
   drivers/ras/ras.c: In function 'log_arm_hw_error':
>> drivers/ras/ras.c:73:17: error: assignment to 'u8 *' {aka 'unsigned char *'} from incompatible pointer type 'struct cper_arm_ctx_info *' [-Werror=incompatible-pointer-types]
      73 |         ctx_err = ctx_info;
         |                 ^
   cc1: some warnings being treated as errors
```

vim +73 drivers/ras/ras.c

```
    54	
    55	void log_arm_hw_error(struct cper_sec_proc_arm *err, const u8 sev)
    56	{
    57	#if defined(CONFIG_ARM) || defined(CONFIG_ARM64)
    58		struct cper_arm_err_info *err_info;
    59		struct cper_arm_ctx_info *ctx_info;
    60		u8 *ven_err_data;
    61		u32 ctx_len = 0;
    62		int n, sz, cpu;
    63		s32 vsei_len;
    64		u32 pei_len;
    65		u8 *pei_err;
    66		u8 *ctx_err;
    67	
    68		pei_len = sizeof(struct cper_arm_err_info) * err->err_info_num;
    69		pei_err = (u8 *)err + sizeof(struct cper_sec_proc_arm);
    70	
    71		err_info = (struct cper_arm_err_info *)(err + 1);
    72		ctx_info = (struct cper_arm_ctx_info *)(err_info + err->err_info_num);
  > 73		ctx_err = ctx_info;
    74		for (n = 0; n < err->context_info_num; n++) {
    75			sz = sizeof(struct cper_arm_ctx_info) + ctx_info->size;
    76			ctx_info = (struct cper_arm_ctx_info *)((long)ctx_info + sz);
    77			ctx_len += sz;
    78		}
    79	
    80		vsei_len = err->section_length - (sizeof(struct cper_sec_proc_arm) +
    81						  pei_len + ctx_len);
    82		if (vsei_len < 0) {
    83			pr_warn(FW_BUG
    84				"section length: %d\n", err->section_length);
    85			pr_warn(FW_BUG
    86				"section length is too small\n");
    87			pr_warn(FW_BUG
    88				"firmware-generated error record is incorrect\n");
    89			vsei_len = 0;
    90		}
    91		ven_err_data = (u8 *)ctx_info;
    92	
    93		cpu = GET_LOGICAL_INDEX(err->mpidr);
    94		/* when return value is invalid, set cpu index to -1 */
    95		if (cpu < 0)
    96			cpu = -1;
    97	
    98		trace_arm_event(err, pei_err, pei_len, ctx_err, ctx_len,
    99				ven_err_data, (u32)vsei_len, sev, cpu);
   100	#endif
   101	}
   102	
```
Hi Mauro,

kernel test robot noticed the following build errors:

[auto build test ERROR on rafael-pm/linux-next]
[also build test ERROR on rafael-pm/bleeding-edge linus/master v6.10-rc5 next-20240627]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url: https://github.com/intel-lab-lkp/linux/commits/Mauro-Carvalho-Chehab/RAS-ACPI-APEI-add-conditional-compilation-to-ARM-error-report-functions/20240627-225843
base: https://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git linux-next
patch link: https://lore.kernel.org/r/eed2c4a4fbbb71226ca1944bc7e319bfa9f8aec0.1719471257.git.mchehab%2Bhuawei%40kernel.org
patch subject: [PATCH 2/2] RAS: Report all ARM processor CPER information to userspace
config: arm64-randconfig-003-20240628 (https://download.01.org/0day-ci/archive/20240628/202406281751.Wuf4JcIZ-lkp@intel.com/config)
compiler: clang version 19.0.0git (https://github.com/llvm/llvm-project 326ba38a991250a8587a399a260b0f7af2c9166a)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240628/202406281751.Wuf4JcIZ-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202406281751.Wuf4JcIZ-lkp@intel.com/

All errors (new ones prefixed by >>):

```
   In file included from drivers/ras/ras.c:46:
   In file included from include/ras/ras_event.h:12:
   In file included from include/linux/pci.h:1650:
   In file included from include/linux/dmapool.h:14:
   In file included from include/linux/scatterlist.h:8:
   In file included from include/linux/mm.h:2253:
   include/linux/vmstat.h:514:36: warning: arithmetic between different enumeration types ('enum node_stat_item' and 'enum lru_list') [-Wenum-enum-conversion]
     514 |         return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_"
         |                               ~~~~~~~~~~~ ^ ~~~
>> drivers/ras/ras.c:73:10: error: incompatible pointer types assigning to 'u8 *' (aka 'unsigned char *') from 'struct cper_arm_ctx_info *' [-Werror,-Wincompatible-pointer-types]
      73 |         ctx_err = ctx_info;
         |                 ^ ~~~~~~~~
   1 warning and 1 error generated.
```

vim +73 drivers/ras/ras.c

```
    54	
    55	void log_arm_hw_error(struct cper_sec_proc_arm *err, const u8 sev)
    56	{
    57	#if defined(CONFIG_ARM) || defined(CONFIG_ARM64)
    58		struct cper_arm_err_info *err_info;
    59		struct cper_arm_ctx_info *ctx_info;
    60		u8 *ven_err_data;
    61		u32 ctx_len = 0;
    62		int n, sz, cpu;
    63		s32 vsei_len;
    64		u32 pei_len;
    65		u8 *pei_err;
    66		u8 *ctx_err;
    67	
    68		pei_len = sizeof(struct cper_arm_err_info) * err->err_info_num;
    69		pei_err = (u8 *)err + sizeof(struct cper_sec_proc_arm);
    70	
    71		err_info = (struct cper_arm_err_info *)(err + 1);
    72		ctx_info = (struct cper_arm_ctx_info *)(err_info + err->err_info_num);
  > 73		ctx_err = ctx_info;
    74		for (n = 0; n < err->context_info_num; n++) {
    75			sz = sizeof(struct cper_arm_ctx_info) + ctx_info->size;
    76			ctx_info = (struct cper_arm_ctx_info *)((long)ctx_info + sz);
    77			ctx_len += sz;
    78		}
    79	
    80		vsei_len = err->section_length - (sizeof(struct cper_sec_proc_arm) +
    81						  pei_len + ctx_len);
    82		if (vsei_len < 0) {
    83			pr_warn(FW_BUG
    84				"section length: %d\n", err->section_length);
    85			pr_warn(FW_BUG
    86				"section length is too small\n");
    87			pr_warn(FW_BUG
    88				"firmware-generated error record is incorrect\n");
    89			vsei_len = 0;
    90		}
    91		ven_err_data = (u8 *)ctx_info;
    92	
    93		cpu = GET_LOGICAL_INDEX(err->mpidr);
    94		/* when return value is invalid, set cpu index to -1 */
    95		if (cpu < 0)
    96			cpu = -1;
    97	
    98		trace_arm_event(err, pei_err, pei_len, ctx_err, ctx_len,
    99				ven_err_data, (u32)vsei_len, sev, cpu);
   100	#endif
   101	}
   102	
```
```diff
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 25f876b8fa4c..59ace17c8fd8 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -538,9 +538,8 @@ static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata,
 	int sec_sev, i;
 	char *p;
 
-	log_arm_hw_error(err);
-
 	sec_sev = ghes_severity(gdata->error_severity);
+	log_arm_hw_error(err, sec_sev);
 	if (sev != GHES_SEV_RECOVERABLE || sec_sev != GHES_SEV_RECOVERABLE)
 		return false;
 
diff --git a/drivers/ras/ras.c b/drivers/ras/ras.c
index 5d94ab79c8c3..b515659cc8cc 100644
--- a/drivers/ras/ras.c
+++ b/drivers/ras/ras.c
@@ -52,10 +52,51 @@ void log_non_standard_event(const guid_t *sec_type, const guid_t *fru_id,
 	trace_non_standard_event(sec_type, fru_id, fru_text, sev, err, len);
 }
 
-void log_arm_hw_error(struct cper_sec_proc_arm *err)
+void log_arm_hw_error(struct cper_sec_proc_arm *err, const u8 sev)
 {
 #if defined(CONFIG_ARM) || defined(CONFIG_ARM64)
-	trace_arm_event(err);
+	struct cper_arm_err_info *err_info;
+	struct cper_arm_ctx_info *ctx_info;
+	u8 *ven_err_data;
+	u32 ctx_len = 0;
+	int n, sz, cpu;
+	s32 vsei_len;
+	u32 pei_len;
+	u8 *pei_err;
+	u8 *ctx_err;
+
+	pei_len = sizeof(struct cper_arm_err_info) * err->err_info_num;
+	pei_err = (u8 *)err + sizeof(struct cper_sec_proc_arm);
+
+	err_info = (struct cper_arm_err_info *)(err + 1);
+	ctx_info = (struct cper_arm_ctx_info *)(err_info + err->err_info_num);
+	ctx_err = ctx_info;
+	for (n = 0; n < err->context_info_num; n++) {
+		sz = sizeof(struct cper_arm_ctx_info) + ctx_info->size;
+		ctx_info = (struct cper_arm_ctx_info *)((long)ctx_info + sz);
+		ctx_len += sz;
+	}
+
+	vsei_len = err->section_length - (sizeof(struct cper_sec_proc_arm) +
+					  pei_len + ctx_len);
+	if (vsei_len < 0) {
+		pr_warn(FW_BUG
+			"section length: %d\n", err->section_length);
+		pr_warn(FW_BUG
+			"section length is too small\n");
+		pr_warn(FW_BUG
+			"firmware-generated error record is incorrect\n");
+		vsei_len = 0;
+	}
+	ven_err_data = (u8 *)ctx_info;
+
+	cpu = GET_LOGICAL_INDEX(err->mpidr);
+	/* when return value is invalid, set cpu index to -1 */
+	if (cpu < 0)
+		cpu = -1;
+
+	trace_arm_event(err, pei_err, pei_len, ctx_err, ctx_len,
+			ven_err_data, (u32)vsei_len, sev, cpu);
 #endif
 }
 
diff --git a/include/linux/ras.h b/include/linux/ras.h
index a64182bc72ad..6025afe5736a 100644
--- a/include/linux/ras.h
+++ b/include/linux/ras.h
@@ -24,8 +24,7 @@ int __init parse_cec_param(char *str);
 void log_non_standard_event(const guid_t *sec_type,
 			    const guid_t *fru_id, const char *fru_text,
 			    const u8 sev, const u8 *err, const u32 len);
-void log_arm_hw_error(struct cper_sec_proc_arm *err);
-
+void log_arm_hw_error(struct cper_sec_proc_arm *err, const u8 sev);
 #else
 static inline void
 log_non_standard_event(const guid_t *sec_type,
@@ -33,7 +32,7 @@ log_non_standard_event(const guid_t *sec_type,
 		       const u8 sev, const u8 *err, const u32 len)
 { return; }
 static inline void
-log_arm_hw_error(struct cper_sec_proc_arm *err) { return; }
+log_arm_hw_error(struct cper_sec_proc_arm *err, const u8 sev) { return; }
 #endif
 
 struct atl_err {
@@ -52,5 +51,14 @@ static inline void amd_retire_dram_row(struct atl_err *err) { }
 static inline unsigned long
 amd_convert_umc_mca_addr_to_sys_addr(struct atl_err *err) { return -EINVAL; }
 #endif /* CONFIG_AMD_ATL */
-
+#if defined(CONFIG_ARM) || defined(CONFIG_ARM64)
+#include <asm/smp_plat.h>
+/*
+ * Include ARM specific SMP header which provides a function mapping mpidr to
+ * cpu logical index.
+ */
+#define GET_LOGICAL_INDEX(mpidr) get_logical_index(mpidr & MPIDR_HWID_BITMASK)
+#else
+#define GET_LOGICAL_INDEX(mpidr) -EINVAL
+#endif /* CONFIG_ARM || CONFIG_ARM64 */
 #endif /* __RAS_H__ */
diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
index 7c47151d5c72..ce5214f008eb 100644
--- a/include/ras/ras_event.h
+++ b/include/ras/ras_event.h
@@ -168,11 +168,24 @@ TRACE_EVENT(mc_event,
  * This event is generated when hardware detects an ARM processor error
  * has occurred. UEFI 2.6 spec section N.2.4.4.
  */
+#define APEIL "ARM Processor Err Info data len"
+#define APEID "ARM Processor Err Info raw data"
+#define APECIL "ARM Processor Err Context Info data len"
+#define APECID "ARM Processor Err Context Info raw data"
+#define VSEIL "Vendor Specific Err Info data len"
+#define VSEID "Vendor Specific Err Info raw data"
 TRACE_EVENT(arm_event,
 
-	TP_PROTO(const struct cper_sec_proc_arm *proc),
+	TP_PROTO(const struct cper_sec_proc_arm *proc, const u8 *pei_err,
+			const u32 pei_len,
+			const u8 *ctx_err,
+			const u32 ctx_len,
+			const u8 *oem,
+			const u32 oem_len,
+			u8 sev,
+			int cpu),
 
-	TP_ARGS(proc),
+	TP_ARGS(proc, pei_err, pei_len, ctx_err, ctx_len, oem, oem_len, sev, cpu),
 
 	TP_STRUCT__entry(
 		__field(u64, mpidr)
@@ -180,6 +193,14 @@ TRACE_EVENT(arm_event,
 		__field(u32, running_state)
 		__field(u32, psci_state)
 		__field(u8, affinity)
+		__field(u32, pei_len)
+		__dynamic_array(u8, buf, pei_len)
+		__field(u32, ctx_len)
+		__dynamic_array(u8, buf1, ctx_len)
+		__field(u32, oem_len)
+		__dynamic_array(u8, buf2, oem_len)
+		__field(u8, sev)
+		__field(int, cpu)
 	),
 
 	TP_fast_assign(
@@ -199,12 +220,29 @@ TRACE_EVENT(arm_event,
 			__entry->running_state = ~0;
 			__entry->psci_state = ~0;
 		}
+		__entry->pei_len = pei_len;
+		memcpy(__get_dynamic_array(buf), pei_err, pei_len);
+		__entry->ctx_len = ctx_len;
+		memcpy(__get_dynamic_array(buf1), ctx_err, ctx_len);
+		__entry->oem_len = oem_len;
+		memcpy(__get_dynamic_array(buf2), oem, oem_len);
+		__entry->sev = sev;
+		__entry->cpu = cpu;
 	),
 
-	TP_printk("affinity level: %d; MPIDR: %016llx; MIDR: %016llx; "
-		  "running state: %d; PSCI state: %d",
+	TP_printk("cpu: %d; error: %d; affinity level: %d; MPIDR: %016llx; MIDR: %016llx; "
+		  "running state: %d; PSCI state: %d; "
+		  "%s: %d; %s: %s; %s: %d; %s: %s; %s: %d; %s: %s",
+		  __entry->cpu,
+		  __entry->sev,
 		  __entry->affinity, __entry->mpidr, __entry->midr,
-		  __entry->running_state, __entry->psci_state)
+		  __entry->running_state, __entry->psci_state,
+		  APEIL, __entry->pei_len, APEID,
+		  __print_hex(__get_dynamic_array(buf), __entry->pei_len),
+		  APECIL, __entry->ctx_len, APECID,
+		  __print_hex(__get_dynamic_array(buf1), __entry->ctx_len),
+		  VSEIL, __entry->oem_len, VSEID,
+		  __print_hex(__get_dynamic_array(buf2), __entry->oem_len))
 );
 
 /*
```