Message ID | 20240102150933.161009-3-Smita.KoralahalliChannabasappa@amd.com |
---|---|
State | Superseded |
Headers | show |
Series | acpi/ghes, cper, cxl: Trace FW-First CXL Protocol Errors | expand |
Smita Koralahalli wrote: > In preparation to add tracepoint support, move protocol error UUID > definition to a common location and make CXL RAS capability struct > global for use across different modules. > > Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com> [snip] > diff --git a/drivers/firmware/efi/cper_cxl.h b/drivers/firmware/efi/cper_cxl.h > index 86bfcf7909ec..6f8c00495708 100644 > --- a/drivers/firmware/efi/cper_cxl.h > +++ b/drivers/firmware/efi/cper_cxl.h > @@ -7,14 +7,11 @@ > * Author: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com> > */ > > +#include <linux/cxl-event.h> > + > #ifndef LINUX_CPER_CXL_H > #define LINUX_CPER_CXL_H > > -/* CXL Protocol Error Section */ > -#define CPER_SEC_CXL_PROT_ERR \ > - GUID_INIT(0x80B9EFB4, 0x52B5, 0x4DE3, 0xA7, 0x77, 0x68, 0x78, \ > - 0x4B, 0x77, 0x10, 0x48) > - > #pragma pack(1) > > /* Compute Express Link Protocol Error Section, UEFI v2.10 sec N.2.13 */ > diff --git a/include/linux/cper.h b/include/linux/cper.h > index c1a7dc325121..2cbf0a93785a 100644 > --- a/include/linux/cper.h > +++ b/include/linux/cper.h > @@ -89,6 +89,10 @@ enum { > #define CPER_NOTIFY_DMAR \ > GUID_INIT(0x667DD791, 0xC6B3, 0x4c27, 0x8A, 0x6B, 0x0F, 0x8E, \ > 0x72, 0x2D, 0xEB, 0x41) > +/* CXL Protocol Error Section */ > +#define CPER_SEC_CXL_PROT_ERR \ > + GUID_INIT(0x80B9EFB4, 0x52B5, 0x4DE3, 0xA7, 0x77, 0x68, 0x78, \ > + 0x4B, 0x77, 0x10, 0x48) Is this shared with code outside of GHES? I did not need my GUID defines outside of ghes.c and, further, because the events are defined as UUIDs, I chose to keep the GUID definition as local as possible to ghes.c. Can you do the same with this define? The rest looks good, Ira [snip]
On 1/2/2024 8:30 AM, Ira Weiny wrote: > Smita Koralahalli wrote: >> In preparation to add tracepoint support, move protocol error UUID >> definition to a common location and make CXL RAS capability struct >> global for use across different modules. >> >> Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com> > > [snip] > >> diff --git a/drivers/firmware/efi/cper_cxl.h b/drivers/firmware/efi/cper_cxl.h >> index 86bfcf7909ec..6f8c00495708 100644 >> --- a/drivers/firmware/efi/cper_cxl.h >> +++ b/drivers/firmware/efi/cper_cxl.h >> @@ -7,14 +7,11 @@ >> * Author: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com> >> */ >> >> +#include <linux/cxl-event.h> >> + >> #ifndef LINUX_CPER_CXL_H >> #define LINUX_CPER_CXL_H >> >> -/* CXL Protocol Error Section */ >> -#define CPER_SEC_CXL_PROT_ERR \ >> - GUID_INIT(0x80B9EFB4, 0x52B5, 0x4DE3, 0xA7, 0x77, 0x68, 0x78, \ >> - 0x4B, 0x77, 0x10, 0x48) >> - >> #pragma pack(1) >> >> /* Compute Express Link Protocol Error Section, UEFI v2.10 sec N.2.13 */ >> diff --git a/include/linux/cper.h b/include/linux/cper.h >> index c1a7dc325121..2cbf0a93785a 100644 >> --- a/include/linux/cper.h >> +++ b/include/linux/cper.h >> @@ -89,6 +89,10 @@ enum { >> #define CPER_NOTIFY_DMAR \ >> GUID_INIT(0x667DD791, 0xC6B3, 0x4c27, 0x8A, 0x6B, 0x0F, 0x8E, \ >> 0x72, 0x2D, 0xEB, 0x41) >> +/* CXL Protocol Error Section */ >> +#define CPER_SEC_CXL_PROT_ERR \ >> + GUID_INIT(0x80B9EFB4, 0x52B5, 0x4DE3, 0xA7, 0x77, 0x68, 0x78, \ >> + 0x4B, 0x77, 0x10, 0x48) > > Is this shared with code outside of GHES? I did not need my GUID defines > outside of ghes.c and further becuase the events are defined as UUID's I > chose to keep the GUID definition as local as possible to ghes.c. > > Can you do the same with this define? Actually, it is shared with efi/cper. https://elixir.bootlin.com/linux/v6.7-rc8/source/drivers/firmware/efi/cper.c#L602 But this would be something to look into. 
Should we continue to support logging from efi/cper or just confine it to ghes? If we just log it from ghes similar to component events, we might lose error records from RCH Downstream Port and other agent_types which do not log device_ids. Also, I'm not sure how useful the other fields in the protocol error CPER are, the ones like Capability struct and DVSEC len etc., as the tracepoints don't log all of them. Thanks, Smita > > The rest looks good, > Ira > > [snip] >
Smita Koralahalli wrote: > On 1/2/2024 8:30 AM, Ira Weiny wrote: > > Smita Koralahalli wrote: > >> In preparation to add tracepoint support, move protocol error UUID > >> definition to a common location and make CXL RAS capability struct > >> global for use across different modules. > >> > >> Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com> > > > > [snip] > > > >> diff --git a/drivers/firmware/efi/cper_cxl.h b/drivers/firmware/efi/cper_cxl.h > >> index 86bfcf7909ec..6f8c00495708 100644 > >> --- a/drivers/firmware/efi/cper_cxl.h > >> +++ b/drivers/firmware/efi/cper_cxl.h > >> @@ -7,14 +7,11 @@ > >> * Author: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com> > >> */ > >> > >> +#include <linux/cxl-event.h> > >> + > >> #ifndef LINUX_CPER_CXL_H > >> #define LINUX_CPER_CXL_H > >> > >> -/* CXL Protocol Error Section */ > >> -#define CPER_SEC_CXL_PROT_ERR \ > >> - GUID_INIT(0x80B9EFB4, 0x52B5, 0x4DE3, 0xA7, 0x77, 0x68, 0x78, \ > >> - 0x4B, 0x77, 0x10, 0x48) > >> - > >> #pragma pack(1) > >> > >> /* Compute Express Link Protocol Error Section, UEFI v2.10 sec N.2.13 */ > >> diff --git a/include/linux/cper.h b/include/linux/cper.h > >> index c1a7dc325121..2cbf0a93785a 100644 > >> --- a/include/linux/cper.h > >> +++ b/include/linux/cper.h > >> @@ -89,6 +89,10 @@ enum { > >> #define CPER_NOTIFY_DMAR \ > >> GUID_INIT(0x667DD791, 0xC6B3, 0x4c27, 0x8A, 0x6B, 0x0F, 0x8E, \ > >> 0x72, 0x2D, 0xEB, 0x41) > >> +/* CXL Protocol Error Section */ > >> +#define CPER_SEC_CXL_PROT_ERR \ > >> + GUID_INIT(0x80B9EFB4, 0x52B5, 0x4DE3, 0xA7, 0x77, 0x68, 0x78, \ > >> + 0x4B, 0x77, 0x10, 0x48) > > > > Is this shared with code outside of GHES? I did not need my GUID defines > > outside of ghes.c and further becuase the events are defined as UUID's I > > chose to keep the GUID definition as local as possible to ghes.c. > > > > Can you do the same with this define? > > Actually, it is shared with efi/cper. 
> https://elixir.bootlin.com/linux/v6.7-rc8/source/drivers/firmware/efi/cper.c#L602 Ah ok. > > But this would be something to look into. Should we continue to support > logging from efi/cper or just confine it to ghes.. I missed that you were not removing the efi/cper print. I kind of thought that was part of the series. > > If we just log it from ghes similar to component events, we might loose > error records from RCH Downstream Port and other agent_types which do > not log device_ids. That is a good reason to keep the efi/cper print AFAICS. > Also, I'm not sure how useful are other fields in > protocol error CPER, the ones like Capability struct and DVSEC len etc > as the tracepoints doesn't log all of them. I'm not sure about their importance but if they are important I would say they should be added to the tracepoint. Ira
diff --git a/drivers/firmware/efi/cper_cxl.c b/drivers/firmware/efi/cper_cxl.c index a55771b99a97..4fd8d783993e 100644 --- a/drivers/firmware/efi/cper_cxl.c +++ b/drivers/firmware/efi/cper_cxl.c @@ -18,17 +18,6 @@ #define PROT_ERR_VALID_DVSEC BIT_ULL(5) #define PROT_ERR_VALID_ERROR_LOG BIT_ULL(6) -/* CXL RAS Capability Structure, CXL v3.0 sec 8.2.4.16 */ -struct cxl_ras_capability_regs { - u32 uncor_status; - u32 uncor_mask; - u32 uncor_severity; - u32 cor_status; - u32 cor_mask; - u32 cap_control; - u32 header_log[16]; -}; - static const char * const prot_err_agent_type_strs[] = { "Restricted CXL Device", "Restricted CXL Host Downstream Port", diff --git a/drivers/firmware/efi/cper_cxl.h b/drivers/firmware/efi/cper_cxl.h index 86bfcf7909ec..6f8c00495708 100644 --- a/drivers/firmware/efi/cper_cxl.h +++ b/drivers/firmware/efi/cper_cxl.h @@ -7,14 +7,11 @@ * Author: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com> */ +#include <linux/cxl-event.h> + #ifndef LINUX_CPER_CXL_H #define LINUX_CPER_CXL_H -/* CXL Protocol Error Section */ -#define CPER_SEC_CXL_PROT_ERR \ - GUID_INIT(0x80B9EFB4, 0x52B5, 0x4DE3, 0xA7, 0x77, 0x68, 0x78, \ - 0x4B, 0x77, 0x10, 0x48) - #pragma pack(1) /* Compute Express Link Protocol Error Section, UEFI v2.10 sec N.2.13 */ diff --git a/include/linux/cper.h b/include/linux/cper.h index c1a7dc325121..2cbf0a93785a 100644 --- a/include/linux/cper.h +++ b/include/linux/cper.h @@ -89,6 +89,10 @@ enum { #define CPER_NOTIFY_DMAR \ GUID_INIT(0x667DD791, 0xC6B3, 0x4c27, 0x8A, 0x6B, 0x0F, 0x8E, \ 0x72, 0x2D, 0xEB, 0x41) +/* CXL Protocol Error Section */ +#define CPER_SEC_CXL_PROT_ERR \ + GUID_INIT(0x80B9EFB4, 0x52B5, 0x4DE3, 0xA7, 0x77, 0x68, 0x78, \ + 0x4B, 0x77, 0x10, 0x48) /* * Flags bits definitions for flags in struct cper_record_header diff --git a/include/linux/cxl-event.h b/include/linux/cxl-event.h index afa71ee0437c..90d8390a73cb 100644 --- a/include/linux/cxl-event.h +++ b/include/linux/cxl-event.h @@ -141,6 +141,17 @@ struct 
cxl_cper_event_rec { union cxl_event event; } __packed; +/* CXL RAS Capability Structure, CXL v3.0 sec 8.2.4.16 */ +struct cxl_ras_capability_regs { + u32 uncor_status; + u32 uncor_mask; + u32 uncor_severity; + u32 cor_status; + u32 cor_mask; + u32 cap_control; + u32 header_log[16]; +}; + struct cxl_cper_rec_data { struct cxl_cper_event_rec rec; };
In preparation to add tracepoint support, move protocol error UUID definition to a common location and make CXL RAS capability struct global for use across different modules. Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com> --- drivers/firmware/efi/cper_cxl.c | 11 ----------- drivers/firmware/efi/cper_cxl.h | 7 ++----- include/linux/cper.h | 4 ++++ include/linux/cxl-event.h | 11 +++++++++++ 4 files changed, 17 insertions(+), 16 deletions(-)