[12/19] EDAC: Store error type in struct edac_raw_error_desc

Message ID 20191010202418.25098-13-rrichter@marvell.com
State New
Headers show
Series
  • EDAC: Rework edac_mc and ghes drivers
Related show

Commit Message

Robert Richter Oct. 10, 2019, 8:25 p.m.
Store the error type in struct edac_raw_error_desc. This makes the
type parameter of edac_raw_mc_handle_error() obsolete.

Signed-off-by: Robert Richter <rrichter@marvell.com>

---
 drivers/edac/edac_mc.c   |  8 ++++----
 drivers/edac/edac_mc.h   |  4 +---
 drivers/edac/ghes_edac.c | 13 ++++++-------
 include/linux/edac.h     |  1 +
 4 files changed, 12 insertions(+), 14 deletions(-)

-- 
2.20.1

Comments

Mauro Carvalho Chehab Oct. 11, 2019, 10:54 a.m. | #1
Em Thu, 10 Oct 2019 20:25:29 +0000
Robert Richter <rrichter@marvell.com> escreveu:

> Store the error type in struct edac_raw_error_desc. This makes the

> type parameter of edac_raw_mc_handle_error() obsolete.


I don't see much gain on this change, but whatever works best for
ghes.

> 

> Signed-off-by: Robert Richter <rrichter@marvell.com>

> ---

>  drivers/edac/edac_mc.c   |  8 ++++----

>  drivers/edac/edac_mc.h   |  4 +---

>  drivers/edac/ghes_edac.c | 13 ++++++-------

>  include/linux/edac.h     |  1 +

>  4 files changed, 12 insertions(+), 14 deletions(-)

> 

> diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c

> index cdfb383f7a35..ca206854b8ee 100644

> --- a/drivers/edac/edac_mc.c

> +++ b/drivers/edac/edac_mc.c

> @@ -1040,15 +1040,14 @@ static void edac_ue_error(struct mem_ctl_info *mci,

>  	edac_inc_ue_error(mci, dimm, error_count);

>  }

>  

> -void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type,

> -			      struct mem_ctl_info *mci,

> +void edac_raw_mc_handle_error(struct mem_ctl_info *mci,

>  			      struct dimm_info *dimm,

>  			      struct edac_raw_error_desc *e)

>  {

>  	char detail[80];

>  

>  	/* Memory type dependent details about the error */

> -	if (type == HW_EVENT_ERR_CORRECTED) {

> +	if (e->type == HW_EVENT_ERR_CORRECTED) {

>  		snprintf(detail, sizeof(detail),

>  			"page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx",

>  			e->page_frame_number, e->offset_in_page,

> @@ -1095,6 +1094,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,

>  	/* Fills the error report buffer */

>  	memset(e, 0, sizeof (*e));

>  	e->error_count = error_count;

> +	e->type = type;

>  	e->top_layer = top_layer;

>  	e->mid_layer = mid_layer;

>  	e->low_layer = low_layer;

> @@ -1243,6 +1243,6 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,

>  

>  	dimm = edac_get_dimm(mci, top_layer, mid_layer, low_layer);

>  

> -	edac_raw_mc_handle_error(type, mci, dimm, e);

> +	edac_raw_mc_handle_error(mci, dimm, e);

>  }

>  EXPORT_SYMBOL_GPL(edac_mc_handle_error);

> diff --git a/drivers/edac/edac_mc.h b/drivers/edac/edac_mc.h

> index 2c3e2fbcedc4..a8f1b5b5e873 100644

> --- a/drivers/edac/edac_mc.h

> +++ b/drivers/edac/edac_mc.h

> @@ -212,7 +212,6 @@ extern int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci,

>   * edac_raw_mc_handle_error() - Reports a memory event to userspace without

>   *	doing anything to discover the error location.

>   *

> - * @type:		severity of the error (CE/UE/Fatal)

>   * @mci:		a struct mem_ctl_info pointer

>   * @dimm:		a struct dimm_info pointer

>   * @e:			error description

> @@ -221,8 +220,7 @@ extern int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci,

>   * only be called directly when the hardware error come directly from BIOS,

>   * like in the case of APEI GHES driver.

>   */

> -void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type,

> -			      struct mem_ctl_info *mci,

> +void edac_raw_mc_handle_error(struct mem_ctl_info *mci,

>  			      struct dimm_info *dimm,

>  			      struct edac_raw_error_desc *e);

>  

> diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c

> index 4f5721cf4380..1db1c012bed9 100644

> --- a/drivers/edac/ghes_edac.c

> +++ b/drivers/edac/ghes_edac.c

> @@ -194,7 +194,6 @@ static void ghes_edac_dmidecode(const struct dmi_header *dh, void *arg)

>  void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)

>  {

>  	struct dimm_info *dimm;

> -	enum hw_event_mc_err_type type;

>  	struct edac_raw_error_desc *e;

>  	struct mem_ctl_info *mci;

>  	struct ghes_edac_pvt *pvt = ghes_pvt;

> @@ -232,17 +231,17 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)

>  

>  	switch (sev) {

>  	case GHES_SEV_CORRECTED:

> -		type = HW_EVENT_ERR_CORRECTED;

> +		e->type = HW_EVENT_ERR_CORRECTED;

>  		break;

>  	case GHES_SEV_RECOVERABLE:

> -		type = HW_EVENT_ERR_UNCORRECTED;

> +		e->type = HW_EVENT_ERR_UNCORRECTED;

>  		break;

>  	case GHES_SEV_PANIC:

> -		type = HW_EVENT_ERR_FATAL;

> +		e->type = HW_EVENT_ERR_FATAL;

>  		break;

>  	default:

>  	case GHES_SEV_NO:

> -		type = HW_EVENT_ERR_INFO;

> +		e->type = HW_EVENT_ERR_INFO;

>  	}

>  

>  	edac_dbg(1, "error validation_bits: 0x%08llx\n",

> @@ -433,14 +432,14 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)

>  	grain_bits = fls_long(e->grain);

>  	snprintf(pvt->detail_location, sizeof(pvt->detail_location),

>  		 "APEI location: %s %s", e->location, e->other_detail);

> -	trace_mc_event(type, e->msg, e->label, e->error_count,

> +	trace_mc_event(e->type, e->msg, e->label, e->error_count,

>  		       mci->mc_idx, e->top_layer, e->mid_layer, e->low_layer,

>  		       (e->page_frame_number << PAGE_SHIFT) | e->offset_in_page,

>  		       grain_bits, e->syndrome, pvt->detail_location);

>  

>  	dimm = edac_get_dimm_by_index(mci, e->top_layer);

>  

> -	edac_raw_mc_handle_error(type, mci, dimm, e);

> +	edac_raw_mc_handle_error(mci, dimm, e);

>  

>  	spin_unlock_irqrestore(&ghes_lock, flags);

>  }

> diff --git a/include/linux/edac.h b/include/linux/edac.h

> index 4d9673954856..587c53b87fdf 100644

> --- a/include/linux/edac.h

> +++ b/include/linux/edac.h

> @@ -463,6 +463,7 @@ struct edac_raw_error_desc {

>  	long grain;

>  

>  	u16 error_count;

> +	enum hw_event_mc_err_type type;

>  	int top_layer;

>  	int mid_layer;

>  	int low_layer;




Thanks,
Mauro
Robert Richter Oct. 14, 2019, 11:47 a.m. | #2
On 11.10.19 07:54:19, Mauro Carvalho Chehab wrote:
> Em Thu, 10 Oct 2019 20:25:29 +0000

> Robert Richter <rrichter@marvell.com> escreveu:

> 

> > Store the error type in struct edac_raw_error_desc. This makes the

> > type parameter of edac_raw_mc_handle_error() obsolete.

> 

> I don't see much gain on this change, but whatever works best for

> ghes.


The error type is part of the description of the error. It makes sense
to keep it in struct edac_raw_error_desc because that simplifies the
function interface of edac_raw_mc_handle_error(). There is no reason to
have a separate function argument for the type while all other error
data is in edac_raw_error_desc.

This change might look trivial, but this series contains many small
changes like this, and in the end they add up to a reasonable change to
the function's signature that describes it much better:

void edac_raw_mc_handle_error(struct edac_raw_error_desc *e,
                          struct dimm_info *dimm);

... which reads as: handle the error described in e that affects this dimm.

-Robert

Patch

diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index cdfb383f7a35..ca206854b8ee 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -1040,15 +1040,14 @@  static void edac_ue_error(struct mem_ctl_info *mci,
 	edac_inc_ue_error(mci, dimm, error_count);
 }
 
-void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type,
-			      struct mem_ctl_info *mci,
+void edac_raw_mc_handle_error(struct mem_ctl_info *mci,
 			      struct dimm_info *dimm,
 			      struct edac_raw_error_desc *e)
 {
 	char detail[80];
 
 	/* Memory type dependent details about the error */
-	if (type == HW_EVENT_ERR_CORRECTED) {
+	if (e->type == HW_EVENT_ERR_CORRECTED) {
 		snprintf(detail, sizeof(detail),
 			"page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx",
 			e->page_frame_number, e->offset_in_page,
@@ -1095,6 +1094,7 @@  void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 	/* Fills the error report buffer */
 	memset(e, 0, sizeof (*e));
 	e->error_count = error_count;
+	e->type = type;
 	e->top_layer = top_layer;
 	e->mid_layer = mid_layer;
 	e->low_layer = low_layer;
@@ -1243,6 +1243,6 @@  void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 
 	dimm = edac_get_dimm(mci, top_layer, mid_layer, low_layer);
 
-	edac_raw_mc_handle_error(type, mci, dimm, e);
+	edac_raw_mc_handle_error(mci, dimm, e);
 }
 EXPORT_SYMBOL_GPL(edac_mc_handle_error);
diff --git a/drivers/edac/edac_mc.h b/drivers/edac/edac_mc.h
index 2c3e2fbcedc4..a8f1b5b5e873 100644
--- a/drivers/edac/edac_mc.h
+++ b/drivers/edac/edac_mc.h
@@ -212,7 +212,6 @@  extern int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci,
  * edac_raw_mc_handle_error() - Reports a memory event to userspace without
  *	doing anything to discover the error location.
  *
- * @type:		severity of the error (CE/UE/Fatal)
  * @mci:		a struct mem_ctl_info pointer
  * @dimm:		a struct dimm_info pointer
  * @e:			error description
@@ -221,8 +220,7 @@  extern int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci,
  * only be called directly when the hardware error come directly from BIOS,
  * like in the case of APEI GHES driver.
  */
-void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type,
-			      struct mem_ctl_info *mci,
+void edac_raw_mc_handle_error(struct mem_ctl_info *mci,
 			      struct dimm_info *dimm,
 			      struct edac_raw_error_desc *e);
 
diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c
index 4f5721cf4380..1db1c012bed9 100644
--- a/drivers/edac/ghes_edac.c
+++ b/drivers/edac/ghes_edac.c
@@ -194,7 +194,6 @@  static void ghes_edac_dmidecode(const struct dmi_header *dh, void *arg)
 void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
 {
 	struct dimm_info *dimm;
-	enum hw_event_mc_err_type type;
 	struct edac_raw_error_desc *e;
 	struct mem_ctl_info *mci;
 	struct ghes_edac_pvt *pvt = ghes_pvt;
@@ -232,17 +231,17 @@  void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
 
 	switch (sev) {
 	case GHES_SEV_CORRECTED:
-		type = HW_EVENT_ERR_CORRECTED;
+		e->type = HW_EVENT_ERR_CORRECTED;
 		break;
 	case GHES_SEV_RECOVERABLE:
-		type = HW_EVENT_ERR_UNCORRECTED;
+		e->type = HW_EVENT_ERR_UNCORRECTED;
 		break;
 	case GHES_SEV_PANIC:
-		type = HW_EVENT_ERR_FATAL;
+		e->type = HW_EVENT_ERR_FATAL;
 		break;
 	default:
 	case GHES_SEV_NO:
-		type = HW_EVENT_ERR_INFO;
+		e->type = HW_EVENT_ERR_INFO;
 	}
 
 	edac_dbg(1, "error validation_bits: 0x%08llx\n",
@@ -433,14 +432,14 @@  void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
 	grain_bits = fls_long(e->grain);
 	snprintf(pvt->detail_location, sizeof(pvt->detail_location),
 		 "APEI location: %s %s", e->location, e->other_detail);
-	trace_mc_event(type, e->msg, e->label, e->error_count,
+	trace_mc_event(e->type, e->msg, e->label, e->error_count,
 		       mci->mc_idx, e->top_layer, e->mid_layer, e->low_layer,
 		       (e->page_frame_number << PAGE_SHIFT) | e->offset_in_page,
 		       grain_bits, e->syndrome, pvt->detail_location);
 
 	dimm = edac_get_dimm_by_index(mci, e->top_layer);
 
-	edac_raw_mc_handle_error(type, mci, dimm, e);
+	edac_raw_mc_handle_error(mci, dimm, e);
 
 	spin_unlock_irqrestore(&ghes_lock, flags);
 }
diff --git a/include/linux/edac.h b/include/linux/edac.h
index 4d9673954856..587c53b87fdf 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -463,6 +463,7 @@  struct edac_raw_error_desc {
 	long grain;
 
 	u16 error_count;
+	enum hw_event_mc_err_type type;
 	int top_layer;
 	int mid_layer;
 	int low_layer;