[RFC,1/4] ACPI: APEI: Add support to notify the vendor specific HW errors

Message ID 20190812101149.26036-2-shiju.jose@huawei.com
State New
Headers show
Series
  • [RFC,1/4] ACPI: APEI: Add support to notify the vendor specific HW errors
Related show

Commit Message

Shiju Jose Aug. 12, 2019, 10:11 a.m.
Presently the vendor specific HW errors, in the non-standard format,
are not reported to the vendor drivers for the recovery.

This patch adds support to notify the vendor specific HW errors to the
registered kernel drivers.

Signed-off-by: Shiju Jose <shiju.jose@huawei.com>

---
 drivers/acpi/apei/ghes.c | 118 +++++++++++++++++++++++++++++++++++++++++++++--
 include/acpi/ghes.h      |  47 +++++++++++++++++++
 2 files changed, 160 insertions(+), 5 deletions(-)

-- 
1.9.1

Patch

diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index a66e00f..374d197 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -477,6 +477,77 @@  static void ghes_handle_aer(struct acpi_hest_generic_data *gdata)
 #endif
 }
 
+struct ghes_error_notify {
+	struct list_head list;
+	struct rcu_head	rcu_head;
+	guid_t sec_type; /* guid of the error record */
+	error_handle handle; /* error handler function */
+	void *data; /* handler driver's private data if any */
+};
+
+/* List to store the registered error handling functions */
+static DEFINE_MUTEX(ghes_error_notify_mutex);
+static LIST_HEAD(ghes_error_notify_list);
+static refcount_t ghes_ref_count;
+
+/**
+ * ghes_error_notify_register - register an error handling function
+ * for the hw errors.
+ * @sec_type: sec_type of the corresponding CPER to be notified.
+ * @handle: pointer to the error handling function.
+ * @data: handler driver's private data.
+ *
+ * return 0 : SUCCESS, non-zero : FAIL
+ */
+int ghes_error_notify_register(guid_t sec_type, error_handle handle, void *data)
+{
+	struct ghes_error_notify *err_notify;
+
+	mutex_lock(&ghes_error_notify_mutex);
+	err_notify = kzalloc(sizeof(*err_notify), GFP_KERNEL);
+	if (!err_notify)
+		return -ENOMEM;
+
+	err_notify->handle = handle;
+	guid_copy(&err_notify->sec_type, &sec_type);
+	err_notify->data = data;
+	list_add_rcu(&err_notify->list, &ghes_error_notify_list);
+	mutex_unlock(&ghes_error_notify_mutex);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ghes_error_notify_register);
+
+/**
+ * ghes_error_notify_unregister - unregister an error handling function.
+ * @sec_type: sec_type of the corresponding CPER.
+ * @handle: pointer to the error handling function.
+ *
+ * return none.
+ */
+void ghes_error_notify_unregister(guid_t sec_type, error_handle handle)
+{
+	struct ghes_error_notify *err_notify;
+	bool found = 0;
+
+	mutex_lock(&ghes_error_notify_mutex);
+	rcu_read_lock();
+	list_for_each_entry_rcu(err_notify, &ghes_error_notify_list, list) {
+		if (guid_equal(&err_notify->sec_type, &sec_type) &&
+		    err_notify->handle == handle) {
+			list_del_rcu(&err_notify->list);
+			found = 1;
+			break;
+		}
+	}
+	rcu_read_unlock();
+	synchronize_rcu();
+	mutex_unlock(&ghes_error_notify_mutex);
+	if (found)
+		kfree(err_notify);
+}
+EXPORT_SYMBOL_GPL(ghes_error_notify_unregister);
+
 static void ghes_do_proc(struct ghes *ghes,
 			 const struct acpi_hest_generic_status *estatus)
 {
@@ -485,6 +556,8 @@  static void ghes_do_proc(struct ghes *ghes,
 	guid_t *sec_type;
 	guid_t *fru_id = &NULL_UUID_LE;
 	char *fru_text = "";
+	bool is_notify = 0;
+	struct ghes_error_notify *err_notify;
 
 	sev = ghes_severity(estatus->error_severity);
 	apei_estatus_for_each_section(estatus, gdata) {
@@ -512,11 +585,29 @@  static void ghes_do_proc(struct ghes *ghes,
 
 			log_arm_hw_error(err);
 		} else {
-			void *err = acpi_hest_get_payload(gdata);
-
-			log_non_standard_event(sec_type, fru_id, fru_text,
-					       sec_sev, err,
-					       gdata->error_data_length);
+			rcu_read_lock();
+			list_for_each_entry_rcu(err_notify,
+						&ghes_error_notify_list, list) {
+				if (guid_equal(&err_notify->sec_type,
+					       sec_type)) {
+					/* The notification is called in the
+					 * interrupt context, thus the handler
+					 * functions should be take care of it.
+					 */
+					err_notify->handle(gdata, sev,
+							   err_notify->data);
+					is_notify = 1;
+				}
+			}
+			rcu_read_unlock();
+
+			if (!is_notify) {
+				void *err = acpi_hest_get_payload(gdata);
+
+				log_non_standard_event(sec_type, fru_id,
+						       fru_text, sec_sev, err,
+						       gdata->error_data_length);
+			}
 		}
 	}
 }
@@ -1217,6 +1308,11 @@  static int ghes_probe(struct platform_device *ghes_dev)
 
 	ghes_edac_register(ghes, &ghes_dev->dev);
 
+	if (!refcount_read(&ghes_ref_count))
+		refcount_set(&ghes_ref_count, 1);
+	else
+		refcount_inc(&ghes_ref_count);
+
 	/* Handle any pending errors right away */
 	spin_lock_irqsave(&ghes_notify_lock_irq, flags);
 	ghes_proc(ghes);
@@ -1237,6 +1333,7 @@  static int ghes_remove(struct platform_device *ghes_dev)
 	int rc;
 	struct ghes *ghes;
 	struct acpi_hest_generic *generic;
+	struct ghes_error_notify *err_notify, *tmp;
 
 	ghes = platform_get_drvdata(ghes_dev);
 	generic = ghes->generic;
@@ -1279,6 +1376,17 @@  static int ghes_remove(struct platform_device *ghes_dev)
 
 	ghes_fini(ghes);
 
+	if (refcount_dec_and_test(&ghes_ref_count) &&
+	    !list_empty(&ghes_error_notify_list)) {
+		mutex_lock(&ghes_error_notify_mutex);
+		list_for_each_entry_safe(err_notify, tmp,
+					 &ghes_error_notify_list, list) {
+			list_del_rcu(&err_notify->list);
+			kfree_rcu(err_notify, rcu_head);
+		}
+		mutex_unlock(&ghes_error_notify_mutex);
+	}
+
 	ghes_edac_unregister(ghes);
 
 	kfree(ghes);
diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h
index e3f1cdd..d480537 100644
--- a/include/acpi/ghes.h
+++ b/include/acpi/ghes.h
@@ -50,6 +50,53 @@  enum {
 	GHES_SEV_PANIC = 0x3,
 };
 
+/**
+ * error_handle - error handling function for the hw errors.
+ * This handle function is called in the interrupt context.
+ * @gdata: acpi_hest_generic_data.
+ * @sev: error severity of the entire error event defined in the
+ * ACPI spec table generic error status block.
+ * @data: handler driver's private data.
+ *
+ * return : none.
+ */
+typedef void (*error_handle)(struct acpi_hest_generic_data *gdata, int sev,
+			     void *data);
+
+#ifdef CONFIG_ACPI_APEI_GHES
+/**
+ * ghes_error_notify_register - register an error handling function
+ * for the hw errors.
+ * @sec_type: sec_type of the corresponding CPER to be notified.
+ * @handle: pointer to the error handling function.
+ * @data: handler driver's private data.
+ *
+ * return : 0 - SUCCESS, non-zero - FAIL.
+ */
+int ghes_error_notify_register(guid_t sec_type, error_handle handle,
+			       void *data);
+
+/**
+ * ghes_error_notify_unregister - unregister an error handling function
+ * for the hw errors.
+ * @sec_type: sec_type of the corresponding CPER.
+ * @handle: pointer to the error handling function.
+ *
+ * return none.
+ */
+void ghes_error_notify_unregister(guid_t sec_type, error_handle handle);
+
+#else
+int ghes_error_notify_register(guid_t sec_type, error_handle handle, void *data)
+{
+	return -ENODEV;
+}
+
+void ghes_error_notify_unregister(guid_t sec_type, error_handle handle)
+{
+}
+#endif
+
 int ghes_estatus_pool_init(int num_ghes);
 
 /* From drivers/edac/ghes_edac.c */