diff mbox series

[v3,3/5] RAS/AEST: Introduce AEST inject interface to test AEST driver

Message ID 20250115084228.107573-4-tianruidong@linux.alibaba.com
State New
Headers show
Series [v3,1/5] ACPI/RAS/AEST: Initial AEST driver | expand

Commit Message

Ruidong Tian Jan. 15, 2025, 8:42 a.m. UTC
AEST injection interface can help to test how AEST driver process error
record which raise error.

This interface just raise a SW simulate error rather than HW error.

Example1:

1. write RAS register value to err_* file:
echo 0x... > <debugfs>/aest/<dev>/<node>/inject/err_fr
echo 0x... > <debugfs>/aest/<dev>/<node>/inject/err_status
echo 0x... > <debugfs>/aest/<dev>/<node>/inject/err_addr
echo 0x... > <debugfs>/aest/<dev>/<node>/inject/err_*

2. trigger the error:
echo -1 > <debugfs>/aest/<dev>/<node>/inject/inject

AEST driver will process this error with error register value specified
by user.

Example2:

1. just trigger the error:
echo n(record_cpunt > n >=0 ) > <debugfs>/aest/<dev>/<node>/inject/inject

AEST driver will process this error with error register values read
from record<n> of this node.

Signed-off-by: Ruidong Tian <tianruidong@linux.alibaba.com>
---
 Documentation/ABI/testing/debugfs-aest |  17 +++
 drivers/ras/aest/Makefile              |   1 +
 drivers/ras/aest/aest-inject.c         | 151 +++++++++++++++++++++++++
 drivers/ras/aest/aest-sysfs.c          |   8 +-
 drivers/ras/aest/aest.h                |   2 +
 5 files changed, 177 insertions(+), 2 deletions(-)
 create mode 100644 drivers/ras/aest/aest-inject.c

Comments

kernel test robot Jan. 17, 2025, 7:07 a.m. UTC | #1
Hi Ruidong,

kernel test robot noticed the following build errors:

[auto build test ERROR on rafael-pm/linux-next]
[also build test ERROR on rafael-pm/bleeding-edge arm64/for-next/core ras/edac-for-next linus/master tip/smp/core v6.13-rc7 next-20250116]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Ruidong-Tian/ACPI-RAS-AEST-Initial-AEST-driver/20250115-164601
base:   https://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git linux-next
patch link:    https://lore.kernel.org/r/20250115084228.107573-4-tianruidong%40linux.alibaba.com
patch subject: [PATCH v3 3/5] RAS/AEST: Introduce AEST inject interface to test AEST driver
config: arm64-allmodconfig (https://download.01.org/0day-ci/archive/20250117/202501171406.o7oztilo-lkp@intel.com/config)
compiler: clang version 18.1.8 (https://github.com/llvm/llvm-project 3b5b5c1ec4a3095ab096dd780e84d7ab81f3d7ff)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250117/202501171406.o7oztilo-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202501171406.o7oztilo-lkp@intel.com/

All errors (new ones prefixed by >>):

>> drivers/ras/aest/aest-core.c:280:13: error: static declaration of 'aest_proc_record' follows non-static declaration
     280 | static void aest_proc_record(struct aest_record *record, void *data)
         |             ^
   drivers/ras/aest/aest.h:338:6: note: previous declaration is here
     338 | void aest_proc_record(struct aest_record *record, void *data);
         |      ^
   1 error generated.


vim +/aest_proc_record +280 drivers/ras/aest/aest-core.c

b6c745ae1213b2 Ruidong Tian 2025-01-15  279  
b6c745ae1213b2 Ruidong Tian 2025-01-15 @280  static void aest_proc_record(struct aest_record *record, void *data)
b6c745ae1213b2 Ruidong Tian 2025-01-15  281  {
b6c745ae1213b2 Ruidong Tian 2025-01-15  282  	struct ras_ext_regs regs = {0};
b6c745ae1213b2 Ruidong Tian 2025-01-15  283  	int *count = data;
b6c745ae1213b2 Ruidong Tian 2025-01-15  284  
b6c745ae1213b2 Ruidong Tian 2025-01-15  285  	regs.err_status = record_read(record, ERXSTATUS);
b6c745ae1213b2 Ruidong Tian 2025-01-15  286  	if (!(regs.err_status & ERR_STATUS_V))
b6c745ae1213b2 Ruidong Tian 2025-01-15  287  		return;
b6c745ae1213b2 Ruidong Tian 2025-01-15  288  
b6c745ae1213b2 Ruidong Tian 2025-01-15  289  	(*count)++;
b6c745ae1213b2 Ruidong Tian 2025-01-15  290  
b6c745ae1213b2 Ruidong Tian 2025-01-15  291  	if (regs.err_status & ERR_STATUS_AV)
b6c745ae1213b2 Ruidong Tian 2025-01-15  292  		regs.err_addr = record_read(record, ERXADDR);
b6c745ae1213b2 Ruidong Tian 2025-01-15  293  
b6c745ae1213b2 Ruidong Tian 2025-01-15  294  	regs.err_fr = record->fr;
b6c745ae1213b2 Ruidong Tian 2025-01-15  295  	regs.err_ctlr = record_read(record, ERXCTLR);
b6c745ae1213b2 Ruidong Tian 2025-01-15  296  
b6c745ae1213b2 Ruidong Tian 2025-01-15  297  	if (regs.err_status & ERR_STATUS_MV) {
b6c745ae1213b2 Ruidong Tian 2025-01-15  298  		regs.err_misc[0] = record_read(record, ERXMISC0);
b6c745ae1213b2 Ruidong Tian 2025-01-15  299  		regs.err_misc[1] = record_read(record, ERXMISC1);
b6c745ae1213b2 Ruidong Tian 2025-01-15  300  		if (record->node->version >= ID_AA64PFR0_EL1_RAS_V1P1) {
b6c745ae1213b2 Ruidong Tian 2025-01-15  301  			regs.err_misc[2] = record_read(record, ERXMISC2);
b6c745ae1213b2 Ruidong Tian 2025-01-15  302  			regs.err_misc[3] = record_read(record, ERXMISC3);
b6c745ae1213b2 Ruidong Tian 2025-01-15  303  		}
b6c745ae1213b2 Ruidong Tian 2025-01-15  304  
b6c745ae1213b2 Ruidong Tian 2025-01-15  305  		if (record->node->info->interface_hdr->flags &
b6c745ae1213b2 Ruidong Tian 2025-01-15  306  			AEST_XFACE_FLAG_CLEAR_MISC) {
b6c745ae1213b2 Ruidong Tian 2025-01-15  307  			record_write(record, ERXMISC0, 0);
b6c745ae1213b2 Ruidong Tian 2025-01-15  308  			record_write(record, ERXMISC1, 0);
b6c745ae1213b2 Ruidong Tian 2025-01-15  309  			if (record->node->version >= ID_AA64PFR0_EL1_RAS_V1P1) {
b6c745ae1213b2 Ruidong Tian 2025-01-15  310  				record_write(record, ERXMISC2, 0);
b6c745ae1213b2 Ruidong Tian 2025-01-15  311  				record_write(record, ERXMISC3, 0);
b6c745ae1213b2 Ruidong Tian 2025-01-15  312  			}
b6c745ae1213b2 Ruidong Tian 2025-01-15  313  		/* ce count is 0 if record do not support ce */
b6c745ae1213b2 Ruidong Tian 2025-01-15  314  		} else if (record->ce.count > 0)
b6c745ae1213b2 Ruidong Tian 2025-01-15  315  			record_write(record, ERXMISC0, record->ce.reg_val);
b6c745ae1213b2 Ruidong Tian 2025-01-15  316  	}
b6c745ae1213b2 Ruidong Tian 2025-01-15  317  
b6c745ae1213b2 Ruidong Tian 2025-01-15  318  	/* panic if unrecoverable and uncontainable error encountered */
b6c745ae1213b2 Ruidong Tian 2025-01-15  319  	if ((regs.err_status & ERR_STATUS_UE) &&
b6c745ae1213b2 Ruidong Tian 2025-01-15  320  		(regs.err_status & ERR_STATUS_UET) > ERR_STATUS_UET_UEU)
b6c745ae1213b2 Ruidong Tian 2025-01-15  321  		aest_panic(record, &regs, "AEST: unrecoverable error encountered");
b6c745ae1213b2 Ruidong Tian 2025-01-15  322  
b6c745ae1213b2 Ruidong Tian 2025-01-15  323  	aest_log(record, &regs);
b6c745ae1213b2 Ruidong Tian 2025-01-15  324  
b6c745ae1213b2 Ruidong Tian 2025-01-15  325  	/* Write-one-to-clear the bits we've seen */
b6c745ae1213b2 Ruidong Tian 2025-01-15  326  	regs.err_status &= ERR_STATUS_W1TC;
b6c745ae1213b2 Ruidong Tian 2025-01-15  327  
b6c745ae1213b2 Ruidong Tian 2025-01-15  328  	/* Multi bit filed need to write all-ones to clear. */
b6c745ae1213b2 Ruidong Tian 2025-01-15  329  	if (regs.err_status & ERR_STATUS_CE)
b6c745ae1213b2 Ruidong Tian 2025-01-15  330  		regs.err_status |= ERR_STATUS_CE;
b6c745ae1213b2 Ruidong Tian 2025-01-15  331  
b6c745ae1213b2 Ruidong Tian 2025-01-15  332  	/* Multi bit filed need to write all-ones to clear. */
b6c745ae1213b2 Ruidong Tian 2025-01-15  333  	if (regs.err_status & ERR_STATUS_UET)
b6c745ae1213b2 Ruidong Tian 2025-01-15  334  		regs.err_status |= ERR_STATUS_UET;
b6c745ae1213b2 Ruidong Tian 2025-01-15  335  
b6c745ae1213b2 Ruidong Tian 2025-01-15  336  	record_write(record, ERXSTATUS, regs.err_status);
b6c745ae1213b2 Ruidong Tian 2025-01-15  337  }
b6c745ae1213b2 Ruidong Tian 2025-01-15  338
diff mbox series

Patch

diff --git a/Documentation/ABI/testing/debugfs-aest b/Documentation/ABI/testing/debugfs-aest
index 39d9c85843ef..4d3f4464cf98 100644
--- a/Documentation/ABI/testing/debugfs-aest
+++ b/Documentation/ABI/testing/debugfs-aest
@@ -96,3 +96,20 @@  KernelVersion	6.10
 Contact:	Ruidong Tian <tianruidong@linux.alibaba.com>
 Description:
 		(RO) Outputs error statistics for all this records.
+
+What:		/sys/devices/platform/AEST.<UID>/<Nome_name>/inject/err_*
+Date:		June 2024
+KernelVersion	6.10
+Contact:	Ruidong Tian <tianruidong@linux.alibaba.com>
+Description:
+		(RW) Write any integer to this file to trigger the error
+		injection. Make sure you have specified all necessary error
+		parameters, i.e. this write should be the last step when
+		injecting errors.
+
+		Accepts values -  -1 or n ( 0 <= n < <record_count>).
+		-1 : If you write -1, make sure you specified all err_* file,
+		     driver will use these err_* value to proce AEST error.
+		n : Driver will read record<n> of this error node to collect
+		    error register value, and use these values to proce AEST
+		    error.
diff --git a/drivers/ras/aest/Makefile b/drivers/ras/aest/Makefile
index 75495413d2b6..5ee10fc8b2e9 100644
--- a/drivers/ras/aest/Makefile
+++ b/drivers/ras/aest/Makefile
@@ -4,3 +4,4 @@  obj-$(CONFIG_AEST) 	+= aest.o
 
 aest-y		:= aest-core.o
 aest-y		+= aest-sysfs.o
+aest-y		+= aest-inject.o
diff --git a/drivers/ras/aest/aest-inject.c b/drivers/ras/aest/aest-inject.c
new file mode 100644
index 000000000000..2ca074aa021c
--- /dev/null
+++ b/drivers/ras/aest/aest-inject.c
@@ -0,0 +1,151 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARM Error Source Table Support
+ *
+ * Copyright (c) 2024, Alibaba Group.
+ */
+
+#include "aest.h"
+
+static struct ras_ext_regs regs_inj;
+static u64 hard_inject_val;
+
+struct inj_attr {
+	struct attribute attr;
+	ssize_t (*show)(struct aest_node *n, struct inj_attr *a, char *b);
+	ssize_t (*store)(struct aest_node *n, struct inj_attr *a, const char *b,
+				size_t c);
+};
+
+struct aest_inject {
+	struct aest_node *node;
+	struct kobject kobj;
+};
+
+#define to_inj(k)	container_of(k, struct aest_inject, kobj)
+#define to_inj_attr(a)	container_of(a, struct inj_attr, attr)
+
+static u64 aest_sysreg_read_inject(void *__unused, u32 offset)
+{
+	u64 *p = (u64 *)&regs_inj;
+
+	return p[offset/8];
+}
+
+static void aest_sysreg_write_inject(void *base, u32 offset, u64 val)
+{
+	u64 *p = (u64 *)&regs_inj;
+
+	p[offset/8] = val;
+}
+
+static u64 aest_iomem_read_inject(void *base, u32 offset)
+{
+	u64 *p = (u64 *)&regs_inj;
+
+	return p[offset/8];
+}
+
+static void aest_iomem_write_inject(void *base, u32 offset, u64 val)
+{
+	u64 *p = (u64 *)&regs_inj;
+
+	p[offset/8] = val;
+}
+
+static struct aest_access aest_access_inject[] = {
+	[ACPI_AEST_NODE_SYSTEM_REGISTER] = {
+		.read = aest_sysreg_read_inject,
+		.write = aest_sysreg_write_inject,
+	},
+
+	[ACPI_AEST_NODE_MEMORY_MAPPED] = {
+		.read = aest_iomem_read_inject,
+		.write = aest_iomem_write_inject,
+	},
+	[ACPI_AEST_NODE_SINGLE_RECORD_MEMORY_MAPPED] = {
+		.read = aest_iomem_read_inject,
+		.write = aest_iomem_write_inject,
+	},
+	{ }
+};
+
+static int inject_store(void *data, u64 val)
+{
+	int i = val, count = 0;
+	struct aest_record record_inj, *record;
+	struct aest_node node_inj, *node = data;
+
+	if (i > (int)node->info->interface_hdr->error_record_count)
+		return -EINVAL;
+
+	memcpy(&node_inj, node, sizeof(*node));
+	node_inj.name = "AEST-injection";
+
+	record_inj.access = &aest_access_inject[node->info->interface_hdr->type];
+	record_inj.node = &node_inj;
+	record_inj.index = i;
+	if (i >= 0) {
+		record = &node->records[i];
+		regs_inj.err_fr = record_read(record, ERXFR);
+		regs_inj.err_ctlr = record_read(record, ERXCTLR);
+		regs_inj.err_status = record_read(record, ERXSTATUS);
+		regs_inj.err_addr = record_read(record, ERXADDR);
+		regs_inj.err_misc[0] = record_read(record, ERXMISC0);
+		regs_inj.err_misc[1] = record_read(record, ERXMISC1);
+		regs_inj.err_misc[2] = record_read(record, ERXMISC2);
+		regs_inj.err_misc[3] = record_read(record, ERXMISC3);
+	}
+
+	regs_inj.err_status |= ERR_STATUS_V;
+
+	aest_proc_record(&record_inj, &count);
+
+	if (count != 1)
+		return -EIO;
+
+	return 0;
+}
+DEFINE_DEBUGFS_ATTRIBUTE(inject_ops, NULL, inject_store, "%llu\n");
+
+static int hard_inject_store(void *data, u64 val)
+{
+	struct aest_node *node = data;
+
+	if (!node->inj)
+		return -EPERM;
+
+	if (val > node->record_count)
+		return -ENODEV;
+
+	if (node->type == ACPI_AEST_PROCESSOR_ERROR_NODE) {
+		aest_select_record(node, val);
+		write_sysreg_s(hard_inject_val, SYS_ERXPFGCTL_EL1);
+		write_sysreg_s(0x100, SYS_ERXPFGCDN_EL1);
+		aest_sync(node);
+	} else
+		writeq_relaxed(hard_inject_val, node->inj + val * 8);
+
+	return 0;
+}
+DEFINE_DEBUGFS_ATTRIBUTE(hard_inject_ops, NULL, hard_inject_store, "%llu\n");
+
+void aest_inject_init_debugfs(struct aest_node *node)
+{
+	struct dentry *inj;
+
+	inj = debugfs_create_dir("inject", node->debugfs);
+
+	debugfs_create_u64("err_fr", 0400, inj, &regs_inj.err_fr);
+	debugfs_create_u64("err_ctrl", 0400, inj, &regs_inj.err_ctlr);
+	debugfs_create_u64("err_status", 0400, inj, &regs_inj.err_status);
+	debugfs_create_u64("err_addr", 0400, inj, &regs_inj.err_addr);
+	debugfs_create_u64("err_misc0", 0400, inj, &regs_inj.err_misc[0]);
+	debugfs_create_u64("err_misc1", 0400, inj, &regs_inj.err_misc[1]);
+	debugfs_create_u64("err_misc2", 0400, inj, &regs_inj.err_misc[2]);
+	debugfs_create_u64("err_misc3", 0400, inj, &regs_inj.err_misc[3]);
+	debugfs_create_file("inject", 0400, inj, node, &inject_ops);
+
+	debugfs_create_file("hard_inject", 0600, inj, node, &hard_inject_ops);
+	debugfs_create_u64("hard_inject_val", 0600, inj, &hard_inject_val);
+}
diff --git a/drivers/ras/aest/aest-sysfs.c b/drivers/ras/aest/aest-sysfs.c
index f19cd2b5edb2..ba913556fc03 100644
--- a/drivers/ras/aest/aest-sysfs.c
+++ b/drivers/ras/aest/aest-sysfs.c
@@ -192,8 +192,8 @@  aest_oncore_dev_init_debugfs(struct aest_device *adev)
 	for_each_possible_cpu(cpu) {
 		percpu_dev = this_cpu_ptr(adev->adev_oncore);
 
-		snprintf(name, sizeof(name), "processor%u", cpu);
-		percpu_dev->debugfs = debugfs_create_dir(name, aest_debugfs);
+		snprintf(name, sizeof(name), "CPU%u", cpu);
+		percpu_dev->debugfs = debugfs_create_dir(name, adev->debugfs);
 
 		for (i = 0; i < adev->node_cnt; i++) {
 			node = &adev->nodes[i];
@@ -210,6 +210,9 @@  void aest_dev_init_debugfs(struct aest_device *adev)
 	int i;
 	struct aest_node *node;
 
+	if (!aest_debugfs)
+		dev_err(adev->dev, "debugfs not enabled\n");
+
 	adev->debugfs = debugfs_create_dir(dev_name(adev->dev), aest_debugfs);
 	if (aest_dev_is_oncore(adev)) {
 		aest_oncore_dev_init_debugfs(adev);
@@ -222,5 +225,6 @@  void aest_dev_init_debugfs(struct aest_device *adev)
 			continue;
 		node->debugfs = debugfs_create_dir(node->name, adev->debugfs);
 		aest_node_init_debugfs(node);
+		aest_inject_init_debugfs(node);
 	}
 }
diff --git a/drivers/ras/aest/aest.h b/drivers/ras/aest/aest.h
index d9a52e39b1b9..90a96e2666d3 100644
--- a/drivers/ras/aest/aest.h
+++ b/drivers/ras/aest/aest.h
@@ -334,3 +334,5 @@  aest_set_name(struct aest_device *adev, struct aest_hnode *ahnode)
 }
 
 void aest_dev_init_debugfs(struct aest_device *adev);
+void aest_inject_init_debugfs(struct aest_node *node);
+void aest_proc_record(struct aest_record *record, void *data);