diff mbox series

[v10,01/11] EDAC: Add generic EDAC RAS control feature driver

Message ID 20240726160556.2079-2-shiju.jose@huawei.com
State New
Headers show
Series EDAC: Scrub: introduce generic EDAC RAS control feature driver + CXL/ACPI-RAS2 drivers | expand

Commit Message

Shiju Jose July 26, 2024, 4:05 p.m. UTC
From: Shiju Jose <shiju.jose@huawei.com>

Add a generic EDAC driver that supports registering the RAS features
supported by the system. The driver exposes each feature's control
attributes to userspace in /sys/bus/edac/devices/<dev-name>/<ras-feature>/

Co-developed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
---
 drivers/edac/Makefile            |   1 +
 drivers/edac/edac_ras_feature.c  | 181 +++++++++++++++++++++++++++++++
 include/linux/edac_ras_feature.h |  66 +++++++++++
 3 files changed, 248 insertions(+)
 create mode 100755 drivers/edac/edac_ras_feature.c
 create mode 100755 include/linux/edac_ras_feature.h

Comments

Borislav Petkov July 30, 2024, 1:16 p.m. UTC | #1
On Fri, Jul 26, 2024 at 05:05:45PM +0100, shiju.jose@huawei.com wrote:
> From: Shiju Jose <shiju.jose@huawei.com>
> 
> Add generic EDAC driver supports registering RAS features supported
> in the system. The driver exposes feature's control attributes to the
> userspace in /sys/bus/edac/devices/<dev-name>/<ras-feature>/
> 
> Co-developed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
> ---
>  drivers/edac/Makefile            |   1 +
>  drivers/edac/edac_ras_feature.c  | 181 +++++++++++++++++++++++++++++++
>  include/linux/edac_ras_feature.h |  66 +++++++++++
>  3 files changed, 248 insertions(+)
>  create mode 100755 drivers/edac/edac_ras_feature.c
>  create mode 100755 include/linux/edac_ras_feature.h
> 
> diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
> index 9c09893695b7..c532b57a6d8a 100644
> --- a/drivers/edac/Makefile
> +++ b/drivers/edac/Makefile
> @@ -10,6 +10,7 @@ obj-$(CONFIG_EDAC)			:= edac_core.o
>  
>  edac_core-y	:= edac_mc.o edac_device.o edac_mc_sysfs.o
>  edac_core-y	+= edac_module.o edac_device_sysfs.o wq.o
> +edac_core-y	+= edac_ras_feature.o

EDAC and RAS and feature?!

Oh boy.

EDAC == RAS.

"feature" is silly.

Looking at the code below, you're registering an EDAC device.
- edac_ras_dev_register().

So why isn't this thing in edac_device.c?

> diff --git a/include/linux/edac_ras_feature.h b/include/linux/edac_ras_feature.h
> new file mode 100755
> index 000000000000..8f0e0c47a617
> --- /dev/null
> +++ b/include/linux/edac_ras_feature.h
> @@ -0,0 +1,66 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * EDAC RAS control features.
> + *
> + * Copyright (c) 2024 HiSilicon Limited.
> + */
> +
> +#ifndef __EDAC_RAS_FEAT_H
> +#define __EDAC_RAS_FEAT_H
> +
> +#include <linux/types.h>
> +#include <linux/edac.h>
> +
> +#define EDAC_RAS_NAME_LEN	128
> +
> +enum edac_ras_feat {
> +	RAS_FEAT_SCRUB,
> +	RAS_FEAT_ECS,
> +	RAS_FEAT_MAX
> +};
> +
> +struct edac_ecs_ex_info {
> +	u16 num_media_frus;
> +};
> +
> +/*
> + * EDAC RAS feature information structure
> + */
> +struct edac_scrub_data {
> +	const struct edac_scrub_ops *ops;
> +	void *private;
> +};
> +
> +struct edac_ecs_data {
> +	const struct edac_ecs_ops *ops;
> +	void *private;
> +};

So each "feature" would require a separate struct type?

Why don't you define a *single* struct which accommodates any RAS
functionality?

Thx.
Shiju Jose July 30, 2024, 5:01 p.m. UTC | #2
>-----Original Message-----
>From: Borislav Petkov <bp@alien8.de>
>Sent: 30 July 2024 14:16
>To: Shiju Jose <shiju.jose@huawei.com>
>Cc: linux-edac@vger.kernel.org; linux-cxl@vger.kernel.org; linux-
>acpi@vger.kernel.org; linux-mm@kvack.org; linux-kernel@vger.kernel.org;
>tony.luck@intel.com; rafael@kernel.org; lenb@kernel.org;
>mchehab@kernel.org; dan.j.williams@intel.com; dave@stgolabs.net; Jonathan
>Cameron <jonathan.cameron@huawei.com>; dave.jiang@intel.com;
>alison.schofield@intel.com; vishal.l.verma@intel.com; ira.weiny@intel.com;
>david@redhat.com; Vilas.Sridharan@amd.com; leo.duran@amd.com;
>Yazen.Ghannam@amd.com; rientjes@google.com; jiaqiyan@google.com;
>Jon.Grimm@amd.com; dave.hansen@linux.intel.com;
>naoya.horiguchi@nec.com; james.morse@arm.com; jthoughton@google.com;
>somasundaram.a@hpe.com; erdemaktas@google.com; pgonda@google.com;
>duenwen@google.com; mike.malvestuto@intel.com; gthelen@google.com;
>wschwartz@amperecomputing.com; dferguson@amperecomputing.com;
>wbs@os.amperecomputing.com; nifan.cxl@gmail.com; tanxiaofei
><tanxiaofei@huawei.com>; Zengtao (B) <prime.zeng@hisilicon.com>; Roberto
>Sassu <roberto.sassu@huawei.com>; kangkang.shen@futurewei.com;
>wanghuiqiang <wanghuiqiang@huawei.com>; Linuxarm
><linuxarm@huawei.com>
>Subject: Re: [PATCH v10 01/11] EDAC: Add generic EDAC RAS control feature
>driver
>
>On Fri, Jul 26, 2024 at 05:05:45PM +0100, shiju.jose@huawei.com wrote:
>> From: Shiju Jose <shiju.jose@huawei.com>
>>
>> Add generic EDAC driver supports registering RAS features supported in
>> the system. The driver exposes feature's control attributes to the
>> userspace in /sys/bus/edac/devices/<dev-name>/<ras-feature>/
>>
>> Co-developed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
>> Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
>> Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
>> ---
>>  drivers/edac/Makefile            |   1 +
>>  drivers/edac/edac_ras_feature.c  | 181
>> +++++++++++++++++++++++++++++++  include/linux/edac_ras_feature.h |
>> 66 +++++++++++
>>  3 files changed, 248 insertions(+)
>>  create mode 100755 drivers/edac/edac_ras_feature.c  create mode
>> 100755 include/linux/edac_ras_feature.h
>>
>> diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index
>> 9c09893695b7..c532b57a6d8a 100644
>> --- a/drivers/edac/Makefile
>> +++ b/drivers/edac/Makefile
>> @@ -10,6 +10,7 @@ obj-$(CONFIG_EDAC)			:= edac_core.o
>>
>>  edac_core-y	:= edac_mc.o edac_device.o edac_mc_sysfs.o
>>  edac_core-y	+= edac_module.o edac_device_sysfs.o wq.o
>> +edac_core-y	+= edac_ras_feature.o
>
>EDAC and RAS and feature?!
>
>Oh boy.
>
>EDAC == RAS.
>
>"feature" is silly.
>
>Looking at the code below, you're registering an EDAC device.
>- edac_ras_dev_register().
>
>So why isn't this thing in edac_device.c?
Sure. Then can I add definitions in edac_ras_feature.h to /linux/edac.h?    

>
>> diff --git a/include/linux/edac_ras_feature.h
>> b/include/linux/edac_ras_feature.h
>> new file mode 100755
>> index 000000000000..8f0e0c47a617
>> --- /dev/null
>> +++ b/include/linux/edac_ras_feature.h
>> @@ -0,0 +1,66 @@
>> +/* SPDX-License-Identifier: GPL-2.0 */
>> +/*
>> + * EDAC RAS control features.
>> + *
>> + * Copyright (c) 2024 HiSilicon Limited.
>> + */
>> +
>> +#ifndef __EDAC_RAS_FEAT_H
>> +#define __EDAC_RAS_FEAT_H
>> +
>> +#include <linux/types.h>
>> +#include <linux/edac.h>
>> +
>> +#define EDAC_RAS_NAME_LEN	128
>> +
>> +enum edac_ras_feat {
>> +	RAS_FEAT_SCRUB,
>> +	RAS_FEAT_ECS,
>> +	RAS_FEAT_MAX
>> +};
>> +
>> +struct edac_ecs_ex_info {
>> +	u16 num_media_frus;
>> +};
>> +
>> +/*
>> + * EDAC RAS feature information structure  */ struct edac_scrub_data
>> +{
>> +	const struct edac_scrub_ops *ops;
>> +	void *private;
>> +};
>> +
>> +struct edac_ecs_data {
>> +	const struct edac_ecs_ops *ops;
>> +	void *private;
>> +};
>
>So each "feature" would require a separate struct type?
>
>Why don't you define a *single* struct which accomodates any RAS
>functionality?
Done.

>
>Thx.
>
>--
>Regards/Gruss,
>    Boris.
>
>https://people.kernel.org/tglx/notes-about-netiquette
>

Thanks,
Shiju
diff mbox series

Patch

diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index 9c09893695b7..c532b57a6d8a 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -10,6 +10,7 @@  obj-$(CONFIG_EDAC)			:= edac_core.o
 
 edac_core-y	:= edac_mc.o edac_device.o edac_mc_sysfs.o
 edac_core-y	+= edac_module.o edac_device_sysfs.o wq.o
+edac_core-y	+= edac_ras_feature.o
 
 edac_core-$(CONFIG_EDAC_DEBUG)		+= debugfs.o
 
diff --git a/drivers/edac/edac_ras_feature.c b/drivers/edac/edac_ras_feature.c
new file mode 100755
index 000000000000..dca19fd511b5
--- /dev/null
+++ b/drivers/edac/edac_ras_feature.c
@@ -0,0 +1,181 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * EDAC RAS control feature driver supports registering RAS
+ * features with the EDAC and exposes the feature's control
+ * attributes to the userspace in sysfs.
+ *
+ * Copyright (c) 2024 HiSilicon Limited.
+ */
+
+#define pr_fmt(fmt)     "EDAC RAS FEAT: " fmt
+
+#include <linux/edac_ras_feature.h>
+
+/*
+ * Release callback for the EDAC RAS device: frees the attribute-group
+ * array hung off the device, then the context that embeds the device.
+ */
+static void edac_ras_dev_release(struct device *dev)
+{
+	struct edac_ras_feat_ctx *ras_ctx;
+
+	ras_ctx = container_of(dev, struct edac_ras_feat_ctx, dev);
+	kfree(dev->groups);
+	kfree(ras_ctx);
+}
+
+const struct device_type edac_ras_dev_type = {	/* NOTE(review): could be static? */
+	.name = "edac_ras_dev",
+	.release = edac_ras_dev_release,	/* frees the groups array and the ctx */
+};
+
+/*
+ * devm action installed on the parent device by edac_ras_dev_register();
+ * @data is the struct device embedded in the context.
+ */
+static void edac_ras_dev_unreg(void *data)
+{
+	struct device *ras_dev = data;
+
+	device_unregister(ras_dev);
+}
+
+/**
+ * edac_ras_feat_scrub_init - set up the scrub feature in the context
+ * @parent: client device (not used by this function yet).
+ * @sdata: scrub data inside the context, filled in from @sfeat.
+ * @sfeat: scrub feature descriptor supplied by the caller.
+ * @attr_groups: slot in the attribute group container (not used by this
+ *		 function yet).
+ *
+ * Copies the scrub ops table and the driver context from @sfeat into @sdata.
+ *
+ * Returns the number of attribute groups accounted to the scrub feature,
+ * which is always 1 here.
+ */
+static int edac_ras_feat_scrub_init(struct device *parent,
+				    struct edac_scrub_data *sdata,
+				    const struct edac_ras_feature *sfeat,
+				    const struct attribute_group **attr_groups)
+{
+	*sdata = (struct edac_scrub_data) {
+		.ops = sfeat->scrub_ops,
+		.private = sfeat->scrub_ctx,
+	};
+
+	return 1;
+}
+
+/**
+ * edac_ras_feat_ecs_init - set up the ecs feature in the context
+ * @parent: client device (not used by this function yet).
+ * @edata: ecs data inside the context, filled in from @efeat.
+ * @efeat: ecs feature descriptor supplied by the caller.
+ * @attr_groups: slot in the attribute group container (not used by this
+ *		 function yet).
+ *
+ * Copies the ecs ops table and the driver context from @efeat into @edata.
+ *
+ * Returns the number of attribute groups accounted to the ecs feature,
+ * i.e. @efeat->ecs_info.num_media_frus.
+ */
+static int edac_ras_feat_ecs_init(struct device *parent,
+				  struct edac_ecs_data *edata,
+				  const struct edac_ras_feature *efeat,
+				  const struct attribute_group **attr_groups)
+{
+	*edata = (struct edac_ecs_data) {
+		.ops = efeat->ecs_ops,
+		.private = efeat->ecs_ctx,
+	};
+
+	return efeat->ecs_info.num_media_frus;
+}
+
+/**
+ * edac_ras_dev_register - register device for ras features with edac
+ * @parent: client device.
+ * @name: client device's name.
+ * @private: parent driver's data to store in the context if any.
+ * @num_features: number of ras features to register, must be positive.
+ * @ras_features: list of ras features to register.
+ *
+ * Allocates an edac_ras_feat_ctx, initialises every listed feature that
+ * provides an ops table (entries with NULL ops are skipped), registers the
+ * embedded device on the EDAC bus and installs a devm action on @parent
+ * that unregisters it again.
+ *
+ * Returns 0 on success, error otherwise.
+ * The new edac_ras_feat_ctx would be freed automatically.
+ */
+int edac_ras_dev_register(struct device *parent, char *name,
+			  void *private, int num_features,
+			  const struct edac_ras_feature *ras_features)
+{
+	const struct attribute_group **ras_attr_groups;
+	struct edac_ras_feat_ctx *ctx;
+	int attr_gcnt = 0;
+	int ret, feat;
+
+	if (!parent || !name || num_features <= 0 || !ras_features)
+		return -EINVAL;
+
+	/* Double parse so we can make space for attributes */
+	for (feat = 0; feat < num_features; feat++) {
+		switch (ras_features[feat].feat) {
+		case RAS_FEAT_SCRUB:
+			attr_gcnt++;
+			break;
+		case RAS_FEAT_ECS:
+			attr_gcnt += ras_features[feat].ecs_info.num_media_frus;
+			break;
+		default:
+			return -EINVAL;
+		}
+	}
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	ctx->dev.parent = parent;
+	ctx->private = private;
+
+	/* One extra slot for the NULL terminator of the groups array. */
+	ras_attr_groups = kcalloc(attr_gcnt + 1, sizeof(*ras_attr_groups), GFP_KERNEL);
+	if (!ras_attr_groups) {
+		ret = -ENOMEM;
+		goto ctx_free;
+	}
+
+	attr_gcnt = 0;
+	for (feat = 0; feat < num_features; feat++, ras_features++) {
+		switch (ras_features->feat) {
+		case RAS_FEAT_SCRUB:
+			if (!ras_features->scrub_ops)
+				continue;
+			ret = edac_ras_feat_scrub_init(parent, &ctx->scrub,
+						       ras_features,
+						       &ras_attr_groups[attr_gcnt]);
+			if (ret < 0)
+				goto groups_free;
+
+			attr_gcnt += ret;
+			break;
+		case RAS_FEAT_ECS:
+			if (!ras_features->ecs_ops)
+				continue;
+			ret = edac_ras_feat_ecs_init(parent, &ctx->ecs,
+						     ras_features,
+						     &ras_attr_groups[attr_gcnt]);
+			if (ret < 0)
+				goto groups_free;
+
+			attr_gcnt += ret;
+			break;
+		default:
+			ret = -EINVAL;
+			goto groups_free;
+		}
+	}
+	ras_attr_groups[attr_gcnt] = NULL;
+	ctx->dev.bus = edac_get_sysfs_subsys();
+	ctx->dev.type = &edac_ras_dev_type;
+	ctx->dev.groups = ras_attr_groups;
+	dev_set_drvdata(&ctx->dev, ctx);
+	/* @name is caller-supplied data, so never use it as the format. */
+	ret = dev_set_name(&ctx->dev, "%s", name);
+	if (ret)
+		goto groups_free;
+
+	ret = device_register(&ctx->dev);
+	if (ret) {
+		/*
+		 * Once device_register() has been called the device must be
+		 * dropped with put_device(); that ends in
+		 * edac_ras_dev_release(), which frees both ras_attr_groups
+		 * and ctx. Jumping to the labels below would free them twice.
+		 */
+		put_device(&ctx->dev);
+		return ret;
+	}
+
+	return devm_add_action_or_reset(parent, edac_ras_dev_unreg, &ctx->dev);
+
+	/* Only reached before device_register(): plain kfree() is correct. */
+groups_free:
+	kfree(ras_attr_groups);
+ctx_free:
+	kfree(ctx);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(edac_ras_dev_register);
diff --git a/include/linux/edac_ras_feature.h b/include/linux/edac_ras_feature.h
new file mode 100755
index 000000000000..8f0e0c47a617
--- /dev/null
+++ b/include/linux/edac_ras_feature.h
@@ -0,0 +1,66 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * EDAC RAS control features.
+ *
+ * Copyright (c) 2024 HiSilicon Limited.
+ */
+
+#ifndef __EDAC_RAS_FEAT_H
+#define __EDAC_RAS_FEAT_H
+
+#include <linux/types.h>
+#include <linux/edac.h>
+
+#define EDAC_RAS_NAME_LEN	128
+
+enum edac_ras_feat {
+	RAS_FEAT_SCRUB,	/* scrub: uses scrub_ops and scrub_ctx */
+	RAS_FEAT_ECS,	/* ECS: uses ecs_ops, ecs_ctx and ecs_info */
+	RAS_FEAT_MAX	/* NOTE(review): presumably a count sentinel; unused in this patch */
+};
+
+struct edac_ecs_ex_info {
+	u16 num_media_frus;	/* count of ECS attribute groups to reserve */
+};
+
+/*
+ * EDAC RAS feature information structure
+ *
+ * Scrub feature state kept in struct edac_ras_feat_ctx: @ops is copied from
+ * edac_ras_feature.scrub_ops and @private from edac_ras_feature.scrub_ctx
+ * when the feature is initialised.
+ */
+struct edac_scrub_data {
+	const struct edac_scrub_ops *ops;
+	void *private;
+};
+
+struct edac_ecs_data {
+	const struct edac_ecs_ops *ops;	/* copied from edac_ras_feature.ecs_ops */
+	void *private;			/* copied from edac_ras_feature.ecs_ctx */
+};
+
+struct device;
+
+struct edac_ras_feat_ctx {
+	struct device dev;		/* device registered on the EDAC bus */
+	void *private;			/* caller data given to edac_ras_dev_register() */
+	struct edac_scrub_data scrub;	/* filled for a RAS_FEAT_SCRUB entry */
+	struct edac_ecs_data ecs;	/* filled for a RAS_FEAT_ECS entry */
+};
+
+struct edac_ras_feature {
+	enum edac_ras_feat feat;	/* selects which union members are valid */
+	union {				/* ops table; a NULL one makes registration skip the entry */
+		const struct edac_scrub_ops *scrub_ops;
+		const struct edac_ecs_ops *ecs_ops;
+	};
+	union {				/* driver context stored next to the ops */
+		void *scrub_ctx;
+		void *ecs_ctx;
+	};
+	union {				/* extra per-feature info; only ECS has any */
+		struct edac_ecs_ex_info ecs_info;
+	};
+};
+
+/*
+ * Register an EDAC RAS device called @name below @parent for the
+ * @num_features entries of @ras_features; @private is stored in the new
+ * context. Returns 0 on success, a negative error code otherwise.
+ *
+ * Parameter names match the definition and its kernel-doc; the old
+ * "dev_name" also shadowed the driver-core dev_name() helper.
+ */
+int edac_ras_dev_register(struct device *parent, char *name,
+			  void *private, int num_features,
+			  const struct edac_ras_feature *ras_features);
+#endif /* __EDAC_RAS_FEAT_H */