diff mbox series

[v7,1/5] cxl/port: Add EINJ debugfs files and callback support

Message ID 20231128160656.166609-2-Benjamin.Cheatham@amd.com
State New
Headers show
Series CXL, ACPI, APEI, EINJ: Update EINJ for CXL error types | expand

Commit Message

Ben Cheatham Nov. 28, 2023, 4:06 p.m. UTC
Add creation of debugfs directories for ports and dports under
/sys/kernel/debug/cxl when EINJ support is enabled. The dport
directories will contain files for injecting CXL protocol errors.
These files are only usable once the EINJ module has loaded and
registered callback functions with the CXL core module, before that
occurs (or if the EINJ module isn't loaded) the files will do nothing
and return an ENODEV error.

Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Ben Cheatham <Benjamin.Cheatham@amd.com>
---
 Documentation/ABI/testing/debugfs-cxl | 27 +++++++++
 drivers/cxl/core/port.c               | 84 +++++++++++++++++++++++++++
 drivers/cxl/cxl.h                     | 10 ++++
 3 files changed, 121 insertions(+)

Comments

Dan Williams Dec. 7, 2023, 11:13 p.m. UTC | #1
Ben Cheatham wrote:
> Add creation of debugfs directories for ports and dports under
> /sys/kernel/debug/cxl when EINJ support is enabled. The dport
> directories will contain files for injecting CXL protocol errors.
> These files are only usable once the EINJ module has loaded and
> registered callback functions with the CXL core module, before that
> occurs (or if the EINJ module isn't loaded) the files will do nothing
> and return an ENODEV error.
> 
> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> Signed-off-by: Ben Cheatham <Benjamin.Cheatham@amd.com>
> ---
>  Documentation/ABI/testing/debugfs-cxl | 27 +++++++++
>  drivers/cxl/core/port.c               | 84 +++++++++++++++++++++++++++
>  drivers/cxl/cxl.h                     | 10 ++++
>  3 files changed, 121 insertions(+)
> 
> diff --git a/Documentation/ABI/testing/debugfs-cxl b/Documentation/ABI/testing/debugfs-cxl
> index fe61d372e3fa..782a1bb78884 100644
> --- a/Documentation/ABI/testing/debugfs-cxl
> +++ b/Documentation/ABI/testing/debugfs-cxl
> @@ -33,3 +33,30 @@ Description:
>  		device cannot clear poison from the address, -ENXIO is returned.
>  		The clear_poison attribute is only visible for devices
>  		supporting the capability.
> +
> +What:		/sys/kernel/debug/cxl/portX/dportY/einj_types

Given this file is identical contents for all dports it only needs to
exist in one common location

/sys/kernel/debug/cxl/einj_types


> +Date:		November, 2023
> +KernelVersion:	v6.8
> +Contact:	linux-cxl@vger.kernel.org
> +Description:
> +		(RO) Prints the CXL protocol error types made available by
> +		the platform in the format "0x<error number>	<error type>".
> +		The <error number> can be written to einj_inject to inject
> +		<error type> into dportY. This file is only visible if
> +		CONFIG_ACPI_APEI_EINJ is enabled, and the EINJ module must
> +		be able to reach one (or both) of the CXL_ACPI or CXL_PORT
> +		modules to be functional.

This can be simplified. Have something like:

config CXL_EINJ
	default CXL_BUS
	depends on ACPI_APEI_EINJ && ACPI_APEI_EINJ=CXL_BUS
	...

Then the documentation moves to Kconfig for how to enable this and the
CXL code can directly call into the EINJ module without worry.

It would of course need stubs like these in a shared header:

#ifdef CONFIG_CXL_EINJ
int cxl_einj_available_error_type(struct seq_file *m, void *v);
int cxl_einj_inject_error(struct cxl_dport *dport, u64 type);
#else
static inline int cxl_einj_available_error_type(struct seq_file *m, void *v)
{
	return -ENXIO;
}

int cxl_einj_inject_error(struct cxl_dport *dport, u64 type)
{
	return -ENXIO;
}
#endif

> +
> +What:		/sys/kernel/debug/cxl/portX/dportY/einj_inject

See my comments on cxl_debugfs_create_dport_dir() later on, but I think
the "portX" directory can be eliminated.

> +Date:		November, 2023
> +KernelVersion:	v6.8
> +Contact:	linux-cxl@vger.kernel.org
> +Description:
> +		(WO) Writing an integer to this file injects the corresponding
> +		CXL protocol error into dportY (integer to type mapping is
> +		available by reading from einj_types). If the dport was
> +		enumerated in RCH mode, a CXL 1.1 error is injected, otherwise
> +		a CXL 2.0 error is injected. This file is only visible if
> +		CONFIG_ACPI_APEI_EINJ is enabled, and the EINJ module must
> +		be able to reach one (or both) of the CXL_ACPI or CXL_PORT
> +		modules to be functional.

Similar comments about dropping these details that can just be solved in
Kconfig.

This is next comment is on EINJ ABI, but you can skip it just to
maintain momentum with the status quo. Why require the user to do the
string to integer conversion? Seems like a small matter of programming
to allow:

echo "CXL.cache Protocol Correctable" > einj_inject

...to do the right thing. That probably makes scripts more readable as
well.

> diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
> index 38441634e4c6..acf10415a174 100644
> --- a/drivers/cxl/core/port.c
> +++ b/drivers/cxl/core/port.c
> @@ -783,6 +783,72 @@ static int cxl_dport_setup_regs(struct device *host, struct cxl_dport *dport,
>  	return rc;
>  }
>  
> +static struct cxl_einj_ops einj_ops;
> +void cxl_einj_set_ops_cbs(struct cxl_einj_ops *ops)
> +{
> +	if (!IS_REACHABLE(CONFIG_ACPI_APEI_EINJ) || !ops)
> +		return;
> +
> +	einj_ops = *ops;
> +}
> +EXPORT_SYMBOL_NS_GPL(cxl_einj_set_ops_cbs, CXL);

einj_ops goes away when the CXL code can just call the EINJ module
directly.

> +
> +static int cxl_einj_type_show(struct seq_file *f, void *data)
> +{
> +	if (!einj_ops.einj_type)
> +		return -ENODEV;
> +
> +	return einj_ops.einj_type(f, data);
> +}
> +
> +static int cxl_einj_inject(void *data, u64 type)
> +{
> +	struct cxl_dport *dport = data;
> +
> +	if (!einj_ops.einj_inject)
> +		return -ENODEV;
> +
> +	return einj_ops.einj_inject(dport, type);
> +}
> +DEFINE_DEBUGFS_ATTRIBUTE(cxl_einj_inject_fops, NULL, cxl_einj_inject, "%llx\n");

The wrappers go away and DEFINE_DEBUGFS_ATTRIBUTE() can reference the
EINJ symbols directly.

> +
> +static int cxl_debugfs_create_dport_dir(struct dentry *port_dir,
> +						   struct cxl_dport *dport)
> +{
> +	struct dentry *dir;
> +	char dir_name[32];
> +
> +	snprintf(dir_name, 31, "dport%d", dport->port_id);

How about dev_name(dport->dport_dev) rather than the dynamic name?

The other benefit of this is that the dport_dev names are unique, so you
can move the einj_inject file to:

/sys/kernel/debug/cxl/$dport_dev/einj_inject

> +	dir = debugfs_create_dir(dir_name, port_dir);
> +	if (IS_ERR(dir))
> +		return PTR_ERR(dir);
> +
> +	debugfs_create_devm_seqfile(dport->dport_dev, "einj_types", dir,
> +				    cxl_einj_type_show);

Per above, move this to be a top-level file.

> +
> +	debugfs_create_file("einj_inject", 0200, dir, dport,
> +			    &cxl_einj_inject_fops);
> +	return 0;

debugfs is good about failing gracefully when pre-requisites are not
present. This is why none of the debugfs creation helpers have return
codes because failing to setup debugfs is never fatal.

In other words, it is ok to take the output of debugfs_create_dir()
without checking, and this function should not be returning an error.

> +}
> +
> +static struct dentry *cxl_debugfs_create_port_dir(struct cxl_port *port)
> +{
> +	const char *dir_name = dev_name(&port->dev);
> +	struct dentry *dir;
> +
> +	if (!IS_ENABLED(CONFIG_ACPI_APEI_EINJ))
> +		return ERR_PTR(-ENODEV);
> +
> +	dir = cxl_debugfs_create_dir(dir_name);
> +	if (IS_ERR(dir)) {
> +		dev_dbg(&port->dev, "Failed to create port debugfs dir: %ld\n",
> +			PTR_ERR(dir));
> +		return dir;
> +	}
> +
> +	return dir;
> +}
> +
>  static struct cxl_port *__devm_cxl_add_port(struct device *host,
>  					    struct device *uport_dev,
>  					    resource_size_t component_reg_phys,
> @@ -861,6 +927,7 @@ struct cxl_port *devm_cxl_add_port(struct device *host,
>  				   struct cxl_dport *parent_dport)
>  {
>  	struct cxl_port *port, *parent_port;
> +	struct dentry *dir;
>  
>  	port = __devm_cxl_add_port(host, uport_dev, component_reg_phys,
>  				   parent_dport);
> @@ -878,6 +945,10 @@ struct cxl_port *devm_cxl_add_port(struct device *host,
>  			parent_port ? " to " : "",
>  			parent_port ? dev_name(&parent_port->dev) : "",
>  			parent_port ? "" : " (root port)");
> +
> +		dir = cxl_debugfs_create_port_dir(port);
> +		if (!IS_ERR(dir))
> +			port->debug_dir = dir;
>  	}
>  
>  	return port;
> @@ -1127,6 +1198,7 @@ struct cxl_dport *devm_cxl_add_dport(struct cxl_port *port,
>  				     resource_size_t component_reg_phys)
>  {
>  	struct cxl_dport *dport;
> +	int rc;
>  
>  	dport = __devm_cxl_add_dport(port, dport_dev, port_id,
>  				     component_reg_phys, CXL_RESOURCE_NONE);
> @@ -1136,6 +1208,11 @@ struct cxl_dport *devm_cxl_add_dport(struct cxl_port *port,
>  	} else {
>  		dev_dbg(dport_dev, "dport added to %s\n",
>  			dev_name(&port->dev));
> +
> +		rc = cxl_debugfs_create_dport_dir(port->debug_dir, dport);
> +		if (rc)
> +			dev_dbg(dport_dev,
> +				"Failed to create dport debugfs dir: %d\n", rc);

Drop the debug messages about failing to setup debugfs. This follows the
lead of other debugfs setup in CXL.

>  	}
>  
>  	return dport;
> @@ -1156,6 +1233,7 @@ struct cxl_dport *devm_cxl_add_rch_dport(struct cxl_port *port,
>  					 resource_size_t rcrb)
>  {
>  	struct cxl_dport *dport;
> +	int rc;
>  
>  	if (rcrb == CXL_RESOURCE_NONE) {
>  		dev_dbg(&port->dev, "failed to add RCH dport, missing RCRB\n");
> @@ -1170,6 +1248,12 @@ struct cxl_dport *devm_cxl_add_rch_dport(struct cxl_port *port,
>  	} else {
>  		dev_dbg(dport_dev, "RCH dport added to %s\n",
>  			dev_name(&port->dev));
> +
> +		rc = cxl_debugfs_create_dport_dir(port->debug_dir, dport);
> +		if (rc)
> +			dev_dbg(dport_dev,
> +				"Failed to create rch dport debugfs dir: %d\n",
> +				rc);
>  	}
>  
>  	return dport;
> diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
> index 687043ece101..3c7744fc3106 100644
> --- a/drivers/cxl/cxl.h
> +++ b/drivers/cxl/cxl.h
> @@ -590,6 +590,7 @@ struct cxl_dax_region {
>   * @depth: How deep this port is relative to the root. depth 0 is the root.
>   * @cdat: Cached CDAT data
>   * @cdat_available: Should a CDAT attribute be available in sysfs
> + * @debug_dir: dentry for port in cxl debugfs (optional)
>   */
>  struct cxl_port {
>  	struct device dev;
> @@ -612,6 +613,7 @@ struct cxl_port {
>  		size_t length;
>  	} cdat;
>  	bool cdat_available;
> +	struct dentry *debug_dir;

Part of why I asked for the debugfs file rename was to eliminate this
wart on the data structure.

>  };
>  
>  static inline struct cxl_dport *
> @@ -813,6 +815,14 @@ bool is_cxl_nvdimm_bridge(struct device *dev);
>  int devm_cxl_add_nvdimm(struct cxl_memdev *cxlmd);
>  struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_memdev *cxlmd);
>  
> +struct cxl_einj_ops {
> +	int (*einj_type)(struct seq_file *f, void *data);
> +	int (*einj_inject)(struct cxl_dport *dport, u64 type);
> +};
> +
> +void cxl_einj_set_ops_cbs(struct cxl_einj_ops *ops);
> +
> +
>  #ifdef CONFIG_CXL_REGION
>  bool is_cxl_pmem_region(struct device *dev);
>  struct cxl_pmem_region *to_cxl_pmem_region(struct device *dev);
> -- 
> 2.34.1
> 
>
Ben Cheatham Dec. 8, 2023, 4:22 p.m. UTC | #2
Thanks for taking a look Dan! Replies inline.

On 12/7/23 5:13 PM, Dan Williams wrote:
> Ben Cheatham wrote:
>> Add creation of debugfs directories for ports and dports under
>> /sys/kernel/debug/cxl when EINJ support is enabled. The dport
>> directories will contain files for injecting CXL protocol errors.
>> These files are only usable once the EINJ module has loaded and
>> registered callback functions with the CXL core module, before that
>> occurs (or if the EINJ module isn't loaded) the files will do nothing
>> and return an ENODEV error.
>>
>> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
>> Signed-off-by: Ben Cheatham <Benjamin.Cheatham@amd.com>
>> ---
>>  Documentation/ABI/testing/debugfs-cxl | 27 +++++++++
>>  drivers/cxl/core/port.c               | 84 +++++++++++++++++++++++++++
>>  drivers/cxl/cxl.h                     | 10 ++++
>>  3 files changed, 121 insertions(+)
>>
>> diff --git a/Documentation/ABI/testing/debugfs-cxl b/Documentation/ABI/testing/debugfs-cxl
>> index fe61d372e3fa..782a1bb78884 100644
>> --- a/Documentation/ABI/testing/debugfs-cxl
>> +++ b/Documentation/ABI/testing/debugfs-cxl
>> @@ -33,3 +33,30 @@ Description:
>>  		device cannot clear poison from the address, -ENXIO is returned.
>>  		The clear_poison attribute is only visible for devices
>>  		supporting the capability.
>> +
>> +What:		/sys/kernel/debug/cxl/portX/dportY/einj_types
> 
> Given this file is identical contents for all dports it only needs to
> exist in one common location
> 
> /sys/kernel/debug/cxl/einj_types
> 

Good point, I'll make that change.

> 
>> +Date:		November, 2023
>> +KernelVersion:	v6.8
>> +Contact:	linux-cxl@vger.kernel.org
>> +Description:
>> +		(RO) Prints the CXL protocol error types made available by
>> +		the platform in the format "0x<error number>	<error type>".
>> +		The <error number> can be written to einj_inject to inject
>> +		<error type> into dportY. This file is only visible if
>> +		CONFIG_ACPI_APEI_EINJ is enabled, and the EINJ module must
>> +		be able to reach one (or both) of the CXL_ACPI or CXL_PORT
>> +		modules to be functional.
> 
> This can be simplified. Have something like:
> 
> config CXL_EINJ
> 	default CXL_BUS
> 	depends on ACPI_APEI_EINJ && ACPI_APEI_EINJ=CXL_BUS
> 	...
> 
> Then the documentation moves to Kconfig for how to enable this and the
> CXL code can directly call into the EINJ module without worry.
> 
> It would of course need stubs like these in a shared header:
> 
> #ifdef CONFIG_CXL_EINJ
> int cxl_einj_available_error_type(struct seq_file *m, void *v);
> int cxl_einj_inject_error(struct cxl_dport *dport, u64 type);
> #else
> static inline int cxl_einj_available_error_type(struct seq_file *m, void *v)
> {
> 	return -ENXIO;
> }
> 
> int cxl_einj_inject_error(struct cxl_dport *dport, u64 type)
> {
> 	return -ENXIO;
> }
> #endif
> 

I had to go back and take a look, but I had a shared header in v5 (link: https://lore.kernel.org/linux-cxl/20230926120418.0000575d@Huawei.com/). 
Jonathan recommended that I instead include cxl.h directly, but that was pretty much a completely different patch set
at the time (and the header was under include/linux/). That being said, I agree that a header under drivers/cxl would
make much more sense here.

>> +
>> +What:		/sys/kernel/debug/cxl/portX/dportY/einj_inject
> 
> See my comments on cxl_debugfs_create_dport_dir() later on, but I think
> the "portX" directory can be eliminated.
> 
>> +Date:		November, 2023
>> +KernelVersion:	v6.8
>> +Contact:	linux-cxl@vger.kernel.org
>> +Description:
>> +		(WO) Writing an integer to this file injects the corresponding
>> +		CXL protocol error into dportY (integer to type mapping is
>> +		available by reading from einj_types). If the dport was
>> +		enumerated in RCH mode, a CXL 1.1 error is injected, otherwise
>> +		a CXL 2.0 error is injected. This file is only visible if
>> +		CONFIG_ACPI_APEI_EINJ is enabled, and the EINJ module must
>> +		be able to reach one (or both) of the CXL_ACPI or CXL_PORT
>> +		modules to be functional.
> 
> Similar comments about dropping these details that can just be solved in
> Kconfig.
> 
> This is next comment is on EINJ ABI, but you can skip it just to
> maintain momentum with the status quo. Why require the user to do the
> string to integer conversion? Seems like a small matter of programming
> to allow:
> 
> echo "CXL.cache Protocol Correctable" > einj_inject
> 
> ...to do the right thing. That probably makes scripts more readable as
> well.
> 

That's a good point. I can do that, but I think it may be better to keep the
consistency with the EINJ module to simplify things for end users. If you feel
that isn't a big enough concern I can go ahead and modify it.

>> diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
>> index 38441634e4c6..acf10415a174 100644
>> --- a/drivers/cxl/core/port.c
>> +++ b/drivers/cxl/core/port.c
>> @@ -783,6 +783,72 @@ static int cxl_dport_setup_regs(struct device *host, struct cxl_dport *dport,
>>  	return rc;
>>  }
>>  
>> +static struct cxl_einj_ops einj_ops;
>> +void cxl_einj_set_ops_cbs(struct cxl_einj_ops *ops)
>> +{
>> +	if (!IS_REACHABLE(CONFIG_ACPI_APEI_EINJ) || !ops)
>> +		return;
>> +
>> +	einj_ops = *ops;
>> +}
>> +EXPORT_SYMBOL_NS_GPL(cxl_einj_set_ops_cbs, CXL);
> 
> einj_ops goes away when the CXL code can just call the EINJ module
> directly.
> 
>> +
>> +static int cxl_einj_type_show(struct seq_file *f, void *data)
>> +{
>> +	if (!einj_ops.einj_type)
>> +		return -ENODEV;
>> +
>> +	return einj_ops.einj_type(f, data);
>> +}
>> +
>> +static int cxl_einj_inject(void *data, u64 type)
>> +{
>> +	struct cxl_dport *dport = data;
>> +
>> +	if (!einj_ops.einj_inject)
>> +		return -ENODEV;
>> +
>> +	return einj_ops.einj_inject(dport, type);
>> +}
>> +DEFINE_DEBUGFS_ATTRIBUTE(cxl_einj_inject_fops, NULL, cxl_einj_inject, "%llx\n");
> 
> The wrappers go away and DEFINE_DEBUGFS_ATTRIBUTE() can reference the
> EINJ symbols directly.
> 
>> +
>> +static int cxl_debugfs_create_dport_dir(struct dentry *port_dir,
>> +						   struct cxl_dport *dport)
>> +{
>> +	struct dentry *dir;
>> +	char dir_name[32];
>> +
>> +	snprintf(dir_name, 31, "dport%d", dport->port_id);
> 
> How about dev_name(dport->dport_dev) rather than the dynamic name?
> 
> The other benefit of this is that the dport_dev names are unique, so you
> can move the einj_inject file to:
> 
> /sys/kernel/debug/cxl/$dport_dev/einj_inject
> 

I didn't realize the dport names were also unique. I'll go ahead and do that instead.

>> +	dir = debugfs_create_dir(dir_name, port_dir);
>> +	if (IS_ERR(dir))
>> +		return PTR_ERR(dir);
>> +
>> +	debugfs_create_devm_seqfile(dport->dport_dev, "einj_types", dir,
>> +				    cxl_einj_type_show);
> 
> Per above, move this to be a top-level file.
> 

Will do.

>> +
>> +	debugfs_create_file("einj_inject", 0200, dir, dport,
>> +			    &cxl_einj_inject_fops);
>> +	return 0;
> 
> debugfs is good about failing gracefully when pre-requisites are not
> present. This is why none of the debugfs creation helpers have return
> codes because failing to setup debugfs is never fatal.
> 
> In other words, it is ok to take the output of debugfs_create_dir()
> without checking, and this function should not be returning an error.
> 

Will do.

>> +}
>> +
>> +static struct dentry *cxl_debugfs_create_port_dir(struct cxl_port *port)
>> +{
>> +	const char *dir_name = dev_name(&port->dev);
>> +	struct dentry *dir;
>> +
>> +	if (!IS_ENABLED(CONFIG_ACPI_APEI_EINJ))
>> +		return ERR_PTR(-ENODEV);
>> +
>> +	dir = cxl_debugfs_create_dir(dir_name);
>> +	if (IS_ERR(dir)) {
>> +		dev_dbg(&port->dev, "Failed to create port debugfs dir: %ld\n",
>> +			PTR_ERR(dir));
>> +		return dir;
>> +	}
>> +
>> +	return dir;
>> +}
>> +
>>  static struct cxl_port *__devm_cxl_add_port(struct device *host,
>>  					    struct device *uport_dev,
>>  					    resource_size_t component_reg_phys,
>> @@ -861,6 +927,7 @@ struct cxl_port *devm_cxl_add_port(struct device *host,
>>  				   struct cxl_dport *parent_dport)
>>  {
>>  	struct cxl_port *port, *parent_port;
>> +	struct dentry *dir;
>>  
>>  	port = __devm_cxl_add_port(host, uport_dev, component_reg_phys,
>>  				   parent_dport);
>> @@ -878,6 +945,10 @@ struct cxl_port *devm_cxl_add_port(struct device *host,
>>  			parent_port ? " to " : "",
>>  			parent_port ? dev_name(&parent_port->dev) : "",
>>  			parent_port ? "" : " (root port)");
>> +
>> +		dir = cxl_debugfs_create_port_dir(port);
>> +		if (!IS_ERR(dir))
>> +			port->debug_dir = dir;
>>  	}
>>  
>>  	return port;
>> @@ -1127,6 +1198,7 @@ struct cxl_dport *devm_cxl_add_dport(struct cxl_port *port,
>>  				     resource_size_t component_reg_phys)
>>  {
>>  	struct cxl_dport *dport;
>> +	int rc;
>>  
>>  	dport = __devm_cxl_add_dport(port, dport_dev, port_id,
>>  				     component_reg_phys, CXL_RESOURCE_NONE);
>> @@ -1136,6 +1208,11 @@ struct cxl_dport *devm_cxl_add_dport(struct cxl_port *port,
>>  	} else {
>>  		dev_dbg(dport_dev, "dport added to %s\n",
>>  			dev_name(&port->dev));
>> +
>> +		rc = cxl_debugfs_create_dport_dir(port->debug_dir, dport);
>> +		if (rc)
>> +			dev_dbg(dport_dev,
>> +				"Failed to create dport debugfs dir: %d\n", rc);
> 
> Drop the debug messages about failing to setup debugfs. This follows the
> lead of other debugfs setup in CXL.
> 

Will do.

>>  	}
>>  
>>  	return dport;
>> @@ -1156,6 +1233,7 @@ struct cxl_dport *devm_cxl_add_rch_dport(struct cxl_port *port,
>>  					 resource_size_t rcrb)
>>  {
>>  	struct cxl_dport *dport;
>> +	int rc;
>>  
>>  	if (rcrb == CXL_RESOURCE_NONE) {
>>  		dev_dbg(&port->dev, "failed to add RCH dport, missing RCRB\n");
>> @@ -1170,6 +1248,12 @@ struct cxl_dport *devm_cxl_add_rch_dport(struct cxl_port *port,
>>  	} else {
>>  		dev_dbg(dport_dev, "RCH dport added to %s\n",
>>  			dev_name(&port->dev));
>> +
>> +		rc = cxl_debugfs_create_dport_dir(port->debug_dir, dport);
>> +		if (rc)
>> +			dev_dbg(dport_dev,
>> +				"Failed to create rch dport debugfs dir: %d\n",
>> +				rc);
>>  	}
>>  
>>  	return dport;
>> diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
>> index 687043ece101..3c7744fc3106 100644
>> --- a/drivers/cxl/cxl.h
>> +++ b/drivers/cxl/cxl.h
>> @@ -590,6 +590,7 @@ struct cxl_dax_region {
>>   * @depth: How deep this port is relative to the root. depth 0 is the root.
>>   * @cdat: Cached CDAT data
>>   * @cdat_available: Should a CDAT attribute be available in sysfs
>> + * @debug_dir: dentry for port in cxl debugfs (optional)
>>   */
>>  struct cxl_port {
>>  	struct device dev;
>> @@ -612,6 +613,7 @@ struct cxl_port {
>>  		size_t length;
>>  	} cdat;
>>  	bool cdat_available;
>> +	struct dentry *debug_dir;
> 
> Part of why I asked for the debugfs file rename was to eliminate this
> wart on the data structure.
> 

Yeah I wasn't that happy about adding it, so I'd be happy to remove it!

>>  };
>>  
>>  static inline struct cxl_dport *
>> @@ -813,6 +815,14 @@ bool is_cxl_nvdimm_bridge(struct device *dev);
>>  int devm_cxl_add_nvdimm(struct cxl_memdev *cxlmd);
>>  struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_memdev *cxlmd);
>>  
>> +struct cxl_einj_ops {
>> +	int (*einj_type)(struct seq_file *f, void *data);
>> +	int (*einj_inject)(struct cxl_dport *dport, u64 type);
>> +};
>> +
>> +void cxl_einj_set_ops_cbs(struct cxl_einj_ops *ops);
>> +
>> +
>>  #ifdef CONFIG_CXL_REGION
>>  bool is_cxl_pmem_region(struct device *dev);
>>  struct cxl_pmem_region *to_cxl_pmem_region(struct device *dev);
>> -- 
>> 2.34.1
>>
>>
> 
> 
>
Dan Williams Dec. 8, 2023, 6:01 p.m. UTC | #3
Ben Cheatham wrote:
[..]
> > This can be simplified. Have something like:
> > 
> > config CXL_EINJ
> > 	default CXL_BUS
> > 	depends on ACPI_APEI_EINJ && ACPI_APEI_EINJ=CXL_BUS
> > 	...
> > 
> > Then the documentation moves to Kconfig for how to enable this and the
> > CXL code can directly call into the EINJ module without worry.
> > 
> > It would of course need stubs like these in a shared header:
> > 
> > #ifdef CONFIG_CXL_EINJ
> > int cxl_einj_available_error_type(struct seq_file *m, void *v);
> > int cxl_einj_inject_error(struct cxl_dport *dport, u64 type);
> > #else
> > static inline int cxl_einj_available_error_type(struct seq_file *m, void *v)
> > {
> > 	return -ENXIO;
> > }
> > 
> > int cxl_einj_inject_error(struct cxl_dport *dport, u64 type)
> > {
> > 	return -ENXIO;
> > }
> > #endif
> > 
> 
> I had to go back and take a look, but I had a shared header in v5
> (link:
> https://lore.kernel.org/linux-cxl/20230926120418.0000575d@Huawei.com/).
> Jonathan recommended that I instead include cxl.h directly, but that
> was pretty much a completely different patch set at the time (and the
> header was under include/linux/). That being said, I agree that a
> header under drivers/cxl would make much more sense here.

I agree with Jonathan that there are still cases where it makes sense to
do the relative include thing, but cxl_pmu is an intimate extenstion of
the CXL subsystem that just happens to live in a another directory. This
EINJ support is a handful of functions to communicate between modules
with no need for EINJ to understand all the gory details in cxl.h.

[..]
> >> +Date:               November, 2023
> >> +KernelVersion:      v6.8
> >> +Contact:    linux-cxl@vger.kernel.org
> >> +Description:
> >> +            (WO) Writing an integer to this file injects the corresponding
> >> +            CXL protocol error into dportY (integer to type mapping is
> >> +            available by reading from einj_types). If the dport was
> >> +            enumerated in RCH mode, a CXL 1.1 error is injected, otherwise
> >> +            a CXL 2.0 error is injected. This file is only visible if
> >> +            CONFIG_ACPI_APEI_EINJ is enabled, and the EINJ module must
> >> +            be able to reach one (or both) of the CXL_ACPI or CXL_PORT
> >> +            modules to be functional.
> > 
> > Similar comments about dropping these details that can just be solved in
> > Kconfig.
> > 
> > This is next comment is on EINJ ABI, but you can skip it just to
> > maintain momentum with the status quo. Why require the user to do the
> > string to integer conversion? Seems like a small matter of programming
> > to allow:
> > 
> > echo "CXL.cache Protocol Correctable" > einj_inject
> > 
> > ...to do the right thing. That probably makes scripts more readable as
> > well.
> > 
> 
> That's a good point. I can do that, but I think it may be better to keep the
> consistency with the EINJ module to simplify things for end users. If you feel
> that isn't a big enough concern I can go ahead and modify it.

Oh, definitely keep the old style as well. I was thinking of something
like:

echo "CXL.cache Protocol Correctable" > einj_inject
echo "0x1000" > einj_inject

...having the same result. Up to you though, I will still take the
series if only the integer way works.

[..]
> >> +	snprintf(dir_name, 31, "dport%d", dport->port_id);
> > 
> > How about dev_name(dport->dport_dev) rather than the dynamic name?
> > 
> > The other benefit of this is that the dport_dev names are unique, so you
> > can move the einj_inject file to:
> > 
> > /sys/kernel/debug/cxl/$dport_dev/einj_inject
> > 
> 
> I didn't realize the dport names were also unique. I'll go ahead and do that instead.

Yeah, you can assume that all devices that share a bus must have unique
names so that /sys/bus/$bus/devices can list all of them without
name collisions.
Ben Cheatham Dec. 8, 2023, 8:35 p.m. UTC | #4
On 12/8/23 12:01 PM, Dan Williams wrote:
> Ben Cheatham wrote:
> [..]
>>> This can be simplified. Have something like:
>>>
>>> config CXL_EINJ
>>> 	default CXL_BUS
>>> 	depends on ACPI_APEI_EINJ && ACPI_APEI_EINJ=CXL_BUS
>>> 	...
>>>
>>> Then the documentation moves to Kconfig for how to enable this and the
>>> CXL code can directly call into the EINJ module without worry.
>>>
>>> It would of course need stubs like these in a shared header:
>>>
>>> #ifdef CONFIG_CXL_EINJ
>>> int cxl_einj_available_error_type(struct seq_file *m, void *v);
>>> int cxl_einj_inject_error(struct cxl_dport *dport, u64 type);
>>> #else
>>> static inline int cxl_einj_available_error_type(struct seq_file *m, void *v)
>>> {
>>> 	return -ENXIO;
>>> }
>>>
>>> int cxl_einj_inject_error(struct cxl_dport *dport, u64 type)
>>> {
>>> 	return -ENXIO;
>>> }
>>> #endif
>>>
>>
>> I had to go back and take a look, but I had a shared header in v5
>> (link:
>> https://lore.kernel.org/linux-cxl/20230926120418.0000575d@Huawei.com/).
>> Jonathan recommended that I instead include cxl.h directly, but that
>> was pretty much a completely different patch set at the time (and the
>> header was under include/linux/). That being said, I agree that a
>> header under drivers/cxl would make much more sense here.
> 
> I agree with Jonathan that there are still cases where it makes sense to
> do the relative include thing, but cxl_pmu is an intimate extenstion of
> the CXL subsystem that just happens to live in a another directory. This
> EINJ support is a handful of functions to communicate between modules
> with no need for EINJ to understand all the gory details in cxl.h.
> 

All right that makes sense. Thanks for the clarification.

> [..]
>>>> +Date:               November, 2023
>>>> +KernelVersion:      v6.8
>>>> +Contact:    linux-cxl@vger.kernel.org
>>>> +Description:
>>>> +            (WO) Writing an integer to this file injects the corresponding
>>>> +            CXL protocol error into dportY (integer to type mapping is
>>>> +            available by reading from einj_types). If the dport was
>>>> +            enumerated in RCH mode, a CXL 1.1 error is injected, otherwise
>>>> +            a CXL 2.0 error is injected. This file is only visible if
>>>> +            CONFIG_ACPI_APEI_EINJ is enabled, and the EINJ module must
>>>> +            be able to reach one (or both) of the CXL_ACPI or CXL_PORT
>>>> +            modules to be functional.
>>>
>>> Similar comments about dropping these details that can just be solved in
>>> Kconfig.
>>>
>>> This is next comment is on EINJ ABI, but you can skip it just to
>>> maintain momentum with the status quo. Why require the user to do the
>>> string to integer conversion? Seems like a small matter of programming
>>> to allow:
>>>
>>> echo "CXL.cache Protocol Correctable" > einj_inject
>>>
>>> ...to do the right thing. That probably makes scripts more readable as
>>> well.
>>>
>>
>> That's a good point. I can do that, but I think it may be better to keep the
>> consistency with the EINJ module to simplify things for end users. If you feel
>> that isn't a big enough concern I can go ahead and modify it.
> 
> Oh, definitely keep the old style as well. I was thinking of something
> like:
> 
> echo "CXL.cache Protocol Correctable" > einj_inject
> echo "0x1000" > einj_inject
> 
> ...having the same result. Up to you though, I will still take the
> series if only the integer way works.
> 

Sounds good. If I get the time I'll add in the string version as well.

> [..]
>>>> +	snprintf(dir_name, 31, "dport%d", dport->port_id);
>>>
>>> How about dev_name(dport->dport_dev) rather than the dynamic name?
>>>
>>> The other benefit of this is that the dport_dev names are unique, so you
>>> can move the einj_inject file to:
>>>
>>> /sys/kernel/debug/cxl/$dport_dev/einj_inject
>>>
>>
>> I didn't realize the dport names were also unique. I'll go ahead and do that instead.
> 
> Yeah, you can assume that all devices that share a bus must have unique
> names so that /sys/bus/$bus/devices can list all of them without
> name collisions.

Yeah I was thinking that dev_name(dport->dport_dev) would give a name like dportX for some reason
and realized what that would actually do about 30 mins after I sent out the email :/.

Thanks,
Ben
Ben Cheatham Dec. 8, 2023, 10:27 p.m. UTC | #5
On 12/8/23 2:35 PM, Ben Cheatham wrote:
> 
> 
> On 12/8/23 12:01 PM, Dan Williams wrote:
>> Ben Cheatham wrote:
>> [..]
>>>> This can be simplified. Have something like:
>>>>
>>>> config CXL_EINJ
>>>> 	default CXL_BUS
>>>> 	depends on ACPI_APEI_EINJ && ACPI_APEI_EINJ=CXL_BUS
>>>> 	...
>>>>
>>>> Then the documentation moves to Kconfig for how to enable this and the
>>>> CXL code can directly call into the EINJ module without worry.
>>>>
>>>> It would of course need stubs like these in a shared header:
>>>>
>>>> #ifdef CONFIG_CXL_EINJ
>>>> int cxl_einj_available_error_type(struct seq_file *m, void *v);
>>>> int cxl_einj_inject_error(struct cxl_dport *dport, u64 type);
>>>> #else
>>>> static inline int cxl_einj_available_error_type(struct seq_file *m, void *v)
>>>> {
>>>> 	return -ENXIO;
>>>> }
>>>>
>>>> int cxl_einj_inject_error(struct cxl_dport *dport, u64 type)
>>>> {
>>>> 	return -ENXIO;
>>>> }
>>>> #endif
>>>>
>>>
>>> I had to go back and take a look, but I had a shared header in v5
>>> (link:
>>> https://lore.kernel.org/linux-cxl/20230926120418.0000575d@Huawei.com/).
>>> Jonathan recommended that I instead include cxl.h directly, but that
>>> was pretty much a completely different patch set at the time (and the
>>> header was under include/linux/). That being said, I agree that a
>>> header under drivers/cxl would make much more sense here.
>>
>> I agree with Jonathan that there are still cases where it makes sense to
>> do the relative include thing, but cxl_pmu is an intimate extenstion of
>> the CXL subsystem that just happens to live in a another directory. This
>> EINJ support is a handful of functions to communicate between modules
>> with no need for EINJ to understand all the gory details in cxl.h.
>>
> 
> All right that makes sense. Thanks for the clarification.
> 

Ok so I took a look at implementing this. I don't think this solution ends up having
the intended behavior. Using a shared header and the Kconfig rules above introduces
a dependency on the EINJ module, which I was trying to avoid by using the callbacks
since I don't think the CXL core should fail to load if the EINJ module fails.
So, unless you have any other suggestions, I'll use the Kconfig rules above but keep
the callbacks (and also change the EINJ module to use IS_REACHABLE(CONFIG_CXL_EINJ) instead
of IS_REACHABLE(CONFIG_CXL_ACPI) || IS_REACHABLE(CONFIG_CXL_PORT)). I may also just
be doing something wrong (most likely due to late Friday brain fog), so let me know if
I got something wrong here.

Thanks,
Ben

>> [..]
>>>>> +Date:               November, 2023
>>>>> +KernelVersion:      v6.8
>>>>> +Contact:    linux-cxl@vger.kernel.org
>>>>> +Description:
>>>>> +            (WO) Writing an integer to this file injects the corresponding
>>>>> +            CXL protocol error into dportY (integer to type mapping is
>>>>> +            available by reading from einj_types). If the dport was
>>>>> +            enumerated in RCH mode, a CXL 1.1 error is injected, otherwise
>>>>> +            a CXL 2.0 error is injected. This file is only visible if
>>>>> +            CONFIG_ACPI_APEI_EINJ is enabled, and the EINJ module must
>>>>> +            be able to reach one (or both) of the CXL_ACPI or CXL_PORT
>>>>> +            modules to be functional.
>>>>
>>>> Similar comments about dropping these details that can just be solved in
>>>> Kconfig.
>>>>
>>>> This is next comment is on EINJ ABI, but you can skip it just to
>>>> maintain momentum with the status quo. Why require the user to do the
>>>> string to integer conversion? Seems like a small matter of programming
>>>> to allow:
>>>>
>>>> echo "CXL.cache Protocol Correctable" > einj_inject
>>>>
>>>> ...to do the right thing. That probably makes scripts more readable as
>>>> well.
>>>>
>>>
>>> That's a good point. I can do that, but I think it may be better to keep the
>>> consistency with the EINJ module to simplify things for end users. If you feel
>>> that isn't a big enough concern I can go ahead and modify it.
>>
>> Oh, definitely keep the old style as well. I was thinking of something
>> like:
>>
>> echo "CXL.cache Protocol Correctable" > einj_inject
>> echo "0x1000" > einj_inject
>>
>> ...having the same result. Up to you though, I will still take the
>> series if only the integer way works.
>>
> 
> Sounds good. If I get the time I'll add in the string version as well.
> 
>> [..]
>>>>> +	snprintf(dir_name, 31, "dport%d", dport->port_id);
>>>>
>>>> How about dev_name(dport->dport_dev) rather than the dynamic name?
>>>>
>>>> The other benefit of this is that the dport_dev names are unique, so you
>>>> can move the einj_inject file to:
>>>>
>>>> /sys/kernel/debug/cxl/$dport_dev/einj_inject
>>>>
>>>
>>> I didn't realize the dport names were also unique. I'll go ahead and do that instead.
>>
>> Yeah, you can assume that all devices that share a bus must have unique
>> names so that /sys/bus/$bus/devices can list all of them without
>> name collisions.
> 
> Yeah I was thinking that dev_name(dport->dport_dev) would give a name like dportX for some reason
> and realized what that would actually do about 30 mins after I sent out the email :/.
> 
> Thanks,
> Ben
>
Dan Williams Dec. 9, 2023, 12:07 a.m. UTC | #6
Ben Cheatham wrote:
> 
> 
> On 12/8/23 2:35 PM, Ben Cheatham wrote:
> > 
> > 
> > On 12/8/23 12:01 PM, Dan Williams wrote:
> >> Ben Cheatham wrote:
> >> [..]
> >>>> This can be simplified. Have something like:
> >>>>
> >>>> config CXL_EINJ
> >>>> 	default CXL_BUS
> >>>> 	depends on ACPI_APEI_EINJ && ACPI_APEI_EINJ=CXL_BUS
> >>>> 	...
> >>>>
> >>>> Then the documentation moves to Kconfig for how to enable this and the
> >>>> CXL code can directly call into the EINJ module without worry.
> >>>>
> >>>> It would of course need stubs like these in a shared header:
> >>>>
> >>>> #ifdef CONFIG_CXL_EINJ
> >>>> int cxl_einj_available_error_type(struct seq_file *m, void *v);
> >>>> int cxl_einj_inject_error(struct cxl_dport *dport, u64 type);
> >>>> #else
> >>>> static inline int cxl_einj_available_error_type(struct seq_file *m, void *v)
> >>>> {
> >>>> 	return -ENXIO;
> >>>> }
> >>>>
> >>>> int cxl_einj_inject_error(struct cxl_dport *dport, u64 type)
> >>>> {
> >>>> 	return -ENXIO;
> >>>> }
> >>>> #endif
> >>>>
> >>>
> >>> I had to go back and take a look, but I had a shared header in v5
> >>> (link:
> >>> https://lore.kernel.org/linux-cxl/20230926120418.0000575d@Huawei.com/).
> >>> Jonathan recommended that I instead include cxl.h directly, but that
> >>> was pretty much a completely different patch set at the time (and the
> >>> header was under include/linux/). That being said, I agree that a
> >>> header under drivers/cxl would make much more sense here.
> >>
> >> I agree with Jonathan that there are still cases where it makes sense to
> >> do the relative include thing, but cxl_pmu is an intimate extenstion of
> >> the CXL subsystem that just happens to live in a another directory. This
> >> EINJ support is a handful of functions to communicate between modules
> >> with no need for EINJ to understand all the gory details in cxl.h.
> >>
> > 
> > All right that makes sense. Thanks for the clarification.
> > 
> 
> Ok so I took a look at implementing this. I don't think this solution ends up having
> the intended behavior. Using a shared header and the Kconfig rules above introduces
> a dependency on the EINJ module, which I was trying to avoid by using the callbacks
> since I don't think the CXL core should fail to load if the EINJ module fails.

Good looking out for that. However, if EINJ is going to be offering
services to other parts of the kernel it needs to be better behaved as
library module that can export symbols independent of the platform
specific, not software, error conditions that can make einj_init() fail.

> So, unless you have any other suggestions, I'll use the Kconfig rules above but keep
> the callbacks (and also change the EINJ module to use IS_REACHABLE(CONFIG_CXL_EINJ) instead
> of IS_REACHABLE(CONFIG_CXL_ACPI) || IS_REACHABLE(CONFIG_CXL_PORT)). I may also just
> be doing something wrong (most likely due to late Friday brain fog), so let me know if
> I got something wrong here.

I think the dependency is ok, it's the myriad of failure cases in
einj_init() that are at issue.

If einj.ko was loaded relative to a platform device then it would
already be the case that all of those platform specific setup details
would move to something like einj_probe() insted of einj_init().
Unfortuantely, a full refactor along those lines would probably yield
more violence than benefit.

However, something like that can be approximated since einj.ko, before
your changes, is only ever loaded manually. Just require it to be
unloaded manually unless CXL has it pinned, something like the below.

Open to suggestions from other linux-acpi@ denizens.

diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c
index 013eb621dc92..9c179766af88 100644
--- a/drivers/acpi/apei/einj.c
+++ b/drivers/acpi/apei/einj.c
@@ -137,6 +137,7 @@ static struct apei_exec_ins_type einj_ins_type[] = {
 static DEFINE_MUTEX(einj_mutex);
 
 static void *einj_param;
+static bool einj_initialized;
 
 static void einj_exec_ctx_init(struct apei_exec_context *ctx)
 {
@@ -684,7 +685,7 @@ static int einj_check_table(struct acpi_table_einj *einj_tab)
 	return 0;
 }
 
-static int __init einj_init(void)
+static int __init __einj_init(void)
 {
 	int rc;
 	acpi_status status;
@@ -782,10 +783,30 @@ static int __init einj_init(void)
 	return rc;
 }
 
+static int __init einj_init(void)
+{
+	int rc = __einj_init();
+
+	einj_initialized = (rc == 0);
+
+	/*
+	 * CXL needs to be able to link and call its error injection
+	 * helpers regardless of whether the EINJ table is present and
+	 * initialized correctly. Helpers check @einj_initialized.
+	 */
+	if (IS_ENABLED(CONFIG_CXL_EINJ))
+		return 0;
+
+	return rc;
+}
+
 static void __exit einj_exit(void)
 {
 	struct apei_exec_context ctx;
 
+	if (!einj_initialized)
+		return;
+
 	if (einj_param) {
 		acpi_size size = (acpi5) ?
 			sizeof(struct set_error_type_with_address) :
diff mbox series

Patch

diff --git a/Documentation/ABI/testing/debugfs-cxl b/Documentation/ABI/testing/debugfs-cxl
index fe61d372e3fa..782a1bb78884 100644
--- a/Documentation/ABI/testing/debugfs-cxl
+++ b/Documentation/ABI/testing/debugfs-cxl
@@ -33,3 +33,30 @@  Description:
 		device cannot clear poison from the address, -ENXIO is returned.
 		The clear_poison attribute is only visible for devices
 		supporting the capability.
+
+What:		/sys/kernel/debug/cxl/portX/dportY/einj_types
+Date:		November, 2023
+KernelVersion:	v6.8
+Contact:	linux-cxl@vger.kernel.org
+Description:
+		(RO) Prints the CXL protocol error types made available by
+		the platform in the format "0x<error number>	<error type>".
+		The <error number> can be written to einj_inject to inject
+		<error type> into dportY. This file is only visible if
+		CONFIG_ACPI_APEI_EINJ is enabled, and the EINJ module must
+		be able to reach one (or both) of the CXL_ACPI or CXL_PORT
+		modules to be functional.
+
+What:		/sys/kernel/debug/cxl/portX/dportY/einj_inject
+Date:		November, 2023
+KernelVersion:	v6.8
+Contact:	linux-cxl@vger.kernel.org
+Description:
+		(WO) Writing an integer to this file injects the corresponding
+		CXL protocol error into dportY (integer to type mapping is
+		available by reading from einj_types). If the dport was
+		enumerated in RCH mode, a CXL 1.1 error is injected, otherwise
+		a CXL 2.0 error is injected. This file is only visible if
+		CONFIG_ACPI_APEI_EINJ is enabled, and the EINJ module must
+		be able to reach one (or both) of the CXL_ACPI or CXL_PORT
+		modules to be functional.
diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index 38441634e4c6..acf10415a174 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -783,6 +783,72 @@  static int cxl_dport_setup_regs(struct device *host, struct cxl_dport *dport,
 	return rc;
 }
 
+static struct cxl_einj_ops einj_ops;
+void cxl_einj_set_ops_cbs(struct cxl_einj_ops *ops)
+{
+	if (!IS_REACHABLE(CONFIG_ACPI_APEI_EINJ) || !ops)
+		return;
+
+	einj_ops = *ops;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_einj_set_ops_cbs, CXL);
+
+static int cxl_einj_type_show(struct seq_file *f, void *data)
+{
+	if (!einj_ops.einj_type)
+		return -ENODEV;
+
+	return einj_ops.einj_type(f, data);
+}
+
+static int cxl_einj_inject(void *data, u64 type)
+{
+	struct cxl_dport *dport = data;
+
+	if (!einj_ops.einj_inject)
+		return -ENODEV;
+
+	return einj_ops.einj_inject(dport, type);
+}
+DEFINE_DEBUGFS_ATTRIBUTE(cxl_einj_inject_fops, NULL, cxl_einj_inject, "%llx\n");
+
+static int cxl_debugfs_create_dport_dir(struct dentry *port_dir,
+						   struct cxl_dport *dport)
+{
+	struct dentry *dir;
+	char dir_name[32];
+
+	snprintf(dir_name, 31, "dport%d", dport->port_id);
+	dir = debugfs_create_dir(dir_name, port_dir);
+	if (IS_ERR(dir))
+		return PTR_ERR(dir);
+
+	debugfs_create_devm_seqfile(dport->dport_dev, "einj_types", dir,
+				    cxl_einj_type_show);
+
+	debugfs_create_file("einj_inject", 0200, dir, dport,
+			    &cxl_einj_inject_fops);
+	return 0;
+}
+
+static struct dentry *cxl_debugfs_create_port_dir(struct cxl_port *port)
+{
+	const char *dir_name = dev_name(&port->dev);
+	struct dentry *dir;
+
+	if (!IS_ENABLED(CONFIG_ACPI_APEI_EINJ))
+		return ERR_PTR(-ENODEV);
+
+	dir = cxl_debugfs_create_dir(dir_name);
+	if (IS_ERR(dir)) {
+		dev_dbg(&port->dev, "Failed to create port debugfs dir: %ld\n",
+			PTR_ERR(dir));
+		return dir;
+	}
+
+	return dir;
+}
+
 static struct cxl_port *__devm_cxl_add_port(struct device *host,
 					    struct device *uport_dev,
 					    resource_size_t component_reg_phys,
@@ -861,6 +927,7 @@  struct cxl_port *devm_cxl_add_port(struct device *host,
 				   struct cxl_dport *parent_dport)
 {
 	struct cxl_port *port, *parent_port;
+	struct dentry *dir;
 
 	port = __devm_cxl_add_port(host, uport_dev, component_reg_phys,
 				   parent_dport);
@@ -878,6 +945,10 @@  struct cxl_port *devm_cxl_add_port(struct device *host,
 			parent_port ? " to " : "",
 			parent_port ? dev_name(&parent_port->dev) : "",
 			parent_port ? "" : " (root port)");
+
+		dir = cxl_debugfs_create_port_dir(port);
+		if (!IS_ERR(dir))
+			port->debug_dir = dir;
 	}
 
 	return port;
@@ -1127,6 +1198,7 @@  struct cxl_dport *devm_cxl_add_dport(struct cxl_port *port,
 				     resource_size_t component_reg_phys)
 {
 	struct cxl_dport *dport;
+	int rc;
 
 	dport = __devm_cxl_add_dport(port, dport_dev, port_id,
 				     component_reg_phys, CXL_RESOURCE_NONE);
@@ -1136,6 +1208,11 @@  struct cxl_dport *devm_cxl_add_dport(struct cxl_port *port,
 	} else {
 		dev_dbg(dport_dev, "dport added to %s\n",
 			dev_name(&port->dev));
+
+		rc = cxl_debugfs_create_dport_dir(port->debug_dir, dport);
+		if (rc)
+			dev_dbg(dport_dev,
+				"Failed to create dport debugfs dir: %d\n", rc);
 	}
 
 	return dport;
@@ -1156,6 +1233,7 @@  struct cxl_dport *devm_cxl_add_rch_dport(struct cxl_port *port,
 					 resource_size_t rcrb)
 {
 	struct cxl_dport *dport;
+	int rc;
 
 	if (rcrb == CXL_RESOURCE_NONE) {
 		dev_dbg(&port->dev, "failed to add RCH dport, missing RCRB\n");
@@ -1170,6 +1248,12 @@  struct cxl_dport *devm_cxl_add_rch_dport(struct cxl_port *port,
 	} else {
 		dev_dbg(dport_dev, "RCH dport added to %s\n",
 			dev_name(&port->dev));
+
+		rc = cxl_debugfs_create_dport_dir(port->debug_dir, dport);
+		if (rc)
+			dev_dbg(dport_dev,
+				"Failed to create rch dport debugfs dir: %d\n",
+				rc);
 	}
 
 	return dport;
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index 687043ece101..3c7744fc3106 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -590,6 +590,7 @@  struct cxl_dax_region {
  * @depth: How deep this port is relative to the root. depth 0 is the root.
  * @cdat: Cached CDAT data
  * @cdat_available: Should a CDAT attribute be available in sysfs
+ * @debug_dir: dentry for port in cxl debugfs (optional)
  */
 struct cxl_port {
 	struct device dev;
@@ -612,6 +613,7 @@  struct cxl_port {
 		size_t length;
 	} cdat;
 	bool cdat_available;
+	struct dentry *debug_dir;
 };
 
 static inline struct cxl_dport *
@@ -813,6 +815,14 @@  bool is_cxl_nvdimm_bridge(struct device *dev);
 int devm_cxl_add_nvdimm(struct cxl_memdev *cxlmd);
 struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_memdev *cxlmd);
 
+struct cxl_einj_ops {
+	int (*einj_type)(struct seq_file *f, void *data);
+	int (*einj_inject)(struct cxl_dport *dport, u64 type);
+};
+
+void cxl_einj_set_ops_cbs(struct cxl_einj_ops *ops);
+
+
 #ifdef CONFIG_CXL_REGION
 bool is_cxl_pmem_region(struct device *dev);
 struct cxl_pmem_region *to_cxl_pmem_region(struct device *dev);