diff mbox series

[v7,12/12] cxl/region: Deal with numa nodes not enumerated by SRAT

Message ID 20240308220055.2172956-13-dave.jiang@intel.com
State Accepted
Commit debdce20c4f28b7e5aa48512e7abf270a00e9051
Headers show
Series cxl: Add support to report region access coordinates to numa nodes | expand

Commit Message

Dave Jiang March 8, 2024, 9:59 p.m. UTC
For the numa nodes that are not created by SRAT, no memory_target is
allocated and is not managed by the HMAT_REPORTING code. Therefore
hmat_callback() memory hotplug notifier will exit early on those NUMA
nodes. The CXL memory hotplug notifier will need to call
node_set_perf_attrs() directly in order to setup the access sysfs
attributes.

In acpi_numa_init(), the last proximity domain (pxm) id created by SRAT is
stored. Add a helper function acpi_node_backed_by_real_pxm() in order to
check if a NUMA node id is defined by SRAT or created by CFMWS.

node_set_perf_attrs() symbol is exported to allow update of perf attribs
for a node. The sysfs path of
/sys/devices/system/node/nodeX/access0/initiators/* is created by
node_set_perf_attrs() for the various attributes where nodeX is matched
to the NUMA node of the CXL region.

Cc: Rafael J. Wysocki <rafael@kernel.org>
Reviewed-by: Alison Schofield <alison.schofield@intel.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Tested-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
---
v7:
- Fix typo is commit log. (Jonathan)
---
 drivers/acpi/numa/srat.c  | 11 +++++++++++
 drivers/base/node.c       |  1 +
 drivers/cxl/core/cdat.c   |  5 +++++
 drivers/cxl/core/core.h   |  1 +
 drivers/cxl/core/region.c |  7 ++++++-
 include/linux/acpi.h      |  9 +++++++++
 6 files changed, 33 insertions(+), 1 deletion(-)

Comments

Dan Williams March 12, 2024, 9:41 p.m. UTC | #1
Dave Jiang wrote:
> For the numa nodes that are not created by SRAT, no memory_target is
> allocated and is not managed by the HMAT_REPORTING code. Therefore
> hmat_callback() memory hotplug notifier will exit early on those NUMA
> nodes. The CXL memory hotplug notifier will need to call
> node_set_perf_attrs() directly in order to setup the access sysfs
> attributes.
> 
> In acpi_numa_init(), the last proximity domain (pxm) id created by SRAT is
> stored. Add a helper function acpi_node_backed_by_real_pxm() in order to
> check if a NUMA node id is defined by SRAT or created by CFMWS.
> 
> node_set_perf_attrs() symbol is exported to allow update of perf attribs
> for a node. The sysfs path of
> /sys/devices/system/node/nodeX/access0/initiators/* is created by
> node_set_perf_attrs() for the various attributes where nodeX is matched
> to the NUMA node of the CXL region.
> 
> Cc: Rafael J. Wysocki <rafael@kernel.org>
> Reviewed-by: Alison Schofield <alison.schofield@intel.com>
> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> Tested-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> Signed-off-by: Dave Jiang <dave.jiang@intel.com>
> ---
> v7:
> - Fix typo is commit log. (Jonathan)
> ---
>  drivers/acpi/numa/srat.c  | 11 +++++++++++
>  drivers/base/node.c       |  1 +
>  drivers/cxl/core/cdat.c   |  5 +++++
>  drivers/cxl/core/core.h   |  1 +
>  drivers/cxl/core/region.c |  7 ++++++-
>  include/linux/acpi.h      |  9 +++++++++
>  6 files changed, 33 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/acpi/numa/srat.c b/drivers/acpi/numa/srat.c
> index 0214518fc582..e45e64993c50 100644
> --- a/drivers/acpi/numa/srat.c
> +++ b/drivers/acpi/numa/srat.c
> @@ -29,6 +29,8 @@ static int node_to_pxm_map[MAX_NUMNODES]
>  unsigned char acpi_srat_revision __initdata;
>  static int acpi_numa __initdata;
>  
> +static int last_real_pxm;
> +
>  void __init disable_srat(void)
>  {
>  	acpi_numa = -1;
> @@ -536,6 +538,7 @@ int __init acpi_numa_init(void)
>  		if (node_to_pxm_map[i] > fake_pxm)
>  			fake_pxm = node_to_pxm_map[i];
>  	}
> +	last_real_pxm = fake_pxm;
>  	fake_pxm++;
>  	acpi_table_parse_cedt(ACPI_CEDT_TYPE_CFMWS, acpi_parse_cfmws,
>  			      &fake_pxm);
> @@ -547,6 +550,14 @@ int __init acpi_numa_init(void)
>  	return 0;
>  }
>  
> +bool acpi_node_backed_by_real_pxm(int nid)
> +{
> +	int pxm = node_to_pxm(nid);
> +
> +	return pxm <= last_real_pxm;
> +}
> +EXPORT_SYMBOL_GPL(acpi_node_backed_by_real_pxm);

I quibble with this naming because it is not about "real" vs "fake" is
about HMAT decorated PXMs vs not, but I do not want to hold up v6.9
consideration for this quibble.

It can be addressed when / if cxl_need_node_perf_attrs_update() ever
interoperates with a non ACPI platform.
diff mbox series

Patch

diff --git a/drivers/acpi/numa/srat.c b/drivers/acpi/numa/srat.c
index 0214518fc582..e45e64993c50 100644
--- a/drivers/acpi/numa/srat.c
+++ b/drivers/acpi/numa/srat.c
@@ -29,6 +29,8 @@  static int node_to_pxm_map[MAX_NUMNODES]
 unsigned char acpi_srat_revision __initdata;
 static int acpi_numa __initdata;
 
+static int last_real_pxm;
+
 void __init disable_srat(void)
 {
 	acpi_numa = -1;
@@ -536,6 +538,7 @@  int __init acpi_numa_init(void)
 		if (node_to_pxm_map[i] > fake_pxm)
 			fake_pxm = node_to_pxm_map[i];
 	}
+	last_real_pxm = fake_pxm;
 	fake_pxm++;
 	acpi_table_parse_cedt(ACPI_CEDT_TYPE_CFMWS, acpi_parse_cfmws,
 			      &fake_pxm);
@@ -547,6 +550,14 @@  int __init acpi_numa_init(void)
 	return 0;
 }
 
+bool acpi_node_backed_by_real_pxm(int nid)
+{
+	int pxm = node_to_pxm(nid);
+
+	return pxm <= last_real_pxm;
+}
+EXPORT_SYMBOL_GPL(acpi_node_backed_by_real_pxm);
+
 static int acpi_get_pxm(acpi_handle h)
 {
 	unsigned long long pxm;
diff --git a/drivers/base/node.c b/drivers/base/node.c
index a73b0c9a401a..eb72580288e6 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -215,6 +215,7 @@  void node_set_perf_attrs(unsigned int nid, struct access_coordinate *coord,
 		}
 	}
 }
+EXPORT_SYMBOL_GPL(node_set_perf_attrs);
 
 /**
  * struct node_cache_info - Internal tracking for memory node caches
diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c
index ee1bc8fa396b..10d4f1d7dad1 100644
--- a/drivers/cxl/core/cdat.c
+++ b/drivers/cxl/core/cdat.c
@@ -586,3 +586,8 @@  int cxl_update_hmat_access_coordinates(int nid, struct cxl_region *cxlr,
 {
 	return hmat_update_target_coordinates(nid, &cxlr->coord[access], access);
 }
+
+bool cxl_need_node_perf_attrs_update(int nid)
+{
+	return !acpi_node_backed_by_real_pxm(nid);
+}
diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
index e19800a7ce06..bc5a95665aa0 100644
--- a/drivers/cxl/core/core.h
+++ b/drivers/cxl/core/core.h
@@ -92,5 +92,6 @@  long cxl_pci_get_latency(struct pci_dev *pdev);
 
 int cxl_update_hmat_access_coordinates(int nid, struct cxl_region *cxlr,
 				       enum access_coordinate_class access);
+bool cxl_need_node_perf_attrs_update(int nid);
 
 #endif /* __CXL_CORE_H__ */
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index 535492ec8529..5c186e0a39b9 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -2279,7 +2279,12 @@  static bool cxl_region_update_coordinates(struct cxl_region *cxlr, int nid)
 
 	for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
 		if (cxlr->coord[i].read_bandwidth) {
-			rc = cxl_update_hmat_access_coordinates(nid, cxlr, i);
+			rc = 0;
+			if (cxl_need_node_perf_attrs_update(nid))
+				node_set_perf_attrs(nid, &cxlr->coord[i], i);
+			else
+				rc = cxl_update_hmat_access_coordinates(nid, cxlr, i);
+
 			if (rc == 0)
 				cset++;
 		}
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index c84c2f34b8ee..2a7c4b90d589 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -1559,4 +1559,13 @@  static inline int hmat_update_target_coordinates(int nid,
 }
 #endif
 
+#ifdef CONFIG_ACPI_NUMA
+bool acpi_node_backed_by_real_pxm(int nid);
+#else
+static inline bool acpi_node_backed_by_real_pxm(int nid)
+{
+	return false;
+}
+#endif
+
 #endif	/*_LINUX_ACPI_H*/