diff mbox series

[v4,7/7] cxl/dax: Defer DAX consumption of SOFT RESERVED resources until after CXL region creation

Message ID 20250603221949.53272-8-Smita.KoralahalliChannabasappa@amd.com
State New
Headers show
Series Add managed SOFT RESERVE resource handling | expand

Commit Message

Koralahalli Channabasappa, Smita June 3, 2025, 10:19 p.m. UTC
From: Nathan Fontenot <nathan.fontenot@amd.com>

The DAX HMEM driver currently consumes all SOFT RESERVED iomem resources
during initialization. This interferes with the CXL driver’s ability to
create regions and trim overlapping SOFT RESERVED ranges before DAX uses
them.

To resolve this, defer the DAX driver's resource consumption if the
cxl_acpi driver is enabled. The DAX HMEM initialization skips walking the
iomem resource tree in this case. After CXL region creation completes,
any remaining SOFT RESERVED resources are explicitly registered with the
DAX driver by the CXL driver.

This sequencing ensures proper handling of overlaps and fixes hotplug
failures.

Co-developed-by: Nathan Fontenot <Nathan.Fontenot@amd.com>
Signed-off-by: Nathan Fontenot <Nathan.Fontenot@amd.com>
Co-developed-by: Terry Bowman <terry.bowman@amd.com>
Signed-off-by: Terry Bowman <terry.bowman@amd.com>
Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
---
 drivers/cxl/core/region.c | 10 +++++++++
 drivers/dax/hmem/device.c | 43 ++++++++++++++++++++-------------------
 drivers/dax/hmem/hmem.c   |  3 ++-
 include/linux/dax.h       |  6 ++++++
 4 files changed, 40 insertions(+), 22 deletions(-)

Comments

Jonathan Cameron June 9, 2025, 1:01 p.m. UTC | #1
On Tue, 3 Jun 2025 22:19:49 +0000
Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com> wrote:

> From: Nathan Fontenot <nathan.fontenot@amd.com>
> 
> The DAX HMEM driver currently consumes all SOFT RESERVED iomem resources
> during initialization. This interferes with the CXL driver’s ability to
> create regions and trim overlapping SOFT RESERVED ranges before DAX uses
> them.
> 
> To resolve this, defer the DAX driver's resource consumption if the
> cxl_acpi driver is enabled. The DAX HMEM initialization skips walking the
> iomem resource tree in this case. After CXL region creation completes,
> any remaining SOFT RESERVED resources are explicitly registered with the
> DAX driver by the CXL driver.
> 
> This sequencing ensures proper handling of overlaps and fixes hotplug
> failures.
> 
> Co-developed-by: Nathan Fontenot <Nathan.Fontenot@amd.com>
> Signed-off-by: Nathan Fontenot <Nathan.Fontenot@amd.com>
> Co-developed-by: Terry Bowman <terry.bowman@amd.com>
> Signed-off-by: Terry Bowman <terry.bowman@amd.com>
> Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
> ---
>  drivers/cxl/core/region.c | 10 +++++++++
>  drivers/dax/hmem/device.c | 43 ++++++++++++++++++++-------------------
>  drivers/dax/hmem/hmem.c   |  3 ++-
>  include/linux/dax.h       |  6 ++++++
>  4 files changed, 40 insertions(+), 22 deletions(-)
> 
> diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
> index 3a5ca44d65f3..c6c0c7ba3b20 100644
> --- a/drivers/cxl/core/region.c
> +++ b/drivers/cxl/core/region.c
> @@ -10,6 +10,7 @@
>  #include <linux/sort.h>
>  #include <linux/idr.h>
>  #include <linux/memory-tiers.h>
> +#include <linux/dax.h>
>  #include <cxlmem.h>
>  #include <cxl.h>
>  #include "core.h"
> @@ -3553,6 +3554,11 @@ static struct resource *normalize_resource(struct resource *res)
>  	return NULL;
>  }
>  
> +static int cxl_softreserv_mem_register(struct resource *res, void *unused)
> +{
> +	return hmem_register_device(phys_to_target_node(res->start), res);
> +}
> +
>  static int __cxl_region_softreserv_update(struct resource *soft,
>  					  void *_cxlr)
>  {
> @@ -3590,6 +3596,10 @@ int cxl_region_softreserv_update(void)
>  				    __cxl_region_softreserv_update);
>  	}
>  
> +	/* Now register any remaining SOFT RESERVES with DAX */
> +	walk_iomem_res_desc(IORES_DESC_SOFT_RESERVED, IORESOURCE_MEM,
> +			    0, -1, NULL, cxl_softreserv_mem_register);
> +
>  	return 0;
>  }
>  EXPORT_SYMBOL_NS_GPL(cxl_region_softreserv_update, "CXL");
> diff --git a/drivers/dax/hmem/device.c b/drivers/dax/hmem/device.c
> index 59ad44761191..cc1ed7bbdb1a 100644
> --- a/drivers/dax/hmem/device.c
> +++ b/drivers/dax/hmem/device.c
> @@ -8,7 +8,6 @@
>  static bool nohmem;
>  module_param_named(disable, nohmem, bool, 0444);
>  
> -static bool platform_initialized;
>  static DEFINE_MUTEX(hmem_resource_lock);
>  static struct resource hmem_active = {
>  	.name = "HMEM devices",
> @@ -35,9 +34,7 @@ EXPORT_SYMBOL_GPL(walk_hmem_resources);
>  
>  static void __hmem_register_resource(int target_nid, struct resource *res)
>  {
> -	struct platform_device *pdev;
>  	struct resource *new;
> -	int rc;
>  
>  	new = __request_region(&hmem_active, res->start, resource_size(res), "",
>  			       0);
> @@ -47,21 +44,6 @@ static void __hmem_register_resource(int target_nid, struct resource *res)
>  	}
>  
>  	new->desc = target_nid;
> -
> -	if (platform_initialized)
> -		return;
> -
> -	pdev = platform_device_alloc("hmem_platform", 0);
> -	if (!pdev) {
> -		pr_err_once("failed to register device-dax hmem_platform device\n");
> -		return;
> -	}
> -
> -	rc = platform_device_add(pdev);
> -	if (rc)
> -		platform_device_put(pdev);
> -	else
> -		platform_initialized = true;
>  }
>  
>  void hmem_register_resource(int target_nid, struct resource *res)
> @@ -83,9 +65,28 @@ static __init int hmem_register_one(struct resource *res, void *data)
>  
>  static __init int hmem_init(void)
>  {
> -	walk_iomem_res_desc(IORES_DESC_SOFT_RESERVED,
> -			IORESOURCE_MEM, 0, -1, NULL, hmem_register_one);
> -	return 0;
> +	struct platform_device *pdev;
> +	int rc;
> +
> +	if (!IS_ENABLED(CONFIG_CXL_ACPI)) {
> +		walk_iomem_res_desc(IORES_DESC_SOFT_RESERVED,
> +				    IORESOURCE_MEM, 0, -1, NULL,
> +				    hmem_register_one);
> +	}
> +
> +	pdev = platform_device_alloc("hmem_platform", 0);
> +	if (!pdev) {
> +		pr_err("failed to register device-dax hmem_platform device\n");
> +		return -1;
> +	}
> +
> +	rc = platform_device_add(pdev);

Could this use platform_device_register_simple("hmem_platform", -1, NULL, 0),
or something like that?  There are quite a few variants of
platform_device_register that cover simple cases like this one.


> +	if (rc) {
> +		pr_err("failed to add device-dax hmem_platform device\n");
> +		platform_device_put(pdev);
> +	}
> +
> +	return rc;
>  }
>  
>  /*
Zhijian Li (Fujitsu) June 13, 2025, 2:12 a.m. UTC | #2
Hi Smita, Nathan, Terry

I am struggling to understand whether this patch is truly necessary, or whether
I have not fully grasped the scenario in which it provides value. On a QEMU VM
with both HMEM and CXL.mem configured, I observed no issues without this patch
applied. (Are there specific config options required to reproduce the problem?)

Here is the /proc/iomem without the patch:
180000000-1ffffffff : Soft Reserved  ### 2 hmem nodes
   180000000-1bfffffff : dax1.0
     180000000-1bfffffff : System RAM (kmem)
   1c0000000-1ffffffff : dax2.0
     1c0000000-1ffffffff : System RAM (kmem)
5c0001128-5c00011b7 : port1
5d0000000-64fffffff : CXL Window 0  ### 1 CXL node
   5d0000000-64fffffff : region0
     5d0000000-64fffffff : dax0.0
       5d0000000-64fffffff : System RAM (kmem)

On 04/06/2025 06:19, Smita Koralahalli wrote:
> From: Nathan Fontenot <nathan.fontenot@amd.com>
> 
> The DAX HMEM driver currently consumes all SOFT RESERVED iomem resources
> during initialization. This interferes with the CXL driver’s ability to
> create regions and trim overlapping SOFT RESERVED ranges before DAX uses
> them.

When the commit message refers to the "HMEM driver", does it mean
`dax_hmem_platform_driver` or `dax_hmem_driver`? Regardless of which one it is,
what is the impact if that driver consumes all SOFT RESERVED resources?

Since `hmem_register_device()` only creates HMEM devices for ranges
*without* `IORES_DESC_CXL` (which cxl_acpi may mark), cxl_core/cxl_dax
should still be able to create regions and DAX devices without conflicts.

> To resolve this, defer the DAX driver's resource consumption if the
> cxl_acpi driver is enabled. The DAX HMEM initialization skips walking the
> iomem resource tree in this case. After CXL region creation completes,
> any remaining SOFT RESERVED resources are explicitly registered with the
> DAX driver by the CXL driver.

Conversely, with this patch applied, `cxl_region_softreserv_update()` attempts
to register new HMEM devices. This may cause duplicate registrations for the
same range (e.g., 0x180000000-0x1ffffffff), triggering warnings like:

[   14.984108] kmem dax4.0: mapping0: 0x180000000-0x1ffffffff could not reserve region
[   14.987204] kmem dax4.0: probe with driver kmem failed with error -16

This happens because the HMAT initialization has already registered these sub-ranges:
   180000000-1bfffffff
   1c0000000-1ffffffff


If I'm missing something, please correct me.

Thanks,
Zhijian



> 
> This sequencing ensures proper handling of overlaps and fixes hotplug
> failures.
> 
> Co-developed-by: Nathan Fontenot <Nathan.Fontenot@amd.com>
> Signed-off-by: Nathan Fontenot <Nathan.Fontenot@amd.com>
> Co-developed-by: Terry Bowman <terry.bowman@amd.com>
> Signed-off-by: Terry Bowman <terry.bowman@amd.com>
> Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
> ---
>   drivers/cxl/core/region.c | 10 +++++++++
>   drivers/dax/hmem/device.c | 43 ++++++++++++++++++++-------------------
>   drivers/dax/hmem/hmem.c   |  3 ++-
>   include/linux/dax.h       |  6 ++++++
>   4 files changed, 40 insertions(+), 22 deletions(-)
> 
> diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
> index 3a5ca44d65f3..c6c0c7ba3b20 100644
> --- a/drivers/cxl/core/region.c
> +++ b/drivers/cxl/core/region.c
> @@ -10,6 +10,7 @@
>   #include <linux/sort.h>
>   #include <linux/idr.h>
>   #include <linux/memory-tiers.h>
> +#include <linux/dax.h>
>   #include <cxlmem.h>
>   #include <cxl.h>
>   #include "core.h"
> @@ -3553,6 +3554,11 @@ static struct resource *normalize_resource(struct resource *res)
>   	return NULL;
>   }
>   
> +static int cxl_softreserv_mem_register(struct resource *res, void *unused)
> +{
> +	return hmem_register_device(phys_to_target_node(res->start), res);
> +}
> +
>   static int __cxl_region_softreserv_update(struct resource *soft,
>   					  void *_cxlr)
>   {
> @@ -3590,6 +3596,10 @@ int cxl_region_softreserv_update(void)
>   				    __cxl_region_softreserv_update);
>   	}
>   
> +	/* Now register any remaining SOFT RESERVES with DAX */
> +	walk_iomem_res_desc(IORES_DESC_SOFT_RESERVED, IORESOURCE_MEM,
> +			    0, -1, NULL, cxl_softreserv_mem_register);
> +
>   	return 0;
>   }
>   EXPORT_SYMBOL_NS_GPL(cxl_region_softreserv_update, "CXL");
> diff --git a/drivers/dax/hmem/device.c b/drivers/dax/hmem/device.c
> index 59ad44761191..cc1ed7bbdb1a 100644
> --- a/drivers/dax/hmem/device.c
> +++ b/drivers/dax/hmem/device.c
> @@ -8,7 +8,6 @@
>   static bool nohmem;
>   module_param_named(disable, nohmem, bool, 0444);
>   
> -static bool platform_initialized;
>   static DEFINE_MUTEX(hmem_resource_lock);
>   static struct resource hmem_active = {
>   	.name = "HMEM devices",
> @@ -35,9 +34,7 @@ EXPORT_SYMBOL_GPL(walk_hmem_resources);
>   
>   static void __hmem_register_resource(int target_nid, struct resource *res)
>   {
> -	struct platform_device *pdev;
>   	struct resource *new;
> -	int rc;
>   
>   	new = __request_region(&hmem_active, res->start, resource_size(res), "",
>   			       0);
> @@ -47,21 +44,6 @@ static void __hmem_register_resource(int target_nid, struct resource *res)
>   	}
>   
>   	new->desc = target_nid;
> -
> -	if (platform_initialized)
> -		return;
> -
> -	pdev = platform_device_alloc("hmem_platform", 0);
> -	if (!pdev) {
> -		pr_err_once("failed to register device-dax hmem_platform device\n");
> -		return;
> -	}
> -
> -	rc = platform_device_add(pdev);
> -	if (rc)
> -		platform_device_put(pdev);
> -	else
> -		platform_initialized = true;
>   }
>   
>   void hmem_register_resource(int target_nid, struct resource *res)
> @@ -83,9 +65,28 @@ static __init int hmem_register_one(struct resource *res, void *data)
>   
>   static __init int hmem_init(void)
>   {
> -	walk_iomem_res_desc(IORES_DESC_SOFT_RESERVED,
> -			IORESOURCE_MEM, 0, -1, NULL, hmem_register_one);
> -	return 0;
> +	struct platform_device *pdev;
> +	int rc;
> +
> +	if (!IS_ENABLED(CONFIG_CXL_ACPI)) {
> +		walk_iomem_res_desc(IORES_DESC_SOFT_RESERVED,
> +				    IORESOURCE_MEM, 0, -1, NULL,
> +				    hmem_register_one);
> +	}
> +
> +	pdev = platform_device_alloc("hmem_platform", 0);
> +	if (!pdev) {
> +		pr_err("failed to register device-dax hmem_platform device\n");
> +		return -1;
> +	}
> +
> +	rc = platform_device_add(pdev);
> +	if (rc) {
> +		pr_err("failed to add device-dax hmem_platform device\n");
> +		platform_device_put(pdev);
> +	}
> +
> +	return rc;
>   }
>   
>   /*
> diff --git a/drivers/dax/hmem/hmem.c b/drivers/dax/hmem/hmem.c
> index 3aedef5f1be1..a206b9b383e4 100644
> --- a/drivers/dax/hmem/hmem.c
> +++ b/drivers/dax/hmem/hmem.c
> @@ -61,7 +61,7 @@ static void release_hmem(void *pdev)
>   	platform_device_unregister(pdev);
>   }
>   
> -static int hmem_register_device(int target_nid, const struct resource *res)
> +int hmem_register_device(int target_nid, const struct resource *res)
>   {
>   	struct device *host = &dax_hmem_pdev->dev;
>   	struct platform_device *pdev;
> @@ -124,6 +124,7 @@ static int hmem_register_device(int target_nid, const struct resource *res)
>   	platform_device_put(pdev);
>   	return rc;
>   }
> +EXPORT_SYMBOL_GPL(hmem_register_device);
>   
>   static int dax_hmem_platform_probe(struct platform_device *pdev)
>   {
> diff --git a/include/linux/dax.h b/include/linux/dax.h
> index a4ad3708ea35..5052dca8b3bc 100644
> --- a/include/linux/dax.h
> +++ b/include/linux/dax.h
> @@ -299,10 +299,16 @@ static inline int dax_mem2blk_err(int err)
>   
>   #ifdef CONFIG_DEV_DAX_HMEM_DEVICES
>   void hmem_register_resource(int target_nid, struct resource *r);
> +int hmem_register_device(int target_nid, const struct resource *res);
>   #else
>   static inline void hmem_register_resource(int target_nid, struct resource *r)
>   {
>   }
> +
> +static inline int hmem_register_device(int target_nid, const struct resource *res)
> +{
> +	return 0;
> +}
>   #endif
>   
>   typedef int (*walk_hmem_fn)(int target_nid, const struct resource *res);
diff mbox series

Patch

diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index 3a5ca44d65f3..c6c0c7ba3b20 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -10,6 +10,7 @@ 
 #include <linux/sort.h>
 #include <linux/idr.h>
 #include <linux/memory-tiers.h>
+#include <linux/dax.h>
 #include <cxlmem.h>
 #include <cxl.h>
 #include "core.h"
@@ -3553,6 +3554,11 @@  static struct resource *normalize_resource(struct resource *res)
 	return NULL;
 }
 
+static int cxl_softreserv_mem_register(struct resource *res, void *unused)
+{
+	return hmem_register_device(phys_to_target_node(res->start), res);
+}
+
 static int __cxl_region_softreserv_update(struct resource *soft,
 					  void *_cxlr)
 {
@@ -3590,6 +3596,10 @@  int cxl_region_softreserv_update(void)
 				    __cxl_region_softreserv_update);
 	}
 
+	/* Now register any remaining SOFT RESERVES with DAX */
+	walk_iomem_res_desc(IORES_DESC_SOFT_RESERVED, IORESOURCE_MEM,
+			    0, -1, NULL, cxl_softreserv_mem_register);
+
 	return 0;
 }
 EXPORT_SYMBOL_NS_GPL(cxl_region_softreserv_update, "CXL");
diff --git a/drivers/dax/hmem/device.c b/drivers/dax/hmem/device.c
index 59ad44761191..cc1ed7bbdb1a 100644
--- a/drivers/dax/hmem/device.c
+++ b/drivers/dax/hmem/device.c
@@ -8,7 +8,6 @@ 
 static bool nohmem;
 module_param_named(disable, nohmem, bool, 0444);
 
-static bool platform_initialized;
 static DEFINE_MUTEX(hmem_resource_lock);
 static struct resource hmem_active = {
 	.name = "HMEM devices",
@@ -35,9 +34,7 @@  EXPORT_SYMBOL_GPL(walk_hmem_resources);
 
 static void __hmem_register_resource(int target_nid, struct resource *res)
 {
-	struct platform_device *pdev;
 	struct resource *new;
-	int rc;
 
 	new = __request_region(&hmem_active, res->start, resource_size(res), "",
 			       0);
@@ -47,21 +44,6 @@  static void __hmem_register_resource(int target_nid, struct resource *res)
 	}
 
 	new->desc = target_nid;
-
-	if (platform_initialized)
-		return;
-
-	pdev = platform_device_alloc("hmem_platform", 0);
-	if (!pdev) {
-		pr_err_once("failed to register device-dax hmem_platform device\n");
-		return;
-	}
-
-	rc = platform_device_add(pdev);
-	if (rc)
-		platform_device_put(pdev);
-	else
-		platform_initialized = true;
 }
 
 void hmem_register_resource(int target_nid, struct resource *res)
@@ -83,9 +65,28 @@  static __init int hmem_register_one(struct resource *res, void *data)
 
 static __init int hmem_init(void)
 {
-	walk_iomem_res_desc(IORES_DESC_SOFT_RESERVED,
-			IORESOURCE_MEM, 0, -1, NULL, hmem_register_one);
-	return 0;
+	struct platform_device *pdev;
+	int rc;
+
+	if (!IS_ENABLED(CONFIG_CXL_ACPI)) {
+		walk_iomem_res_desc(IORES_DESC_SOFT_RESERVED,
+				    IORESOURCE_MEM, 0, -1, NULL,
+				    hmem_register_one);
+	}
+
+	pdev = platform_device_alloc("hmem_platform", 0);
+	if (!pdev) {
+		pr_err("failed to register device-dax hmem_platform device\n");
+		return -1;
+	}
+
+	rc = platform_device_add(pdev);
+	if (rc) {
+		pr_err("failed to add device-dax hmem_platform device\n");
+		platform_device_put(pdev);
+	}
+
+	return rc;
 }
 
 /*
diff --git a/drivers/dax/hmem/hmem.c b/drivers/dax/hmem/hmem.c
index 3aedef5f1be1..a206b9b383e4 100644
--- a/drivers/dax/hmem/hmem.c
+++ b/drivers/dax/hmem/hmem.c
@@ -61,7 +61,7 @@  static void release_hmem(void *pdev)
 	platform_device_unregister(pdev);
 }
 
-static int hmem_register_device(int target_nid, const struct resource *res)
+int hmem_register_device(int target_nid, const struct resource *res)
 {
 	struct device *host = &dax_hmem_pdev->dev;
 	struct platform_device *pdev;
@@ -124,6 +124,7 @@  static int hmem_register_device(int target_nid, const struct resource *res)
 	platform_device_put(pdev);
 	return rc;
 }
+EXPORT_SYMBOL_GPL(hmem_register_device);
 
 static int dax_hmem_platform_probe(struct platform_device *pdev)
 {
diff --git a/include/linux/dax.h b/include/linux/dax.h
index a4ad3708ea35..5052dca8b3bc 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -299,10 +299,16 @@  static inline int dax_mem2blk_err(int err)
 
 #ifdef CONFIG_DEV_DAX_HMEM_DEVICES
 void hmem_register_resource(int target_nid, struct resource *r);
+int hmem_register_device(int target_nid, const struct resource *res);
 #else
 static inline void hmem_register_resource(int target_nid, struct resource *r)
 {
 }
+
+static inline int hmem_register_device(int target_nid, const struct resource *res)
+{
+	return 0;
+}
 #endif
 
 typedef int (*walk_hmem_fn)(int target_nid, const struct resource *res);