diff mbox series

cxl/mem: Disable suspend

Message ID 164894751774.951952.9428402449668442020.stgit@dwillia2-desk3.amr.corp.intel.com
State New
Headers show
Series cxl/mem: Disable suspend | expand

Commit Message

Dan Williams April 3, 2022, 12:58 a.m. UTC
The CXL specification claims S3 support at a hardware level, but at a
system software level there are some missing pieces. Section 9.4 rightly
claims that "CXL mem adapters may need aux power to retain memory
context across S3", but there is no enumeration mechanism for the OS to
determine if a given adapter has that support. Moreover the save state
and resume image for the system may inadvertantly end up in a CXL device
that needs to be restored before the save state is recoverable. I.e. a
circular dependency that is not resolvable without a third party
save-area.

Arrange for the cxl_mem driver to fail S3 attempts. This still nominaly
allows for suspend, but requires unbinding all CXL memory devices before
the suspend to ensure the typical DRAM flow is taken. The cxl_mem unbind
flow is intended to also tear down all CXL memory regions associated
with a given cxl_memdev.

It is reasonable to assume that any device participating in a System RAM
range published in the EFI memory map is covered by aux power and
save-area outside the device itself. So this restriction can be
minimized in the future once pre-existing region enumeration support
arrives, and perhaps a spec update to clarify if the EFI memory is
sufficent for determining the range of devices managed by
platform-firmware for S3 support.

Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/cxl/core/memdev.c |    1 -
 drivers/cxl/mem.c         |   26 ++++++++++++++++++++++++++
 2 files changed, 26 insertions(+), 1 deletion(-)

Comments

Rafael J. Wysocki April 4, 2022, 3:59 p.m. UTC | #1
On Sun, Apr 3, 2022 at 2:58 AM Dan Williams <dan.j.williams@intel.com> wrote:
>
> The CXL specification claims S3 support at a hardware level, but at a
> system software level there are some missing pieces. Section 9.4 rightly
> claims that "CXL mem adapters may need aux power to retain memory
> context across S3", but there is no enumeration mechanism for the OS to
> determine if a given adapter has that support. Moreover the save state
> and resume image for the system may inadvertantly end up in a CXL device
> that needs to be restored before the save state is recoverable. I.e. a
> circular dependency that is not resolvable without a third party
> save-area.
>
> Arrange for the cxl_mem driver to fail S3 attempts. This still nominaly
> allows for suspend, but requires unbinding all CXL memory devices before
> the suspend to ensure the typical DRAM flow is taken. The cxl_mem unbind
> flow is intended to also tear down all CXL memory regions associated
> with a given cxl_memdev.
>
> It is reasonable to assume that any device participating in a System RAM
> range published in the EFI memory map is covered by aux power and
> save-area outside the device itself. So this restriction can be
> minimized in the future once pre-existing region enumeration support
> arrives, and perhaps a spec update to clarify if the EFI memory is
> sufficent for determining the range of devices managed by
> platform-firmware for S3 support.
>
> Cc: "Rafael J. Wysocki" <rafael@kernel.org>
> Signed-off-by: Dan Williams <dan.j.williams@intel.com>

A few thoughts:

1. I don't think it is necessary to fail suspend-to-idle too (which
the driver will do after the patch AFAICS).
2. Should hibernation fail too?  From the description above it looks
like that should be the case.
3. If "deep"suspend is going to fail every time, it may be better to
prevent "deep" from being written to /sys/power/mem_sleep instead of
failing suspend in progress, especially after freezing user space.

> ---
>  drivers/cxl/core/memdev.c |    1 -
>  drivers/cxl/mem.c         |   26 ++++++++++++++++++++++++++
>  2 files changed, 26 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
> index 1f76b28f9826..efe4d2e9bfef 100644
> --- a/drivers/cxl/core/memdev.c
> +++ b/drivers/cxl/core/memdev.c
> @@ -251,7 +251,6 @@ static struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds,
>         dev->bus = &cxl_bus_type;
>         dev->devt = MKDEV(cxl_mem_major, cxlmd->id);
>         dev->type = &cxl_memdev_type;
> -       device_set_pm_not_required(dev);
>         INIT_WORK(&cxlmd->detach_work, detach_memdev);
>
>         cdev = &cxlmd->cdev;
> diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
> index 49a4b1c47299..0660bb1488cb 100644
> --- a/drivers/cxl/mem.c
> +++ b/drivers/cxl/mem.c
> @@ -3,6 +3,7 @@
>  #include <linux/device.h>
>  #include <linux/module.h>
>  #include <linux/pci.h>
> +#include <linux/pm.h>
>
>  #include "cxlmem.h"
>  #include "cxlpci.h"
> @@ -210,10 +211,35 @@ static int cxl_mem_probe(struct device *dev)
>         return rc;
>  }
>
> +static int cxl_mem_suspend(struct device *dev)
> +{
> +       /*
> +        * The kernel may be operating out of CXL memory on this device,
> +        * there is no spec defined way to determine whether this device
> +        * preserves contents over suspend, and there is no simple way
> +        * to arrange for the suspend image to avoid CXL memory which
> +        * would setup a circular dependency between PCI resume and save
> +        * state restoration.
> +        */
> +       dev_err(dev, "CXL memory suspend not supported\n");
> +       return -EBUSY;
> +}
> +
> +static int cxl_mem_resume(struct device *dev)
> +{
> +       /* nothing to do since suspend is prevented */
> +       return 0;
> +}

This is not needed AFAICS.

> +
> +static DEFINE_SIMPLE_DEV_PM_OPS(cxl_pm_ops, cxl_mem_suspend, cxl_mem_resume);
> +
>  static struct cxl_driver cxl_mem_driver = {
>         .name = "cxl_mem",
>         .probe = cxl_mem_probe,
>         .id = CXL_DEVICE_MEMORY_EXPANDER,
> +       .drv = {
> +               .pm = &cxl_pm_ops,
> +       },
>  };
>
>  module_cxl_driver(cxl_mem_driver);
>
Dan Williams April 4, 2022, 6:16 p.m. UTC | #2
On Mon, Apr 4, 2022 at 9:00 AM Rafael J. Wysocki <rafael@kernel.org> wrote:
>
> On Sun, Apr 3, 2022 at 2:58 AM Dan Williams <dan.j.williams@intel.com> wrote:
> >
> > The CXL specification claims S3 support at a hardware level, but at a
> > system software level there are some missing pieces. Section 9.4 rightly
> > claims that "CXL mem adapters may need aux power to retain memory
> > context across S3", but there is no enumeration mechanism for the OS to
> > determine if a given adapter has that support. Moreover the save state
> > and resume image for the system may inadvertantly end up in a CXL device
> > that needs to be restored before the save state is recoverable. I.e. a
> > circular dependency that is not resolvable without a third party
> > save-area.
> >
> > Arrange for the cxl_mem driver to fail S3 attempts. This still nominaly
> > allows for suspend, but requires unbinding all CXL memory devices before
> > the suspend to ensure the typical DRAM flow is taken. The cxl_mem unbind
> > flow is intended to also tear down all CXL memory regions associated
> > with a given cxl_memdev.
> >
> > It is reasonable to assume that any device participating in a System RAM
> > range published in the EFI memory map is covered by aux power and
> > save-area outside the device itself. So this restriction can be
> > minimized in the future once pre-existing region enumeration support
> > arrives, and perhaps a spec update to clarify if the EFI memory is
> > sufficent for determining the range of devices managed by
> > platform-firmware for S3 support.
> >
> > Cc: "Rafael J. Wysocki" <rafael@kernel.org>
> > Signed-off-by: Dan Williams <dan.j.williams@intel.com>
>
> A few thoughts:
>
> 1. I don't think it is necessary to fail suspend-to-idle too (which
> the driver will do after the patch AFAICS).

Ah true, I missed that this would also disable suspend to idle.

> 2. Should hibernation fail too?  From the description above it looks
> like that should be the case.

Yes, any CXL address range that was provisioned by the OS would need
some off-device save area for the device-state which seems difficult
to support in the general case.

> 3. If "deep"suspend is going to fail every time, it may be better to
> prevent "deep" from being written to /sys/power/mem_sleep instead of
> failing suspend in progress, especially after freezing user space.

Yeah, that sounds much better, let me explore that option.

>
> > ---
> >  drivers/cxl/core/memdev.c |    1 -
> >  drivers/cxl/mem.c         |   26 ++++++++++++++++++++++++++
> >  2 files changed, 26 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
> > index 1f76b28f9826..efe4d2e9bfef 100644
> > --- a/drivers/cxl/core/memdev.c
> > +++ b/drivers/cxl/core/memdev.c
> > @@ -251,7 +251,6 @@ static struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds,
> >         dev->bus = &cxl_bus_type;
> >         dev->devt = MKDEV(cxl_mem_major, cxlmd->id);
> >         dev->type = &cxl_memdev_type;
> > -       device_set_pm_not_required(dev);
> >         INIT_WORK(&cxlmd->detach_work, detach_memdev);
> >
> >         cdev = &cxlmd->cdev;
> > diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
> > index 49a4b1c47299..0660bb1488cb 100644
> > --- a/drivers/cxl/mem.c
> > +++ b/drivers/cxl/mem.c
> > @@ -3,6 +3,7 @@
> >  #include <linux/device.h>
> >  #include <linux/module.h>
> >  #include <linux/pci.h>
> > +#include <linux/pm.h>
> >
> >  #include "cxlmem.h"
> >  #include "cxlpci.h"
> > @@ -210,10 +211,35 @@ static int cxl_mem_probe(struct device *dev)
> >         return rc;
> >  }
> >
> > +static int cxl_mem_suspend(struct device *dev)
> > +{
> > +       /*
> > +        * The kernel may be operating out of CXL memory on this device,
> > +        * there is no spec defined way to determine whether this device
> > +        * preserves contents over suspend, and there is no simple way
> > +        * to arrange for the suspend image to avoid CXL memory which
> > +        * would setup a circular dependency between PCI resume and save
> > +        * state restoration.
> > +        */
> > +       dev_err(dev, "CXL memory suspend not supported\n");
> > +       return -EBUSY;
> > +}
> > +
> > +static int cxl_mem_resume(struct device *dev)
> > +{
> > +       /* nothing to do since suspend is prevented */
> > +       return 0;
> > +}
>
> This is not needed AFAICS.

Ok, I should have checked, but I'll circle back with sleep state
disabling rather than failing suspend.

Thanks, Rafael.
Rafael J. Wysocki April 4, 2022, 6:24 p.m. UTC | #3
On Mon, Apr 4, 2022 at 8:16 PM Dan Williams <dan.j.williams@intel.com> wrote:
>
> On Mon, Apr 4, 2022 at 9:00 AM Rafael J. Wysocki <rafael@kernel.org> wrote:
> >
> > On Sun, Apr 3, 2022 at 2:58 AM Dan Williams <dan.j.williams@intel.com> wrote:
> > >
> > > The CXL specification claims S3 support at a hardware level, but at a
> > > system software level there are some missing pieces. Section 9.4 rightly
> > > claims that "CXL mem adapters may need aux power to retain memory
> > > context across S3", but there is no enumeration mechanism for the OS to
> > > determine if a given adapter has that support. Moreover the save state
> > > and resume image for the system may inadvertantly end up in a CXL device
> > > that needs to be restored before the save state is recoverable. I.e. a
> > > circular dependency that is not resolvable without a third party
> > > save-area.
> > >
> > > Arrange for the cxl_mem driver to fail S3 attempts. This still nominaly
> > > allows for suspend, but requires unbinding all CXL memory devices before
> > > the suspend to ensure the typical DRAM flow is taken. The cxl_mem unbind
> > > flow is intended to also tear down all CXL memory regions associated
> > > with a given cxl_memdev.
> > >
> > > It is reasonable to assume that any device participating in a System RAM
> > > range published in the EFI memory map is covered by aux power and
> > > save-area outside the device itself. So this restriction can be
> > > minimized in the future once pre-existing region enumeration support
> > > arrives, and perhaps a spec update to clarify if the EFI memory is
> > > sufficent for determining the range of devices managed by
> > > platform-firmware for S3 support.
> > >
> > > Cc: "Rafael J. Wysocki" <rafael@kernel.org>
> > > Signed-off-by: Dan Williams <dan.j.williams@intel.com>
> >
> > A few thoughts:
> >
> > 1. I don't think it is necessary to fail suspend-to-idle too (which
> > the driver will do after the patch AFAICS).
>
> Ah true, I missed that this would also disable suspend to idle.
>
> > 2. Should hibernation fail too?  From the description above it looks
> > like that should be the case.
>
> Yes, any CXL address range that was provisioned by the OS would need
> some off-device save area for the device-state which seems difficult
> to support in the general case.

So it should cause errors to be returned from the hibernation path
too, ideally before the freezing of tasks.

> > 3. If "deep"suspend is going to fail every time, it may be better to
> > prevent "deep" from being written to /sys/power/mem_sleep instead of
> > failing suspend in progress, especially after freezing user space.
>
> Yeah, that sounds much better, let me explore that option.
Dan Williams April 5, 2022, 8:22 p.m. UTC | #4
On Tue, Apr 5, 2022 at 1:05 PM Brown, Len <len.brown@intel.com> wrote:
>
> Is it possible for ACPI to be aware that CXL on that system doesn't support S3, and for ACPI to thus simply not export S3?

The ACPI domain has a chance to be aware of CXL devices attached at
boot and were configured into the System Memory Map.

However, there may be caveats that limit device visibility like
platform firmware only scans CXL devices directly attached to root
ports, not behind switches.

It would be welcome if there was an ACPI data structure to indicate
"device-X is in scope for S3" which would mean that it has aux power
and all HDM decoders (the registers that enable access to CXL memory)
are restored by the BIOS before OS resume begins.

Absent that the OS can only assume that the BIOS has no knowledge of
any HDM configurations that were defined at run time by the OS, like
after hotplug, or configurations that the OS rewrote by triggering a
secondary bus reset to unlock what the BIOS had locked and put into
the EFI memory map.
diff mbox series

Patch

diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
index 1f76b28f9826..efe4d2e9bfef 100644
--- a/drivers/cxl/core/memdev.c
+++ b/drivers/cxl/core/memdev.c
@@ -251,7 +251,6 @@  static struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds,
 	dev->bus = &cxl_bus_type;
 	dev->devt = MKDEV(cxl_mem_major, cxlmd->id);
 	dev->type = &cxl_memdev_type;
-	device_set_pm_not_required(dev);
 	INIT_WORK(&cxlmd->detach_work, detach_memdev);
 
 	cdev = &cxlmd->cdev;
diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
index 49a4b1c47299..0660bb1488cb 100644
--- a/drivers/cxl/mem.c
+++ b/drivers/cxl/mem.c
@@ -3,6 +3,7 @@ 
 #include <linux/device.h>
 #include <linux/module.h>
 #include <linux/pci.h>
+#include <linux/pm.h>
 
 #include "cxlmem.h"
 #include "cxlpci.h"
@@ -210,10 +211,35 @@  static int cxl_mem_probe(struct device *dev)
 	return rc;
 }
 
+static int cxl_mem_suspend(struct device *dev)
+{
+	/*
+	 * The kernel may be operating out of CXL memory on this device,
+	 * there is no spec defined way to determine whether this device
+	 * preserves contents over suspend, and there is no simple way
+	 * to arrange for the suspend image to avoid CXL memory which
+	 * would setup a circular dependency between PCI resume and save
+	 * state restoration.
+	 */
+	dev_err(dev, "CXL memory suspend not supported\n");
+	return -EBUSY;
+}
+
+static int cxl_mem_resume(struct device *dev)
+{
+	/* nothing to do since suspend is prevented */
+	return 0;
+}
+
+static DEFINE_SIMPLE_DEV_PM_OPS(cxl_pm_ops, cxl_mem_suspend, cxl_mem_resume);
+
 static struct cxl_driver cxl_mem_driver = {
 	.name = "cxl_mem",
 	.probe = cxl_mem_probe,
 	.id = CXL_DEVICE_MEMORY_EXPANDER,
+	.drv = {
+		.pm = &cxl_pm_ops,
+	},
 };
 
 module_cxl_driver(cxl_mem_driver);