diff mbox series

virtio-iommu: Default to bypass during boot

Message ID 20210218105929.1433230-1-jean-philippe@linaro.org
State New
Headers show
Series virtio-iommu: Default to bypass during boot | expand

Commit Message

Jean-Philippe Brucker Feb. 18, 2021, 10:59 a.m. UTC
Currently the virtio-iommu device must be programmed before it allows
DMA from any PCI device. This can make the VM entirely unusable when a
virtio-iommu driver isn't present, for example in a bootloader that
loads the OS from storage.

Similarly to the other vIOMMU implementations, default to DMA bypassing
the IOMMU during boot. Add a "boot-bypass" option that lets users change
this behavior.

Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>

---
 include/hw/virtio/virtio-iommu.h |  1 +
 hw/virtio/virtio-iommu.c         | 23 +++++++++++++++++++++--
 2 files changed, 22 insertions(+), 2 deletions(-)

-- 
2.30.0

Comments

Michael S. Tsirkin Feb. 21, 2021, 11:45 a.m. UTC | #1
On Thu, Feb 18, 2021 at 11:59:30AM +0100, Jean-Philippe Brucker wrote:
> Currently the virtio-iommu device must be programmed before it allows

> DMA from any PCI device. This can make the VM entirely unusable when a

> virtio-iommu driver isn't present, for example in a bootloader that

> loads the OS from storage.

> 

> Similarly to the other vIOMMU implementations, default to DMA bypassing

> the IOMMU during boot. Add a "boot-bypass" option that lets users change

> this behavior.

> 

> Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>


wouldn't this be a spec violation?


When the device is reset, endpoints are not attached to any domain.

If the VIRTIO_IOMMU_F_BYPASS feature is negotiated, all accesses from
unattached endpoints are allowed and translated by the IOMMU using the
identity function. If the feature is not negotiated, any memory access
from an unattached endpoint fails. Upon attaching an endpoint in
bypass mode to a new domain, any memory access from the endpoint fails,
since the domain does not contain any mapping.


Maybe it's not too late to change the spec here - want to try sending
a spec patch?




> ---

>  include/hw/virtio/virtio-iommu.h |  1 +

>  hw/virtio/virtio-iommu.c         | 23 +++++++++++++++++++++--

>  2 files changed, 22 insertions(+), 2 deletions(-)

> 

> diff --git a/include/hw/virtio/virtio-iommu.h b/include/hw/virtio/virtio-iommu.h

> index 273e35c04bc..4c66989ca4e 100644

> --- a/include/hw/virtio/virtio-iommu.h

> +++ b/include/hw/virtio/virtio-iommu.h

> @@ -58,6 +58,7 @@ struct VirtIOIOMMU {

>      GTree *domains;

>      QemuMutex mutex;

>      GTree *endpoints;

> +    bool boot_bypass;

>  };

>  

>  #endif

> diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c

> index c2883a2f6c8..cd08dc39eca 100644

> --- a/hw/virtio/virtio-iommu.c

> +++ b/hw/virtio/virtio-iommu.c

> @@ -689,6 +689,25 @@ static void virtio_iommu_report_fault(VirtIOIOMMU *viommu, uint8_t reason,

>  

>  }

>  

> +static bool virtio_iommu_bypass_is_allowed(VirtIOIOMMU *s)

> +{

> +    VirtIODevice *vdev = &s->parent_obj;

> +

> +    /*

> +     * Allow bypass if:

> +     * - boot_bypass is enabled and the BYPASS feature hasn't yet been

> +     *   acknowledged.

> +     * - the BYPASS feature has been negotiated.

> +     */

> +    if (s->boot_bypass && !(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK)) {

> +        return true;

> +    }

> +    if (virtio_vdev_has_feature(vdev, VIRTIO_IOMMU_F_BYPASS)) {

> +        return true;

> +    }

> +    return false;

> +}

> +

>  static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr,

>                                              IOMMUAccessFlags flag,

>                                              int iommu_idx)

> @@ -715,8 +734,7 @@ static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr,

>          .perm = IOMMU_NONE,

>      };

>  

> -    bypass_allowed = virtio_vdev_has_feature(&s->parent_obj,

> -                                             VIRTIO_IOMMU_F_BYPASS);

> +    bypass_allowed = virtio_iommu_bypass_is_allowed(s);

>  

>      sid = virtio_iommu_get_bdf(sdev);

>  

> @@ -1156,6 +1174,7 @@ static const VMStateDescription vmstate_virtio_iommu = {

>  

>  static Property virtio_iommu_properties[] = {

>      DEFINE_PROP_LINK("primary-bus", VirtIOIOMMU, primary_bus, "PCI", PCIBus *),

> +    DEFINE_PROP_BOOL("boot-bypass", VirtIOIOMMU, boot_bypass, true),

>      DEFINE_PROP_END_OF_LIST(),

>  };

>  

> -- 

> 2.30.0
Jean-Philippe Brucker Feb. 25, 2021, 5:13 p.m. UTC | #2
On Sun, Feb 21, 2021 at 06:45:18AM -0500, Michael S. Tsirkin wrote:
> On Thu, Feb 18, 2021 at 11:59:30AM +0100, Jean-Philippe Brucker wrote:

> > Currently the virtio-iommu device must be programmed before it allows

> > DMA from any PCI device. This can make the VM entirely unusable when a

> > virtio-iommu driver isn't present, for example in a bootloader that

> > loads the OS from storage.

> > 

> > Similarly to the other vIOMMU implementations, default to DMA bypassing

> > the IOMMU during boot. Add a "boot-bypass" option that lets users change

> > this behavior.

> > 

> > Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>

> 

> wouldn't this be a spec violation?

> 

> 

> When the device is reset, endpoints are not attached to any domain.

> 

> If the VIRTIO_IOMMU_F_BYPASS feature is negotiated, all accesses from

> unattached endpoints are allowed and translated by the IOMMU using the

> identity function. If the feature is not negotiated, any memory access

> from an unattached endpoint fails. Upon attaching an endpoint in

> bypass mode to a new domain, any memory access from the endpoint fails,

> since the domain does not contain any mapping.


Right, but the device behavior regarding BYPASS is specified as:

  If the driver does not accept the VIRTIO_IOMMU_F_BYPASS feature, the
  device SHOULD NOT let endpoints access the guest-physical address space.

So I figured that the spec could be read as "before feature negotiation
it's undefined whether the IOMMU is bypassed or not" and so a device
implementation can choose either (previously discussed at [1]). But it's a
stretch, we should clarify this.

Also, the OS might want to know whether DMA was bypassing the IOMMU before
the device is initialized. If there are strong security requirements, then
boot-bypass means the system was vulnerable to DMA attacks during boot.

So I'd like to add a new feature bit for this, VIRTIO_IOMMU_F_BOOT_BYPASS,
that tells whether DMA bypasses the IOMMU before feature negotiation. It's
only an indicator, accepting the feature doesn't change anything. I'll
send the spec change shortly.

Thanks,
Jean

[1] https://lore.kernel.org/qemu-devel/20200109104032.GA937123@myrica/

> Maybe it's not too late to change the spec here - want to try sending

> a spec patch?
Tian, Kevin Feb. 26, 2021, 8:11 a.m. UTC | #3
> From: Qemu-devel <qemu-devel-bounces+kevin.tian=intel.com@nongnu.org>

> On Behalf Of Jean-Philippe Brucker

> 

> On Sun, Feb 21, 2021 at 06:45:18AM -0500, Michael S. Tsirkin wrote:

> > On Thu, Feb 18, 2021 at 11:59:30AM +0100, Jean-Philippe Brucker wrote:

> > > Currently the virtio-iommu device must be programmed before it allows

> > > DMA from any PCI device. This can make the VM entirely unusable when

> a

> > > virtio-iommu driver isn't present, for example in a bootloader that

> > > loads the OS from storage.

> > >

> > > Similarly to the other vIOMMU implementations, default to DMA

> bypassing

> > > the IOMMU during boot. Add a "boot-bypass" option that lets users

> change

> > > this behavior.

> > >

> > > Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>

> >

> > wouldn't this be a spec violation?

> >

> >

> > When the device is reset, endpoints are not attached to any domain.

> >

> > If the VIRTIO_IOMMU_F_BYPASS feature is negotiated, all accesses from

> > unattached endpoints are allowed and translated by the IOMMU using the

> > identity function. If the feature is not negotiated, any memory access

> > from an unattached endpoint fails. Upon attaching an endpoint in

> > bypass mode to a new domain, any memory access from the endpoint fails,

> > since the domain does not contain any mapping.

> 

> Right, but the device behavior regarding BYPASS is specified as:

> 

>   If the driver does not accept the VIRTIO_IOMMU_F_BYPASS feature, the

>   device SHOULD NOT let endpoints access the guest-physical address space.

> 

> So I figured that the spec could be read as "before feature negotiation

> it's undefined whether the IOMMU is bypassed or not" and so a device

> implementation can choose either (previously discussed at [1]). But it's a

> stretch, we should clarify this.

> 

> Also, the OS might want to know whether DMA was bypassing the IOMMU

> before

> the device is initialized. If there are strong security requirements, then

> boot-bypass means the system was vulnerable to DMA attacks during boot.

> 

> So I'd like to add a new feature bit for this, VIRTIO_IOMMU_F_BOOT_BYPASS,

> that tells whether DMA bypasses the IOMMU before feature negotiation. It's

> only an indicator, accepting the feature doesn't change anything. I'll

> send the spec change shortly.

> 

> Thanks,

> Jean

> 

> [1] https://lore.kernel.org/qemu-

> devel/20200109104032.GA937123@myrica/

> 


I wonder why we didn't define the default behavior to bypass (to align with
other vIOMMUs) in the first place and then have an option (e.g. 
VIRTIO_IOMMU_F_NOBYPASS) to override in case a stricter policy is required. 
In concept when a IOMMU device is uninitialized or reset, it essentially means 
no protection in place thus a bypass behavior. 

Thanks
Kevin
Jean-Philippe Brucker Feb. 26, 2021, 1 p.m. UTC | #4
On Fri, Feb 26, 2021 at 08:11:41AM +0000, Tian, Kevin wrote:
> > From: Qemu-devel <qemu-devel-bounces+kevin.tian=intel.com@nongnu.org>

> > On Behalf Of Jean-Philippe Brucker

> > 

> > On Sun, Feb 21, 2021 at 06:45:18AM -0500, Michael S. Tsirkin wrote:

> > > On Thu, Feb 18, 2021 at 11:59:30AM +0100, Jean-Philippe Brucker wrote:

> > > > Currently the virtio-iommu device must be programmed before it allows

> > > > DMA from any PCI device. This can make the VM entirely unusable when

> > a

> > > > virtio-iommu driver isn't present, for example in a bootloader that

> > > > loads the OS from storage.

> > > >

> > > > Similarly to the other vIOMMU implementations, default to DMA

> > bypassing

> > > > the IOMMU during boot. Add a "boot-bypass" option that lets users

> > change

> > > > this behavior.

> > > >

> > > > Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>

> > >

> > > wouldn't this be a spec violation?

> > >

> > >

> > > When the device is reset, endpoints are not attached to any domain.

> > >

> > > If the VIRTIO_IOMMU_F_BYPASS feature is negotiated, all accesses from

> > > unattached endpoints are allowed and translated by the IOMMU using the

> > > identity function. If the feature is not negotiated, any memory access

> > > from an unattached endpoint fails. Upon attaching an endpoint in

> > > bypass mode to a new domain, any memory access from the endpoint fails,

> > > since the domain does not contain any mapping.

> > 

> > Right, but the device behavior regarding BYPASS is specified as:

> > 

> >   If the driver does not accept the VIRTIO_IOMMU_F_BYPASS feature, the

> >   device SHOULD NOT let endpoints access the guest-physical address space.

> > 

> > So I figured that the spec could be read as "before feature negotiation

> > it's undefined whether the IOMMU is bypassed or not" and so a device

> > implementation can choose either (previously discussed at [1]). But it's a

> > stretch, we should clarify this.

> > 

> > Also, the OS might want to know whether DMA was bypassing the IOMMU

> > before

> > the device is initialized. If there are strong security requirements, then

> > boot-bypass means the system was vulnerable to DMA attacks during boot.

> > 

> > So I'd like to add a new feature bit for this, VIRTIO_IOMMU_F_BOOT_BYPASS,

> > that tells whether DMA bypasses the IOMMU before feature negotiation. It's

> > only an indicator, accepting the feature doesn't change anything. I'll

> > send the spec change shortly.

> > 

> > Thanks,

> > Jean

> > 

> > [1] https://lore.kernel.org/qemu-

> > devel/20200109104032.GA937123@myrica/

> > 

> 

> I wonder why we didn't define the default behavior to bypass (to align with

> other vIOMMUs) in the first place and then have an option (e.g. 

> VIRTIO_IOMMU_F_NOBYPASS) to override in case a stricter policy is required. 


Yes in hindsight the polarity should probably have been reversed. Clearly
I could have put more thoughts into this. When writing the spec it seemed
natural in terms of security to disallow accesses until software
explicitly enables bypass.

In the linked discussion, which happened after the initial spec was
accepted, we concluded that QEMU should be boot-bypass by default and the
spec ought to be clarified to explicitly allow this, I just didn't take
time to work on it until now.

> In concept when a IOMMU device is uninitialized or reset, it essentially means 

> no protection in place thus a bypass behavior. 


Not necessarily, the SMMU leaves that choice to the implementation (but it
does have the right polarity 0 == bypass). The SMMU_GBPA register defines
the bypass behavior. Implementations can tie it to 0 or 1 to define the
default behavior of abort or bypass. Then software sets it to 0 or 1 to
select the runtime policy. Ideally I'd like a similar mechanism for
virtio-iommu, because it's sticky across reset - no vulnerability window
when firmware hands over the IOMMU to the OS.

Thanks,
Jean
Tian, Kevin March 4, 2021, 7:59 a.m. UTC | #5
> From: Jean-Philippe Brucker <jean-philippe@linaro.org>

> Sent: Friday, February 26, 2021 9:01 PM

> 

> On Fri, Feb 26, 2021 at 08:11:41AM +0000, Tian, Kevin wrote:

> > > From: Qemu-devel <qemu-devel-

> bounces+kevin.tian=intel.com@nongnu.org>

> > > On Behalf Of Jean-Philippe Brucker

> > >

> > > On Sun, Feb 21, 2021 at 06:45:18AM -0500, Michael S. Tsirkin wrote:

> > > > On Thu, Feb 18, 2021 at 11:59:30AM +0100, Jean-Philippe Brucker wrote:

> > > > > Currently the virtio-iommu device must be programmed before it

> allows

> > > > > DMA from any PCI device. This can make the VM entirely unusable

> when

> > > a

> > > > > virtio-iommu driver isn't present, for example in a bootloader that

> > > > > loads the OS from storage.

> > > > >

> > > > > Similarly to the other vIOMMU implementations, default to DMA

> > > bypassing

> > > > > the IOMMU during boot. Add a "boot-bypass" option that lets users

> > > change

> > > > > this behavior.

> > > > >

> > > > > Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>

> > > >

> > > > wouldn't this be a spec violation?

> > > >

> > > >

> > > > When the device is reset, endpoints are not attached to any domain.

> > > >

> > > > If the VIRTIO_IOMMU_F_BYPASS feature is negotiated, all accesses

> from

> > > > unattached endpoints are allowed and translated by the IOMMU using

> the

> > > > identity function. If the feature is not negotiated, any memory access

> > > > from an unattached endpoint fails. Upon attaching an endpoint in

> > > > bypass mode to a new domain, any memory access from the endpoint

> fails,

> > > > since the domain does not contain any mapping.

> > >

> > > Right, but the device behavior regarding BYPASS is specified as:

> > >

> > >   If the driver does not accept the VIRTIO_IOMMU_F_BYPASS feature, the

> > >   device SHOULD NOT let endpoints access the guest-physical address

> space.

> > >

> > > So I figured that the spec could be read as "before feature negotiation

> > > it's undefined whether the IOMMU is bypassed or not" and so a device

> > > implementation can choose either (previously discussed at [1]). But it's a

> > > stretch, we should clarify this.

> > >

> > > Also, the OS might want to know whether DMA was bypassing the

> IOMMU

> > > before

> > > the device is initialized. If there are strong security requirements, then

> > > boot-bypass means the system was vulnerable to DMA attacks during

> boot.

> > >

> > > So I'd like to add a new feature bit for this,

> VIRTIO_IOMMU_F_BOOT_BYPASS,

> > > that tells whether DMA bypasses the IOMMU before feature negotiation.

> It's

> > > only an indicator, accepting the feature doesn't change anything. I'll

> > > send the spec change shortly.

> > >

> > > Thanks,

> > > Jean

> > >

> > > [1] https://lore.kernel.org/qemu-

> > > devel/20200109104032.GA937123@myrica/

> > >

> >

> > I wonder why we didn't define the default behavior to bypass (to align with

> > other vIOMMUs) in the first place and then have an option (e.g.

> > VIRTIO_IOMMU_F_NOBYPASS) to override in case a stricter policy is

> required.

> 

> Yes in hindsight the polarity should probably have been reversed. Clearly

> I could have put more thoughts into this. When writing the spec it seemed

> natural in terms of security to disallow accesses until software

> explicitly enables bypass.

> 

> In the linked discussion, which happened after the initial spec was

> accepted, we concluded that QEMU should be boot-bypass by default and

> the

> spec ought to be clarified to explicitly allow this, I just didn't take

> time to work on it until now.

> 

> > In concept when a IOMMU device is uninitialized or reset, it essentially

> means

> > no protection in place thus a bypass behavior.

> 

> Not necessarily, the SMMU leaves that choice to the implementation (but it

> does have the right polarity 0 == bypass). The SMMU_GBPA register defines

> the bypass behavior. Implementations can tie it to 0 or 1 to define the

> default behavior of abort or bypass. Then software sets it to 0 or 1 to

> select the runtime policy. Ideally I'd like a similar mechanism for

> virtio-iommu, because it's sticky across reset - no vulnerability window

> when firmware hands over the IOMMU to the OS.

> 


Thanks. Now I see the background of this patch. 😊
diff mbox series

Patch

diff --git a/include/hw/virtio/virtio-iommu.h b/include/hw/virtio/virtio-iommu.h
index 273e35c04bc..4c66989ca4e 100644
--- a/include/hw/virtio/virtio-iommu.h
+++ b/include/hw/virtio/virtio-iommu.h
@@ -58,6 +58,7 @@  struct VirtIOIOMMU {
     GTree *domains;
     QemuMutex mutex;
     GTree *endpoints;
+    bool boot_bypass;
 };
 
 #endif
diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
index c2883a2f6c8..cd08dc39eca 100644
--- a/hw/virtio/virtio-iommu.c
+++ b/hw/virtio/virtio-iommu.c
@@ -689,6 +689,25 @@  static void virtio_iommu_report_fault(VirtIOIOMMU *viommu, uint8_t reason,
 
 }
 
+static bool virtio_iommu_bypass_is_allowed(VirtIOIOMMU *s)
+{
+    VirtIODevice *vdev = &s->parent_obj;
+
+    /*
+     * Allow bypass if:
+     * - boot_bypass is enabled and the BYPASS feature hasn't yet been
+     *   acknowledged.
+     * - the BYPASS feature has been negotiated.
+     */
+    if (s->boot_bypass && !(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK)) {
+        return true;
+    }
+    if (virtio_vdev_has_feature(vdev, VIRTIO_IOMMU_F_BYPASS)) {
+        return true;
+    }
+    return false;
+}
+
 static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr,
                                             IOMMUAccessFlags flag,
                                             int iommu_idx)
@@ -715,8 +734,7 @@  static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr,
         .perm = IOMMU_NONE,
     };
 
-    bypass_allowed = virtio_vdev_has_feature(&s->parent_obj,
-                                             VIRTIO_IOMMU_F_BYPASS);
+    bypass_allowed = virtio_iommu_bypass_is_allowed(s);
 
     sid = virtio_iommu_get_bdf(sdev);
 
@@ -1156,6 +1174,7 @@  static const VMStateDescription vmstate_virtio_iommu = {
 
 static Property virtio_iommu_properties[] = {
     DEFINE_PROP_LINK("primary-bus", VirtIOIOMMU, primary_bus, "PCI", PCIBus *),
+    DEFINE_PROP_BOOL("boot-bypass", VirtIOIOMMU, boot_bypass, true),
     DEFINE_PROP_END_OF_LIST(),
 };