diff mbox series

[v12,08/18] KVM: guest_memfd: Allow host to map guest_memfd pages

Message ID 20250611133330.1514028-9-tabba@google.com
State New
Headers show
Series KVM: Mapping guest_memfd backed memory at the host for software protected VMs | expand

Commit Message

Fuad Tabba June 11, 2025, 1:33 p.m. UTC
This patch enables support for shared memory in guest_memfd, including
mapping that memory from host userspace.

This functionality is gated by the KVM_GMEM_SHARED_MEM Kconfig option,
and enabled for a given instance by the GUEST_MEMFD_FLAG_SUPPORT_SHARED
flag at creation time.

Reviewed-by: Gavin Shan <gshan@redhat.com>
Acked-by: David Hildenbrand <david@redhat.com>
Co-developed-by: Ackerley Tng <ackerleytng@google.com>
Signed-off-by: Ackerley Tng <ackerleytng@google.com>
Signed-off-by: Fuad Tabba <tabba@google.com>
---
 include/linux/kvm_host.h | 13 +++++++
 include/uapi/linux/kvm.h |  1 +
 virt/kvm/Kconfig         |  4 +++
 virt/kvm/guest_memfd.c   | 73 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 91 insertions(+)

Comments

Shivank Garg June 12, 2025, 4:16 p.m. UTC | #1
On 6/11/2025 7:03 PM, Fuad Tabba wrote:
> This patch enables support for shared memory in guest_memfd, including
> mapping that memory from host userspace.
> 
> This functionality is gated by the KVM_GMEM_SHARED_MEM Kconfig option,
> and enabled for a given instance by the GUEST_MEMFD_FLAG_SUPPORT_SHARED
> flag at creation time.
> 
> Reviewed-by: Gavin Shan <gshan@redhat.com>
> Acked-by: David Hildenbrand <david@redhat.com>
> Co-developed-by: Ackerley Tng <ackerleytng@google.com>
> Signed-off-by: Ackerley Tng <ackerleytng@google.com>
> Signed-off-by: Fuad Tabba <tabba@google.com>
> ---
>  include/linux/kvm_host.h | 13 +++++++
>  include/uapi/linux/kvm.h |  1 +
>  virt/kvm/Kconfig         |  4 +++
>  virt/kvm/guest_memfd.c   | 73 ++++++++++++++++++++++++++++++++++++++++
>  4 files changed, 91 insertions(+)
> 
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 9a6712151a74..6b63556ca150 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -729,6 +729,19 @@ static inline bool kvm_arch_supports_gmem(struct kvm *kvm)
>  }
>  #endif
>  
> +/*
> + * Returns true if this VM supports shared mem in guest_memfd.
> + *
> + * Arch code must define kvm_arch_supports_gmem_shared_mem if support for
> + * guest_memfd is enabled.
> + */
> +#if !defined(kvm_arch_supports_gmem_shared_mem)
> +static inline bool kvm_arch_supports_gmem_shared_mem(struct kvm *kvm)
> +{
> +	return false;
> +}
> +#endif
> +
>  #ifndef kvm_arch_has_readonly_mem
>  static inline bool kvm_arch_has_readonly_mem(struct kvm *kvm)
>  {
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index d00b85cb168c..cb19150fd595 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -1570,6 +1570,7 @@ struct kvm_memory_attributes {
>  #define KVM_MEMORY_ATTRIBUTE_PRIVATE           (1ULL << 3)
>  
>  #define KVM_CREATE_GUEST_MEMFD	_IOWR(KVMIO,  0xd4, struct kvm_create_guest_memfd)
> +#define GUEST_MEMFD_FLAG_SUPPORT_SHARED	(1ULL << 0)
>  
>  struct kvm_create_guest_memfd {
>  	__u64 size;
> diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
> index 559c93ad90be..e90884f74404 100644
> --- a/virt/kvm/Kconfig
> +++ b/virt/kvm/Kconfig
> @@ -128,3 +128,7 @@ config HAVE_KVM_ARCH_GMEM_PREPARE
>  config HAVE_KVM_ARCH_GMEM_INVALIDATE
>         bool
>         depends on KVM_GMEM
> +
> +config KVM_GMEM_SHARED_MEM
> +       select KVM_GMEM
> +       bool
> diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
> index 6db515833f61..06616b6b493b 100644
> --- a/virt/kvm/guest_memfd.c
> +++ b/virt/kvm/guest_memfd.c
> @@ -312,7 +312,77 @@ static pgoff_t kvm_gmem_get_index(struct kvm_memory_slot *slot, gfn_t gfn)
>  	return gfn - slot->base_gfn + slot->gmem.pgoff;
>  }
>  
> +static bool kvm_gmem_supports_shared(struct inode *inode)
> +{
> +	const u64 flags = (u64)inode->i_private;
> +
> +	if (!IS_ENABLED(CONFIG_KVM_GMEM_SHARED_MEM))
> +		return false;
> +
> +	return flags & GUEST_MEMFD_FLAG_SUPPORT_SHARED;
> +}
> +
> +static vm_fault_t kvm_gmem_fault_shared(struct vm_fault *vmf)
> +{
> +	struct inode *inode = file_inode(vmf->vma->vm_file);
> +	struct folio *folio;
> +	vm_fault_t ret = VM_FAULT_LOCKED;
> +
> +	if (((loff_t)vmf->pgoff << PAGE_SHIFT) >= i_size_read(inode))
> +		return VM_FAULT_SIGBUS;
> +
> +	folio = kvm_gmem_get_folio(inode, vmf->pgoff);
> +	if (IS_ERR(folio)) {
> +		int err = PTR_ERR(folio);
> +
> +		if (err == -EAGAIN)
> +			return VM_FAULT_RETRY;
> +
> +		return vmf_error(err);
> +	}
> +
> +	if (WARN_ON_ONCE(folio_test_large(folio))) {
> +		ret = VM_FAULT_SIGBUS;
> +		goto out_folio;
> +	}
> +
> +	if (!folio_test_uptodate(folio)) {
> +		clear_highpage(folio_page(folio, 0));
> +		kvm_gmem_mark_prepared(folio);
> +	}
> +
> +	vmf->page = folio_file_page(folio, vmf->pgoff);
> +
> +out_folio:
> +	if (ret != VM_FAULT_LOCKED) {
> +		folio_unlock(folio);
> +		folio_put(folio);
> +	}
> +
> +	return ret;
> +}
> +
> +static const struct vm_operations_struct kvm_gmem_vm_ops = {
> +	.fault = kvm_gmem_fault_shared,
> +};
> +
> +static int kvm_gmem_mmap(struct file *file, struct vm_area_struct *vma)
> +{
> +	if (!kvm_gmem_supports_shared(file_inode(file)))
> +		return -ENODEV;
> +
> +	if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) !=
> +	    (VM_SHARED | VM_MAYSHARE)) {
> +		return -EINVAL;
> +	}
> +
> +	vma->vm_ops = &kvm_gmem_vm_ops;
> +
> +	return 0;
> +}
> +
>  static struct file_operations kvm_gmem_fops = {
> +	.mmap		= kvm_gmem_mmap,
>  	.open		= generic_file_open,
>  	.release	= kvm_gmem_release,
>  	.fallocate	= kvm_gmem_fallocate,
> @@ -463,6 +533,9 @@ int kvm_gmem_create(struct kvm *kvm, struct kvm_create_guest_memfd *args)
>  	u64 flags = args->flags;
>  	u64 valid_flags = 0;
>  
> +	if (kvm_arch_supports_gmem_shared_mem(kvm))
> +		valid_flags |= GUEST_MEMFD_FLAG_SUPPORT_SHARED;
> +
>  	if (flags & ~valid_flags)
>  		return -EINVAL;
>  

LGTM!

Reviewed-by: Shivank Garg <shivankg@amd.com>

Thanks,
Shivank
Fuad Tabba June 16, 2025, 6:52 a.m. UTC | #2
Hi Sean,

On Fri, 13 Jun 2025 at 22:03, Sean Christopherson <seanjc@google.com> wrote:
>
> On Wed, Jun 11, 2025, Fuad Tabba wrote:
> > This patch enables support for shared memory in guest_memfd, including
>
> Please don't lead with with "This patch", simply state what changes are being
> made as a command.

Ack.

> > mapping that memory from host userspace.
>
> > This functionality is gated by the KVM_GMEM_SHARED_MEM Kconfig option,
> > and enabled for a given instance by the GUEST_MEMFD_FLAG_SUPPORT_SHARED
> > flag at creation time.
>
> Why?  I can see that from the patch.

It's in the patch series, not this patch. Would it help if I rephrase
it along the lines of:

This functionality isn't enabled until the introduction of the
KVM_GMEM_SHARED_MEM Kconfig option, and enabled for a given instance
by the GUEST_MEMFD_FLAG_SUPPORT_SHARED flag at creation time. Both of
which are introduced in a subsequent patch.

> This changelog is way, way, waaay too light on details.  Sorry for jumping in at
> the 11th hour, but we've spent what, 2 years working on this?

I'll expand this. Just to make sure that I include the right details,
are you looking for implementation details, motivation, use cases?

> > Reviewed-by: Gavin Shan <gshan@redhat.com>
> > Acked-by: David Hildenbrand <david@redhat.com>
> > Co-developed-by: Ackerley Tng <ackerleytng@google.com>
> > Signed-off-by: Ackerley Tng <ackerleytng@google.com>
> > Signed-off-by: Fuad Tabba <tabba@google.com>
> > ---
> > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> > index d00b85cb168c..cb19150fd595 100644
> > --- a/include/uapi/linux/kvm.h
> > +++ b/include/uapi/linux/kvm.h
> > @@ -1570,6 +1570,7 @@ struct kvm_memory_attributes {
> >  #define KVM_MEMORY_ATTRIBUTE_PRIVATE           (1ULL << 3)
> >
> >  #define KVM_CREATE_GUEST_MEMFD       _IOWR(KVMIO,  0xd4, struct kvm_create_guest_memfd)
> > +#define GUEST_MEMFD_FLAG_SUPPORT_SHARED      (1ULL << 0)
>
> I find the SUPPORT_SHARED terminology to be super confusing.  I had to dig quite
> deep to undesrtand that "support shared" actually mean "userspace explicitly
> enable sharing on _this_ guest_memfd instance".  E.g. I was surprised to see
>
> IMO, GUEST_MEMFD_FLAG_SHAREABLE would be more appropriate.  But even that is
> weird to me.  For non-CoCo VMs, there is no concept of shared vs. private.  What's
> novel and notable is that the memory is _mappable_.  Yeah, yeah, pKVM's use case
> is to share memory, but that's a _use case_, not the property of guest_memfd that
> is being controlled by userspace.
>
> And kvm_gmem_memslot_supports_shared() is even worse.  It's simply that the
> memslot is bound to a mappable guest_memfd instance, it's that the guest_memfd
> instance is the _only_ entry point to the memslot.
>
> So my vote would be "GUEST_MEMFD_FLAG_MAPPABLE", and then something like
> KVM_MEMSLOT_GUEST_MEMFD_ONLY.  That will make code like this:
>
>         if (kvm_slot_has_gmem(slot) &&
>             (kvm_gmem_memslot_supports_shared(slot) ||
>              kvm_get_memory_attributes(kvm, gfn) & KVM_MEMORY_ATTRIBUTE_PRIVATE)) {
>                 return kvm_gmem_max_mapping_level(slot, gfn, max_level);
>         }
>
> much more intutive:
>
>         if (kvm_is_memslot_gmem_only(slot) ||
>             kvm_get_memory_attributes(kvm, gfn) & KVM_MEMORY_ATTRIBUTE_PRIVATE))
>                 return kvm_gmem_max_mapping_level(slot, gfn, max_level);
>
> And then have kvm_gmem_mapping_order() do:
>
>         WARN_ON_ONCE(!kvm_slot_has_gmem(slot));
>         return 0;

I have no preference really. To me this was intuitive, but I guess I
have been staring at this way too long. If you and all the
stakeholders are happy with your suggested changes, then I am happy
making them :)


> >  struct kvm_create_guest_memfd {
> >       __u64 size;
> > diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
> > index 559c93ad90be..e90884f74404 100644
> > --- a/virt/kvm/Kconfig
> > +++ b/virt/kvm/Kconfig
> > @@ -128,3 +128,7 @@ config HAVE_KVM_ARCH_GMEM_PREPARE
> >  config HAVE_KVM_ARCH_GMEM_INVALIDATE
> >         bool
> >         depends on KVM_GMEM
> > +
> > +config KVM_GMEM_SHARED_MEM
> > +       select KVM_GMEM
> > +       bool
> > diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
> > index 6db515833f61..06616b6b493b 100644
> > --- a/virt/kvm/guest_memfd.c
> > +++ b/virt/kvm/guest_memfd.c
> > @@ -312,7 +312,77 @@ static pgoff_t kvm_gmem_get_index(struct kvm_memory_slot *slot, gfn_t gfn)
> >       return gfn - slot->base_gfn + slot->gmem.pgoff;
> >  }
> >
> > +static bool kvm_gmem_supports_shared(struct inode *inode)
> > +{
> > +     const u64 flags = (u64)inode->i_private;
> > +
> > +     if (!IS_ENABLED(CONFIG_KVM_GMEM_SHARED_MEM))
> > +             return false;
> > +
> > +     return flags & GUEST_MEMFD_FLAG_SUPPORT_SHARED;
> > +}
> > +
> > +static vm_fault_t kvm_gmem_fault_shared(struct vm_fault *vmf)
>
> And to my point about "shared", this is also very confusing, because there are
> zero checks in here about shared vs. private.

As you noted in a later email, it was you who suggested this name, but
like I said, I am happy to change it.

> > +{
> > +     struct inode *inode = file_inode(vmf->vma->vm_file);
> > +     struct folio *folio;
> > +     vm_fault_t ret = VM_FAULT_LOCKED;
> > +
> > +     if (((loff_t)vmf->pgoff << PAGE_SHIFT) >= i_size_read(inode))
> > +             return VM_FAULT_SIGBUS;
> > +
> > +     folio = kvm_gmem_get_folio(inode, vmf->pgoff);
> > +     if (IS_ERR(folio)) {
> > +             int err = PTR_ERR(folio);
> > +
> > +             if (err == -EAGAIN)
> > +                     return VM_FAULT_RETRY;
> > +
> > +             return vmf_error(err);
> > +     }
> > +
> > +     if (WARN_ON_ONCE(folio_test_large(folio))) {
> > +             ret = VM_FAULT_SIGBUS;
> > +             goto out_folio;
> > +     }
> > +
> > +     if (!folio_test_uptodate(folio)) {
> > +             clear_highpage(folio_page(folio, 0));
> > +             kvm_gmem_mark_prepared(folio);
> > +     }
> > +
> > +     vmf->page = folio_file_page(folio, vmf->pgoff);
> > +
> > +out_folio:
> > +     if (ret != VM_FAULT_LOCKED) {
> > +             folio_unlock(folio);
> > +             folio_put(folio);
> > +     }
> > +
> > +     return ret;
> > +}
> > +
> > +static const struct vm_operations_struct kvm_gmem_vm_ops = {
> > +     .fault = kvm_gmem_fault_shared,
> > +};
> > +
> > +static int kvm_gmem_mmap(struct file *file, struct vm_area_struct *vma)
> > +{
> > +     if (!kvm_gmem_supports_shared(file_inode(file)))
> > +             return -ENODEV;
> > +
> > +     if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) !=
> > +         (VM_SHARED | VM_MAYSHARE)) {
>
> And the SHARED terminology gets really confusing here, due to colliding with the
> existing notion of SHARED file mappings.

Ack.

Before I respin, let's make sure we're all on the same page in terms
of terminology. Hopefully David can chime in again now that he's had
the weekend to ponder over the latest exchange :)

Thanks,
/fuad

> > +             return -EINVAL;
> > +     }
> > +
> > +     vma->vm_ops = &kvm_gmem_vm_ops;
> > +
> > +     return 0;
> > +}
> > +
> >  static struct file_operations kvm_gmem_fops = {
> > +     .mmap           = kvm_gmem_mmap,
> >       .open           = generic_file_open,
> >       .release        = kvm_gmem_release,
> >       .fallocate      = kvm_gmem_fallocate,
> > @@ -463,6 +533,9 @@ int kvm_gmem_create(struct kvm *kvm, struct kvm_create_guest_memfd *args)
> >       u64 flags = args->flags;
> >       u64 valid_flags = 0;
> >
> > +     if (kvm_arch_supports_gmem_shared_mem(kvm))
> > +             valid_flags |= GUEST_MEMFD_FLAG_SUPPORT_SHARED;
> > +
> >       if (flags & ~valid_flags)
> >               return -EINVAL;
> >
> > --
> > 2.50.0.rc0.642.g800a2b2222-goog
> >
Ira Weiny June 16, 2025, 1:44 p.m. UTC | #3
Sean Christopherson wrote:
> On Wed, Jun 11, 2025, Fuad Tabba wrote:
> > This patch enables support for shared memory in guest_memfd, including
> 
> Please don't lead with with "This patch", simply state what changes are being
> made as a command.
> 
> > mapping that memory from host userspace.
> 
> > This functionality is gated by the KVM_GMEM_SHARED_MEM Kconfig option,
> > and enabled for a given instance by the GUEST_MEMFD_FLAG_SUPPORT_SHARED
> > flag at creation time.
> 
> Why?  I can see that from the patch.
> 
> This changelog is way, way, waaay too light on details.  Sorry for jumping in at
> the 11th hour, but we've spent what, 2 years working on this? 
> 
> > Reviewed-by: Gavin Shan <gshan@redhat.com>
> > Acked-by: David Hildenbrand <david@redhat.com>
> > Co-developed-by: Ackerley Tng <ackerleytng@google.com>
> > Signed-off-by: Ackerley Tng <ackerleytng@google.com>
> > Signed-off-by: Fuad Tabba <tabba@google.com>
> > ---
> > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> > index d00b85cb168c..cb19150fd595 100644
> > --- a/include/uapi/linux/kvm.h
> > +++ b/include/uapi/linux/kvm.h
> > @@ -1570,6 +1570,7 @@ struct kvm_memory_attributes {
> >  #define KVM_MEMORY_ATTRIBUTE_PRIVATE           (1ULL << 3)
> >  
> >  #define KVM_CREATE_GUEST_MEMFD	_IOWR(KVMIO,  0xd4, struct kvm_create_guest_memfd)
> > +#define GUEST_MEMFD_FLAG_SUPPORT_SHARED	(1ULL << 0)
> 
> I find the SUPPORT_SHARED terminology to be super confusing.  I had to dig quite
> deep to undesrtand that "support shared" actually mean "userspace explicitly
> enable sharing on _this_ guest_memfd instance".  E.g. I was surprised to see
> 
> IMO, GUEST_MEMFD_FLAG_SHAREABLE would be more appropriate.  But even that is
> weird to me.  For non-CoCo VMs, there is no concept of shared vs. private.  What's
> novel and notable is that the memory is _mappable_.  Yeah, yeah, pKVM's use case
> is to share memory, but that's a _use case_, not the property of guest_memfd that
> is being controlled by userspace.
> 
> And kvm_gmem_memslot_supports_shared() is even worse.  It's simply that the
> memslot is bound to a mappable guest_memfd instance, it's that the guest_memfd
> instance is the _only_ entry point to the memslot.
> 
> So my vote would be "GUEST_MEMFD_FLAG_MAPPABLE", and then something like

If we are going to change this; FLAG_MAPPABLE is not clear to me either.
The guest can map private memory, right?  I see your point about shared
being overloaded with file shared but it would not be the first time a
term is overloaded.  kvm_slot_has_gmem() does makes a lot of sense.

If it is going to change; how about GUEST_MEMFD_FLAG_USER_MAPPABLE?

Ira

> KVM_MEMSLOT_GUEST_MEMFD_ONLY.  That will make code like this:
> 
> 	if (kvm_slot_has_gmem(slot) &&
> 	    (kvm_gmem_memslot_supports_shared(slot) ||
> 	     kvm_get_memory_attributes(kvm, gfn) & KVM_MEMORY_ATTRIBUTE_PRIVATE)) {
> 		return kvm_gmem_max_mapping_level(slot, gfn, max_level);
> 	}
> 
> much more intutive:
> 
> 	if (kvm_is_memslot_gmem_only(slot) ||
> 	    kvm_get_memory_attributes(kvm, gfn) & KVM_MEMORY_ATTRIBUTE_PRIVATE))
> 		return kvm_gmem_max_mapping_level(slot, gfn, max_level);
> 
> And then have kvm_gmem_mapping_order() do:
> 
> 	WARN_ON_ONCE(!kvm_slot_has_gmem(slot));
> 	return 0;
> 
> >  struct kvm_create_guest_memfd {
> >  	__u64 size;
> > diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
> > index 559c93ad90be..e90884f74404 100644
> > --- a/virt/kvm/Kconfig
> > +++ b/virt/kvm/Kconfig
> > @@ -128,3 +128,7 @@ config HAVE_KVM_ARCH_GMEM_PREPARE
> >  config HAVE_KVM_ARCH_GMEM_INVALIDATE
> >         bool
> >         depends on KVM_GMEM
> > +
> > +config KVM_GMEM_SHARED_MEM
> > +       select KVM_GMEM
> > +       bool
> > diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
> > index 6db515833f61..06616b6b493b 100644
> > --- a/virt/kvm/guest_memfd.c
> > +++ b/virt/kvm/guest_memfd.c
> > @@ -312,7 +312,77 @@ static pgoff_t kvm_gmem_get_index(struct kvm_memory_slot *slot, gfn_t gfn)
> >  	return gfn - slot->base_gfn + slot->gmem.pgoff;
> >  }
> >  
> > +static bool kvm_gmem_supports_shared(struct inode *inode)
> > +{
> > +	const u64 flags = (u64)inode->i_private;
> > +
> > +	if (!IS_ENABLED(CONFIG_KVM_GMEM_SHARED_MEM))
> > +		return false;
> > +
> > +	return flags & GUEST_MEMFD_FLAG_SUPPORT_SHARED;
> > +}
> > +
> > +static vm_fault_t kvm_gmem_fault_shared(struct vm_fault *vmf)
> 
> And to my point about "shared", this is also very confusing, because there are
> zero checks in here about shared vs. private.
> 
> > +{
> > +	struct inode *inode = file_inode(vmf->vma->vm_file);
> > +	struct folio *folio;
> > +	vm_fault_t ret = VM_FAULT_LOCKED;
> > +
> > +	if (((loff_t)vmf->pgoff << PAGE_SHIFT) >= i_size_read(inode))
> > +		return VM_FAULT_SIGBUS;
> > +
> > +	folio = kvm_gmem_get_folio(inode, vmf->pgoff);
> > +	if (IS_ERR(folio)) {
> > +		int err = PTR_ERR(folio);
> > +
> > +		if (err == -EAGAIN)
> > +			return VM_FAULT_RETRY;
> > +
> > +		return vmf_error(err);
> > +	}
> > +
> > +	if (WARN_ON_ONCE(folio_test_large(folio))) {
> > +		ret = VM_FAULT_SIGBUS;
> > +		goto out_folio;
> > +	}
> > +
> > +	if (!folio_test_uptodate(folio)) {
> > +		clear_highpage(folio_page(folio, 0));
> > +		kvm_gmem_mark_prepared(folio);
> > +	}
> > +
> > +	vmf->page = folio_file_page(folio, vmf->pgoff);
> > +
> > +out_folio:
> > +	if (ret != VM_FAULT_LOCKED) {
> > +		folio_unlock(folio);
> > +		folio_put(folio);
> > +	}
> > +
> > +	return ret;
> > +}
> > +
> > +static const struct vm_operations_struct kvm_gmem_vm_ops = {
> > +	.fault = kvm_gmem_fault_shared,
> > +};
> > +
> > +static int kvm_gmem_mmap(struct file *file, struct vm_area_struct *vma)
> > +{
> > +	if (!kvm_gmem_supports_shared(file_inode(file)))
> > +		return -ENODEV;
> > +
> > +	if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) !=
> > +	    (VM_SHARED | VM_MAYSHARE)) {
> 
> And the SHARED terminology gets really confusing here, due to colliding with the
> existing notion of SHARED file mappings.
> 
> > +		return -EINVAL;
> > +	}
> > +
> > +	vma->vm_ops = &kvm_gmem_vm_ops;
> > +
> > +	return 0;
> > +}
> > +
> >  static struct file_operations kvm_gmem_fops = {
> > +	.mmap		= kvm_gmem_mmap,
> >  	.open		= generic_file_open,
> >  	.release	= kvm_gmem_release,
> >  	.fallocate	= kvm_gmem_fallocate,
> > @@ -463,6 +533,9 @@ int kvm_gmem_create(struct kvm *kvm, struct kvm_create_guest_memfd *args)
> >  	u64 flags = args->flags;
> >  	u64 valid_flags = 0;
> >  
> > +	if (kvm_arch_supports_gmem_shared_mem(kvm))
> > +		valid_flags |= GUEST_MEMFD_FLAG_SUPPORT_SHARED;
> > +
> >  	if (flags & ~valid_flags)
> >  		return -EINVAL;
> >  
> > -- 
> > 2.50.0.rc0.642.g800a2b2222-goog
> >
Fuad Tabba June 16, 2025, 2:16 p.m. UTC | #4
On Mon, 16 Jun 2025 at 15:03, David Hildenbrand <david@redhat.com> wrote:
>
> On 16.06.25 15:44, Ira Weiny wrote:
> > Sean Christopherson wrote:
> >> On Wed, Jun 11, 2025, Fuad Tabba wrote:
> >>> This patch enables support for shared memory in guest_memfd, including
> >>
> >> Please don't lead with with "This patch", simply state what changes are being
> >> made as a command.
> >>
> >>> mapping that memory from host userspace.
> >>
> >>> This functionality is gated by the KVM_GMEM_SHARED_MEM Kconfig option,
> >>> and enabled for a given instance by the GUEST_MEMFD_FLAG_SUPPORT_SHARED
> >>> flag at creation time.
> >>
> >> Why?  I can see that from the patch.
> >>
> >> This changelog is way, way, waaay too light on details.  Sorry for jumping in at
> >> the 11th hour, but we've spent what, 2 years working on this?
> >>
> >>> Reviewed-by: Gavin Shan <gshan@redhat.com>
> >>> Acked-by: David Hildenbrand <david@redhat.com>
> >>> Co-developed-by: Ackerley Tng <ackerleytng@google.com>
> >>> Signed-off-by: Ackerley Tng <ackerleytng@google.com>
> >>> Signed-off-by: Fuad Tabba <tabba@google.com>
> >>> ---
> >>> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> >>> index d00b85cb168c..cb19150fd595 100644
> >>> --- a/include/uapi/linux/kvm.h
> >>> +++ b/include/uapi/linux/kvm.h
> >>> @@ -1570,6 +1570,7 @@ struct kvm_memory_attributes {
> >>>   #define KVM_MEMORY_ATTRIBUTE_PRIVATE           (1ULL << 3)
> >>>
> >>>   #define KVM_CREATE_GUEST_MEMFD    _IOWR(KVMIO,  0xd4, struct kvm_create_guest_memfd)
> >>> +#define GUEST_MEMFD_FLAG_SUPPORT_SHARED    (1ULL << 0)
> >>
> >> I find the SUPPORT_SHARED terminology to be super confusing.  I had to dig quite
> >> deep to undesrtand that "support shared" actually mean "userspace explicitly
> >> enable sharing on _this_ guest_memfd instance".  E.g. I was surprised to see
> >>
> >> IMO, GUEST_MEMFD_FLAG_SHAREABLE would be more appropriate.  But even that is
> >> weird to me.  For non-CoCo VMs, there is no concept of shared vs. private.  What's
> >> novel and notable is that the memory is _mappable_.  Yeah, yeah, pKVM's use case
> >> is to share memory, but that's a _use case_, not the property of guest_memfd that
> >> is being controlled by userspace.
> >>
> >> And kvm_gmem_memslot_supports_shared() is even worse.  It's simply that the
> >> memslot is bound to a mappable guest_memfd instance, it's that the guest_memfd
> >> instance is the _only_ entry point to the memslot.
> >>
> >> So my vote would be "GUEST_MEMFD_FLAG_MAPPABLE", and then something like
> >
> > If we are going to change this; FLAG_MAPPABLE is not clear to me either.
> > The guest can map private memory, right?  I see your point about shared
> > being overloaded with file shared but it would not be the first time a
> > term is overloaded.  kvm_slot_has_gmem() does makes a lot of sense.
> >
> > If it is going to change; how about GUEST_MEMFD_FLAG_USER_MAPPABLE?
>
> If "shared" is not good enough terminology ...
>
> ... can we please just find a way to name what this "non-private" memory
> is called? That something is mappable into $whatever is not the right
> way to look at this IMHO. As raised in the past, we can easily support
> read()/write()/etc to this non-private memory.
>
>
> I'll note, the "non-private" memory in guest-memfd behaves just like ...
> the "shared" memory in shmem ... well, or like other memory in memfd.
> (which is based on mm/shmem.c).
>
> "Private" is also not the best way to describe the "protected\encrypted"
> memory, but that ship has sailed with KVM_MEMORY_ATTRIBUTE_PRIVATE.
>
> I'll further note that in the doc of KVM_SET_USER_MEMORY_REGION2 we talk
> about "private" vs "shared" memory ... so that would have to be improved
> as well.

To add to what David just wrote, V1 of this series used the term
"mappable" [1].  After a few discussions, I thought the consensus was
that "shared" was a more accurate description --- i.e., mappability
was a side effect of it being shared with the host.

One could argue that non-CoCo VMs have no concept of "shared" vs
"private". A different way of looking at it is, non-CoCo VMs have
their state as shared by default.

I don't have a strong opinion. What would be good if we could agree on
the terminology before I respin this.

Thanks,
/fuad


[1] https://lore.kernel.org/all/20250122152738.1173160-4-tabba@google.com/


> --
> Cheers,
>
> David / dhildenb
>
David Hildenbrand June 16, 2025, 2:25 p.m. UTC | #5
On 16.06.25 16:16, Fuad Tabba wrote:
> On Mon, 16 Jun 2025 at 15:03, David Hildenbrand <david@redhat.com> wrote:
>>
>> On 16.06.25 15:44, Ira Weiny wrote:
>>> Sean Christopherson wrote:
>>>> On Wed, Jun 11, 2025, Fuad Tabba wrote:
>>>>> This patch enables support for shared memory in guest_memfd, including
>>>>
>>>> Please don't lead with with "This patch", simply state what changes are being
>>>> made as a command.
>>>>
>>>>> mapping that memory from host userspace.
>>>>
>>>>> This functionality is gated by the KVM_GMEM_SHARED_MEM Kconfig option,
>>>>> and enabled for a given instance by the GUEST_MEMFD_FLAG_SUPPORT_SHARED
>>>>> flag at creation time.
>>>>
>>>> Why?  I can see that from the patch.
>>>>
>>>> This changelog is way, way, waaay too light on details.  Sorry for jumping in at
>>>> the 11th hour, but we've spent what, 2 years working on this?
>>>>
>>>>> Reviewed-by: Gavin Shan <gshan@redhat.com>
>>>>> Acked-by: David Hildenbrand <david@redhat.com>
>>>>> Co-developed-by: Ackerley Tng <ackerleytng@google.com>
>>>>> Signed-off-by: Ackerley Tng <ackerleytng@google.com>
>>>>> Signed-off-by: Fuad Tabba <tabba@google.com>
>>>>> ---
>>>>> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
>>>>> index d00b85cb168c..cb19150fd595 100644
>>>>> --- a/include/uapi/linux/kvm.h
>>>>> +++ b/include/uapi/linux/kvm.h
>>>>> @@ -1570,6 +1570,7 @@ struct kvm_memory_attributes {
>>>>>    #define KVM_MEMORY_ATTRIBUTE_PRIVATE           (1ULL << 3)
>>>>>
>>>>>    #define KVM_CREATE_GUEST_MEMFD    _IOWR(KVMIO,  0xd4, struct kvm_create_guest_memfd)
>>>>> +#define GUEST_MEMFD_FLAG_SUPPORT_SHARED    (1ULL << 0)
>>>>
>>>> I find the SUPPORT_SHARED terminology to be super confusing.  I had to dig quite
>>>> deep to undesrtand that "support shared" actually mean "userspace explicitly
>>>> enable sharing on _this_ guest_memfd instance".  E.g. I was surprised to see
>>>>
>>>> IMO, GUEST_MEMFD_FLAG_SHAREABLE would be more appropriate.  But even that is
>>>> weird to me.  For non-CoCo VMs, there is no concept of shared vs. private.  What's
>>>> novel and notable is that the memory is _mappable_.  Yeah, yeah, pKVM's use case
>>>> is to share memory, but that's a _use case_, not the property of guest_memfd that
>>>> is being controlled by userspace.
>>>>
>>>> And kvm_gmem_memslot_supports_shared() is even worse.  It's simply that the
>>>> memslot is bound to a mappable guest_memfd instance, it's that the guest_memfd
>>>> instance is the _only_ entry point to the memslot.
>>>>
>>>> So my vote would be "GUEST_MEMFD_FLAG_MAPPABLE", and then something like
>>>
>>> If we are going to change this; FLAG_MAPPABLE is not clear to me either.
>>> The guest can map private memory, right?  I see your point about shared
>>> being overloaded with file shared but it would not be the first time a
>>> term is overloaded.  kvm_slot_has_gmem() does makes a lot of sense.
>>>
>>> If it is going to change; how about GUEST_MEMFD_FLAG_USER_MAPPABLE?
>>
>> If "shared" is not good enough terminology ...
>>
>> ... can we please just find a way to name what this "non-private" memory
>> is called? That something is mappable into $whatever is not the right
>> way to look at this IMHO. As raised in the past, we can easily support
>> read()/write()/etc to this non-private memory.
>>
>>
>> I'll note, the "non-private" memory in guest-memfd behaves just like ...
>> the "shared" memory in shmem ... well, or like other memory in memfd.
>> (which is based on mm/shmem.c).
>>
>> "Private" is also not the best way to describe the "protected\encrypted"
>> memory, but that ship has sailed with KVM_MEMORY_ATTRIBUTE_PRIVATE.
>>
>> I'll further note that in the doc of KVM_SET_USER_MEMORY_REGION2 we talk
>> about "private" vs "shared" memory ... so that would have to be improved
>> as well.
> 
> To add to what David just wrote, V1 of this series used the term
> "mappable" [1].  After a few discussions, I thought the consensus was
> that "shared" was a more accurate description --- i.e., mappability
> was a side effect of it being shared with the host.
> 
> One could argue that non-CoCo VMs have no concept of "shared" vs
> "private". A different way of looking at it is, non-CoCo VMs have
> their state as shared by default.

All memory of these VMs behaves similar to other memory-based shared 
memory backends (memfd, shmem) in the system, yes. You can map it into 
multiple processes and use it like shmem/memfd.

I'm still thinking about another way to call non-private memory ... no 
success so far. "ordinary" or "generic" is .... not better.
diff mbox series

Patch

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 9a6712151a74..6b63556ca150 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -729,6 +729,19 @@  static inline bool kvm_arch_supports_gmem(struct kvm *kvm)
 }
 #endif
 
+/*
+ * Returns true if this VM supports shared mem in guest_memfd.
+ *
+ * Arch code must define kvm_arch_supports_gmem_shared_mem if support for
+ * guest_memfd is enabled.
+ */
+#if !defined(kvm_arch_supports_gmem_shared_mem)
+static inline bool kvm_arch_supports_gmem_shared_mem(struct kvm *kvm)
+{
+	return false;
+}
+#endif
+
 #ifndef kvm_arch_has_readonly_mem
 static inline bool kvm_arch_has_readonly_mem(struct kvm *kvm)
 {
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index d00b85cb168c..cb19150fd595 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1570,6 +1570,7 @@  struct kvm_memory_attributes {
 #define KVM_MEMORY_ATTRIBUTE_PRIVATE           (1ULL << 3)
 
 #define KVM_CREATE_GUEST_MEMFD	_IOWR(KVMIO,  0xd4, struct kvm_create_guest_memfd)
+#define GUEST_MEMFD_FLAG_SUPPORT_SHARED	(1ULL << 0)
 
 struct kvm_create_guest_memfd {
 	__u64 size;
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index 559c93ad90be..e90884f74404 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -128,3 +128,7 @@  config HAVE_KVM_ARCH_GMEM_PREPARE
 config HAVE_KVM_ARCH_GMEM_INVALIDATE
        bool
        depends on KVM_GMEM
+
+config KVM_GMEM_SHARED_MEM
+       select KVM_GMEM
+       bool
diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index 6db515833f61..06616b6b493b 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -312,7 +312,77 @@  static pgoff_t kvm_gmem_get_index(struct kvm_memory_slot *slot, gfn_t gfn)
 	return gfn - slot->base_gfn + slot->gmem.pgoff;
 }
 
+static bool kvm_gmem_supports_shared(struct inode *inode)
+{
+	const u64 flags = (u64)inode->i_private;
+
+	if (!IS_ENABLED(CONFIG_KVM_GMEM_SHARED_MEM))
+		return false;
+
+	return flags & GUEST_MEMFD_FLAG_SUPPORT_SHARED;
+}
+
+static vm_fault_t kvm_gmem_fault_shared(struct vm_fault *vmf)
+{
+	struct inode *inode = file_inode(vmf->vma->vm_file);
+	struct folio *folio;
+	vm_fault_t ret = VM_FAULT_LOCKED;
+
+	if (((loff_t)vmf->pgoff << PAGE_SHIFT) >= i_size_read(inode))
+		return VM_FAULT_SIGBUS;
+
+	folio = kvm_gmem_get_folio(inode, vmf->pgoff);
+	if (IS_ERR(folio)) {
+		int err = PTR_ERR(folio);
+
+		if (err == -EAGAIN)
+			return VM_FAULT_RETRY;
+
+		return vmf_error(err);
+	}
+
+	if (WARN_ON_ONCE(folio_test_large(folio))) {
+		ret = VM_FAULT_SIGBUS;
+		goto out_folio;
+	}
+
+	if (!folio_test_uptodate(folio)) {
+		clear_highpage(folio_page(folio, 0));
+		kvm_gmem_mark_prepared(folio);
+	}
+
+	vmf->page = folio_file_page(folio, vmf->pgoff);
+
+out_folio:
+	if (ret != VM_FAULT_LOCKED) {
+		folio_unlock(folio);
+		folio_put(folio);
+	}
+
+	return ret;
+}
+
+static const struct vm_operations_struct kvm_gmem_vm_ops = {
+	.fault = kvm_gmem_fault_shared,
+};
+
+static int kvm_gmem_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	if (!kvm_gmem_supports_shared(file_inode(file)))
+		return -ENODEV;
+
+	if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) !=
+	    (VM_SHARED | VM_MAYSHARE)) {
+		return -EINVAL;
+	}
+
+	vma->vm_ops = &kvm_gmem_vm_ops;
+
+	return 0;
+}
+
 static struct file_operations kvm_gmem_fops = {
+	.mmap		= kvm_gmem_mmap,
 	.open		= generic_file_open,
 	.release	= kvm_gmem_release,
 	.fallocate	= kvm_gmem_fallocate,
@@ -463,6 +533,9 @@  int kvm_gmem_create(struct kvm *kvm, struct kvm_create_guest_memfd *args)
 	u64 flags = args->flags;
 	u64 valid_flags = 0;
 
+	if (kvm_arch_supports_gmem_shared_mem(kvm))
+		valid_flags |= GUEST_MEMFD_FLAG_SUPPORT_SHARED;
+
 	if (flags & ~valid_flags)
 		return -EINVAL;