Message ID | 20250527180245.1413463-1-tabba@google.com |
---|---|
Headers | show |
Series | KVM: Mapping guest_memfd backed memory at the host for software protected VMs | expand |
On 5/27/2025 11:32 PM, Fuad Tabba wrote: > The option KVM_PRIVATE_MEM enables guest_memfd in general. Subsequent > patches add shared memory support to guest_memfd. Therefore, rename it > to KVM_GMEM to make its purpose clearer. > > Reviewed-by: Gavin Shan <gshan@redhat.com> > Reviewed-by: Ira Weiny <ira.weiny@intel.com> > Co-developed-by: David Hildenbrand <david@redhat.com> > Signed-off-by: David Hildenbrand <david@redhat.com> > Signed-off-by: Fuad Tabba <tabba@google.com> > --- > arch/x86/include/asm/kvm_host.h | 2 +- > include/linux/kvm_host.h | 10 +++++----- > virt/kvm/Kconfig | 8 ++++---- > virt/kvm/Makefile.kvm | 2 +- > virt/kvm/kvm_main.c | 4 ++-- > virt/kvm/kvm_mm.h | 4 ++-- > 6 files changed, 15 insertions(+), 15 deletions(-) > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > index 7bc174a1f1cb..52f6f6d08558 100644 > --- a/arch/x86/include/asm/kvm_host.h > +++ b/arch/x86/include/asm/kvm_host.h > @@ -2253,7 +2253,7 @@ void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level, > int tdp_max_root_level, int tdp_huge_page_level); > > > -#ifdef CONFIG_KVM_PRIVATE_MEM > +#ifdef CONFIG_KVM_GMEM > #define kvm_arch_has_private_mem(kvm) ((kvm)->arch.has_private_mem) > #else > #define kvm_arch_has_private_mem(kvm) false > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h > index 291d49b9bf05..d6900995725d 100644 > --- a/include/linux/kvm_host.h > +++ b/include/linux/kvm_host.h > @@ -601,7 +601,7 @@ struct kvm_memory_slot { > short id; > u16 as_id; > > -#ifdef CONFIG_KVM_PRIVATE_MEM > +#ifdef CONFIG_KVM_GMEM > struct { > /* > * Writes protected by kvm->slots_lock. Acquiring a > @@ -722,7 +722,7 @@ static inline int kvm_arch_vcpu_memslots_id(struct kvm_vcpu *vcpu) > * Arch code must define kvm_arch_has_private_mem if support for private memory > * is enabled. 
> */ > -#if !defined(kvm_arch_has_private_mem) && !IS_ENABLED(CONFIG_KVM_PRIVATE_MEM) > +#if !defined(kvm_arch_has_private_mem) && !IS_ENABLED(CONFIG_KVM_GMEM) > static inline bool kvm_arch_has_private_mem(struct kvm *kvm) > { > return false; > @@ -2504,7 +2504,7 @@ bool kvm_arch_post_set_memory_attributes(struct kvm *kvm, > > static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn) > { > - return IS_ENABLED(CONFIG_KVM_PRIVATE_MEM) && > + return IS_ENABLED(CONFIG_KVM_GMEM) && > kvm_get_memory_attributes(kvm, gfn) & KVM_MEMORY_ATTRIBUTE_PRIVATE; > } > #else > @@ -2514,7 +2514,7 @@ static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn) > } > #endif /* CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES */ > > -#ifdef CONFIG_KVM_PRIVATE_MEM > +#ifdef CONFIG_KVM_GMEM > int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot, > gfn_t gfn, kvm_pfn_t *pfn, struct page **page, > int *max_order); > @@ -2527,7 +2527,7 @@ static inline int kvm_gmem_get_pfn(struct kvm *kvm, > KVM_BUG_ON(1, kvm); > return -EIO; > } > -#endif /* CONFIG_KVM_PRIVATE_MEM */ > +#endif /* CONFIG_KVM_GMEM */ > > #ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE > int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order); > diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig > index 727b542074e7..49df4e32bff7 100644 > --- a/virt/kvm/Kconfig > +++ b/virt/kvm/Kconfig > @@ -112,19 +112,19 @@ config KVM_GENERIC_MEMORY_ATTRIBUTES > depends on KVM_GENERIC_MMU_NOTIFIER > bool > > -config KVM_PRIVATE_MEM > +config KVM_GMEM > select XARRAY_MULTI > bool > > config KVM_GENERIC_PRIVATE_MEM > select KVM_GENERIC_MEMORY_ATTRIBUTES > - select KVM_PRIVATE_MEM > + select KVM_GMEM > bool > > config HAVE_KVM_ARCH_GMEM_PREPARE > bool > - depends on KVM_PRIVATE_MEM > + depends on KVM_GMEM > > config HAVE_KVM_ARCH_GMEM_INVALIDATE > bool > - depends on KVM_PRIVATE_MEM > + depends on KVM_GMEM > diff --git a/virt/kvm/Makefile.kvm b/virt/kvm/Makefile.kvm > index 724c89af78af..8d00918d4c8b 100644 > 
--- a/virt/kvm/Makefile.kvm > +++ b/virt/kvm/Makefile.kvm > @@ -12,4 +12,4 @@ kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o > kvm-$(CONFIG_HAVE_KVM_IRQ_ROUTING) += $(KVM)/irqchip.o > kvm-$(CONFIG_HAVE_KVM_DIRTY_RING) += $(KVM)/dirty_ring.o > kvm-$(CONFIG_HAVE_KVM_PFNCACHE) += $(KVM)/pfncache.o > -kvm-$(CONFIG_KVM_PRIVATE_MEM) += $(KVM)/guest_memfd.o > +kvm-$(CONFIG_KVM_GMEM) += $(KVM)/guest_memfd.o > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c > index e85b33a92624..4996cac41a8f 100644 > --- a/virt/kvm/kvm_main.c > +++ b/virt/kvm/kvm_main.c > @@ -4842,7 +4842,7 @@ static int kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) > case KVM_CAP_MEMORY_ATTRIBUTES: > return kvm_supported_mem_attributes(kvm); > #endif > -#ifdef CONFIG_KVM_PRIVATE_MEM > +#ifdef CONFIG_KVM_GMEM > case KVM_CAP_GUEST_MEMFD: > return !kvm || kvm_arch_has_private_mem(kvm); > #endif > @@ -5276,7 +5276,7 @@ static long kvm_vm_ioctl(struct file *filp, > case KVM_GET_STATS_FD: > r = kvm_vm_ioctl_get_stats_fd(kvm); > break; > -#ifdef CONFIG_KVM_PRIVATE_MEM > +#ifdef CONFIG_KVM_GMEM > case KVM_CREATE_GUEST_MEMFD: { > struct kvm_create_guest_memfd guest_memfd; > > diff --git a/virt/kvm/kvm_mm.h b/virt/kvm/kvm_mm.h > index acef3f5c582a..ec311c0d6718 100644 > --- a/virt/kvm/kvm_mm.h > +++ b/virt/kvm/kvm_mm.h > @@ -67,7 +67,7 @@ static inline void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm, > } > #endif /* HAVE_KVM_PFNCACHE */ > > -#ifdef CONFIG_KVM_PRIVATE_MEM > +#ifdef CONFIG_KVM_GMEM > void kvm_gmem_init(struct module *module); > int kvm_gmem_create(struct kvm *kvm, struct kvm_create_guest_memfd *args); > int kvm_gmem_bind(struct kvm *kvm, struct kvm_memory_slot *slot, > @@ -91,6 +91,6 @@ static inline void kvm_gmem_unbind(struct kvm_memory_slot *slot) > { > WARN_ON_ONCE(1); > } > -#endif /* CONFIG_KVM_PRIVATE_MEM */ > +#endif /* CONFIG_KVM_GMEM */ > > #endif /* __KVM_MM_H__ */ Reviewed-by: Shivank Garg <shivankg@amd.com>
On 5/27/2025 11:32 PM, Fuad Tabba wrote: > The function kvm_arch_has_private_mem() is used to indicate whether > guest_memfd is supported by the architecture, which until now implies > that it's private. To decouple guest_memfd support from whether the > memory is private, rename this function to kvm_arch_supports_gmem(). > > Reviewed-by: Gavin Shan <gshan@redhat.com> > Reviewed-by: Ira Weiny <ira.weiny@intel.com> > Co-developed-by: David Hildenbrand <david@redhat.com> > Signed-off-by: David Hildenbrand <david@redhat.com> > Signed-off-by: Fuad Tabba <tabba@google.com> > --- > arch/x86/include/asm/kvm_host.h | 8 ++++---- > arch/x86/kvm/mmu/mmu.c | 8 ++++---- > include/linux/kvm_host.h | 6 +++--- > virt/kvm/kvm_main.c | 6 +++--- > 4 files changed, 14 insertions(+), 14 deletions(-) > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > index 52f6f6d08558..4a83fbae7056 100644 > --- a/arch/x86/include/asm/kvm_host.h > +++ b/arch/x86/include/asm/kvm_host.h > @@ -2254,9 +2254,9 @@ void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level, > > > #ifdef CONFIG_KVM_GMEM > -#define kvm_arch_has_private_mem(kvm) ((kvm)->arch.has_private_mem) > +#define kvm_arch_supports_gmem(kvm) ((kvm)->arch.has_private_mem) > #else > -#define kvm_arch_has_private_mem(kvm) false > +#define kvm_arch_supports_gmem(kvm) false > #endif > > #define kvm_arch_has_readonly_mem(kvm) (!(kvm)->arch.has_protected_state) > @@ -2309,8 +2309,8 @@ enum { > #define HF_SMM_INSIDE_NMI_MASK (1 << 2) > > # define KVM_MAX_NR_ADDRESS_SPACES 2 > -/* SMM is currently unsupported for guests with private memory. */ > -# define kvm_arch_nr_memslot_as_ids(kvm) (kvm_arch_has_private_mem(kvm) ? 1 : 2) > +/* SMM is currently unsupported for guests with guest_memfd (esp private) memory. */ > +# define kvm_arch_nr_memslot_as_ids(kvm) (kvm_arch_supports_gmem(kvm) ? 1 : 2) > # define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 
1 : 0) > # define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm) > #else > diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c > index 8d1b632e33d2..b66f1bf24e06 100644 > --- a/arch/x86/kvm/mmu/mmu.c > +++ b/arch/x86/kvm/mmu/mmu.c > @@ -4917,7 +4917,7 @@ long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu, > if (r) > return r; > > - if (kvm_arch_has_private_mem(vcpu->kvm) && > + if (kvm_arch_supports_gmem(vcpu->kvm) && > kvm_mem_is_private(vcpu->kvm, gpa_to_gfn(range->gpa))) > error_code |= PFERR_PRIVATE_ACCESS; > > @@ -7705,7 +7705,7 @@ bool kvm_arch_pre_set_memory_attributes(struct kvm *kvm, > * Zapping SPTEs in this case ensures KVM will reassess whether or not > * a hugepage can be used for affected ranges. > */ > - if (WARN_ON_ONCE(!kvm_arch_has_private_mem(kvm))) > + if (WARN_ON_ONCE(!kvm_arch_supports_gmem(kvm))) > return false; > > if (WARN_ON_ONCE(range->end <= range->start)) > @@ -7784,7 +7784,7 @@ bool kvm_arch_post_set_memory_attributes(struct kvm *kvm, > * a range that has PRIVATE GFNs, and conversely converting a range to > * SHARED may now allow hugepages. > */ > - if (WARN_ON_ONCE(!kvm_arch_has_private_mem(kvm))) > + if (WARN_ON_ONCE(!kvm_arch_supports_gmem(kvm))) > return false; > > /* > @@ -7840,7 +7840,7 @@ void kvm_mmu_init_memslot_memory_attributes(struct kvm *kvm, > { > int level; > > - if (!kvm_arch_has_private_mem(kvm)) > + if (!kvm_arch_supports_gmem(kvm)) > return; > > for (level = PG_LEVEL_2M; level <= KVM_MAX_HUGEPAGE_LEVEL; level++) { > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h > index 7ca23837fa52..6ca7279520cf 100644 > --- a/include/linux/kvm_host.h > +++ b/include/linux/kvm_host.h > @@ -719,11 +719,11 @@ static inline int kvm_arch_vcpu_memslots_id(struct kvm_vcpu *vcpu) > #endif > > /* > - * Arch code must define kvm_arch_has_private_mem if support for private memory > + * Arch code must define kvm_arch_supports_gmem if support for guest_memfd > * is enabled. 
> */ > -#if !defined(kvm_arch_has_private_mem) && !IS_ENABLED(CONFIG_KVM_GMEM) > -static inline bool kvm_arch_has_private_mem(struct kvm *kvm) > +#if !defined(kvm_arch_supports_gmem) && !IS_ENABLED(CONFIG_KVM_GMEM) > +static inline bool kvm_arch_supports_gmem(struct kvm *kvm) > { > return false; > } > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c > index 4996cac41a8f..2468d50a9ed4 100644 > --- a/virt/kvm/kvm_main.c > +++ b/virt/kvm/kvm_main.c > @@ -1531,7 +1531,7 @@ static int check_memory_region_flags(struct kvm *kvm, > { > u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES; > > - if (kvm_arch_has_private_mem(kvm)) > + if (kvm_arch_supports_gmem(kvm)) > valid_flags |= KVM_MEM_GUEST_MEMFD; > > /* Dirty logging private memory is not currently supported. */ > @@ -2362,7 +2362,7 @@ static int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm, > #ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES > static u64 kvm_supported_mem_attributes(struct kvm *kvm) > { > - if (!kvm || kvm_arch_has_private_mem(kvm)) > + if (!kvm || kvm_arch_supports_gmem(kvm)) > return KVM_MEMORY_ATTRIBUTE_PRIVATE; > > return 0; > @@ -4844,7 +4844,7 @@ static int kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) > #endif > #ifdef CONFIG_KVM_GMEM > case KVM_CAP_GUEST_MEMFD: > - return !kvm || kvm_arch_has_private_mem(kvm); > + return !kvm || kvm_arch_supports_gmem(kvm); > #endif > default: > break; Reviewed-by: Shivank Garg <shivankg@amd.com>
On 5/27/2025 11:32 PM, Fuad Tabba wrote: > The function kvm_slot_can_be_private() is used to check whether a memory > slot is backed by guest_memfd. Rename it to kvm_slot_has_gmem() to make > that clearer and to decouple memory being private from guest_memfd. > > Reviewed-by: Gavin Shan <gshan@redhat.com> > Reviewed-by: Ira Weiny <ira.weiny@intel.com> > Co-developed-by: David Hildenbrand <david@redhat.com> > Signed-off-by: David Hildenbrand <david@redhat.com> > Signed-off-by: Fuad Tabba <tabba@google.com> > --- > arch/x86/kvm/mmu/mmu.c | 4 ++-- > arch/x86/kvm/svm/sev.c | 4 ++-- > include/linux/kvm_host.h | 2 +- > virt/kvm/guest_memfd.c | 2 +- > 4 files changed, 6 insertions(+), 6 deletions(-) > > diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c > index 69bf2ef22ed0..2b6376986f96 100644 > --- a/arch/x86/kvm/mmu/mmu.c > +++ b/arch/x86/kvm/mmu/mmu.c > @@ -3283,7 +3283,7 @@ static int __kvm_mmu_max_mapping_level(struct kvm *kvm, > int kvm_mmu_max_mapping_level(struct kvm *kvm, > const struct kvm_memory_slot *slot, gfn_t gfn) > { > - bool is_private = kvm_slot_can_be_private(slot) && > + bool is_private = kvm_slot_has_gmem(slot) && > kvm_mem_is_private(kvm, gfn); > > return __kvm_mmu_max_mapping_level(kvm, slot, gfn, PG_LEVEL_NUM, is_private); > @@ -4496,7 +4496,7 @@ static int kvm_mmu_faultin_pfn_private(struct kvm_vcpu *vcpu, > { > int max_order, r; > > - if (!kvm_slot_can_be_private(fault->slot)) { > + if (!kvm_slot_has_gmem(fault->slot)) { > kvm_mmu_prepare_memory_fault_exit(vcpu, fault); > return -EFAULT; > } > diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c > index a7a7dc507336..27759ca6d2f2 100644 > --- a/arch/x86/kvm/svm/sev.c > +++ b/arch/x86/kvm/svm/sev.c > @@ -2378,7 +2378,7 @@ static int snp_launch_update(struct kvm *kvm, struct kvm_sev_cmd *argp) > mutex_lock(&kvm->slots_lock); > > memslot = gfn_to_memslot(kvm, params.gfn_start); > - if (!kvm_slot_can_be_private(memslot)) { > + if (!kvm_slot_has_gmem(memslot)) { > ret = -EINVAL; > 
goto out; > } > @@ -4688,7 +4688,7 @@ void sev_handle_rmp_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code) > } > > slot = gfn_to_memslot(kvm, gfn); > - if (!kvm_slot_can_be_private(slot)) { > + if (!kvm_slot_has_gmem(slot)) { > pr_warn_ratelimited("SEV: Unexpected RMP fault, non-private slot for GPA 0x%llx\n", > gpa); > return; > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h > index 6ca7279520cf..d9616ee6acc7 100644 > --- a/include/linux/kvm_host.h > +++ b/include/linux/kvm_host.h > @@ -614,7 +614,7 @@ struct kvm_memory_slot { > #endif > }; > > -static inline bool kvm_slot_can_be_private(const struct kvm_memory_slot *slot) > +static inline bool kvm_slot_has_gmem(const struct kvm_memory_slot *slot) > { > return slot && (slot->flags & KVM_MEM_GUEST_MEMFD); > } > diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c > index befea51bbc75..6db515833f61 100644 > --- a/virt/kvm/guest_memfd.c > +++ b/virt/kvm/guest_memfd.c > @@ -654,7 +654,7 @@ long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long > return -EINVAL; > > slot = gfn_to_memslot(kvm, start_gfn); > - if (!kvm_slot_can_be_private(slot)) > + if (!kvm_slot_has_gmem(slot)) > return -EINVAL; > > file = kvm_gmem_get_file(slot); Reviewed-by: Shivank Garg <shivankg@amd.com>
On 5/27/2025 11:32 PM, Fuad Tabba wrote: > The comment that refers to the path where the user-visible memslot flags > are refers to an outdated path and has a typo. Make it refer to the > correct path. > > Signed-off-by: Fuad Tabba <tabba@google.com> > --- > include/linux/kvm_host.h | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h > index ae70e4e19700..80371475818f 100644 > --- a/include/linux/kvm_host.h > +++ b/include/linux/kvm_host.h > @@ -52,7 +52,7 @@ > /* > * The bit 16 ~ bit 31 of kvm_userspace_memory_region::flags are internally > * used in kvm, other bits are visible for userspace which are defined in > - * include/linux/kvm_h. > + * include/uapi/linux/kvm.h. Reviewed-by: Shivank Garg <shivankg@amd.com> > */ > #define KVM_MEMSLOT_INVALID (1UL << 16) >
On Sat, 31 May 2025 at 20:20, Shivank Garg <shivankg@amd.com> wrote: > > > > On 5/27/2025 11:32 PM, Fuad Tabba wrote: > > The comment that refers to the path where the user-visible memslot flags > > are refers to an outdated path and has a typo. Make it refer to the > > correct path. > > > > Signed-off-by: Fuad Tabba <tabba@google.com> > > --- > > include/linux/kvm_host.h | 2 +- > > 1 file changed, 1 insertion(+), 1 deletion(-) > > > > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h > > index ae70e4e19700..80371475818f 100644 > > --- a/include/linux/kvm_host.h > > +++ b/include/linux/kvm_host.h > > @@ -52,7 +52,7 @@ > > /* > > * The bit 16 ~ bit 31 of kvm_userspace_memory_region::flags are internally > > * used in kvm, other bits are visible for userspace which are defined in > > - * include/linux/kvm_h. > > + * include/uapi/linux/kvm.h. > > Reviewed-by: Shivank Garg <shivankg@amd.com> Thanks for the reviews! /fuad > > */ > > #define KVM_MEMSLOT_INVALID (1UL << 16) > > > >
On 27.05.25 20:02, Fuad Tabba wrote: > Track whether a guest_memfd-backed memslot supports shared memory within > the memslot itself, using the flags field. The top half of memslot flags > is reserved for internal use in KVM. Add a flag there to track shared > memory support. > > This saves the caller from having to check the guest_memfd-backed file > for this support, a potentially more expensive operation due to the need > to get/put the file. > > Suggested-by: David Hildenbrand <david@redhat.com> > Signed-off-by: Fuad Tabba <tabba@google.com> > --- > include/linux/kvm_host.h | 11 ++++++++++- > virt/kvm/guest_memfd.c | 8 ++++++-- > 2 files changed, 16 insertions(+), 3 deletions(-) > > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h > index ba83547e62b0..edb3795a64b9 100644 > --- a/include/linux/kvm_host.h > +++ b/include/linux/kvm_host.h > @@ -54,7 +54,8 @@ > * used in kvm, other bits are visible for userspace which are defined in > * include/uapi/linux/kvm.h. > */ > -#define KVM_MEMSLOT_INVALID (1UL << 16) > +#define KVM_MEMSLOT_INVALID (1UL << 16) > +#define KVM_MEMSLOT_SUPPORTS_SHARED (1UL << 17) Should there be a "GMEM" in there?
Hi David On Wed, 4 Jun 2025 at 14:17, David Hildenbrand <david@redhat.com> wrote: > > On 27.05.25 20:02, Fuad Tabba wrote: > > Add arm64 support for handling guest page faults on guest_memfd backed > > memslots. Until guest_memfd supports huge pages, the fault granule is > > restricted to PAGE_SIZE. > > > > Signed-off-by: Fuad Tabba <tabba@google.com> > > > > --- > > > > Note: This patch introduces a new function, gmem_abort() rather than > > previous attempts at trying to expand user_mem_abort(). This is because > > there are many differences in how faults are handled when backed by > > guest_memfd vs regular memslots with anonymous memory, e.g., lack of > > VMA, and for now, lack of huge page support for guest_memfd. The > > function user_mem_abort() is already big and unwieldy, adding more > > complexity to it made things more difficult to understand. > > > > Once larger page size support is added to guest_memfd, we could factor > > out the common code between these two functions. > > > > --- > > arch/arm64/kvm/mmu.c | 89 +++++++++++++++++++++++++++++++++++++++++++- > > 1 file changed, 87 insertions(+), 2 deletions(-) > > > > diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c > > index 9865ada04a81..896c56683d88 100644 > > --- a/arch/arm64/kvm/mmu.c > > +++ b/arch/arm64/kvm/mmu.c > > @@ -1466,6 +1466,87 @@ static bool kvm_vma_mte_allowed(struct vm_area_struct *vma) > > return vma->vm_flags & VM_MTE_ALLOWED; > > } > > > > +static int gmem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, > > + struct kvm_memory_slot *memslot, bool is_perm) > > TBH, I have no idea why the existing function is called "_abort". I am > sure there is a good reason :) > The reason is ARM. 
They're called "memory aborts", see D8.15 Memory aborts in the ARM ARM: https://developer.arm.com/documentation/ddi0487/latest/ Warning: PDF is 100mb+ with almost 15k pages :) > > +{ > > + enum kvm_pgtable_walk_flags flags = KVM_PGTABLE_WALK_HANDLE_FAULT | KVM_PGTABLE_WALK_SHARED; > > + enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R; > > + bool logging, write_fault, exec_fault, writable; > > + struct kvm_pgtable *pgt; > > + struct page *page; > > + struct kvm *kvm; > > + void *memcache; > > + kvm_pfn_t pfn; > > + gfn_t gfn; > > + int ret; > > + > > + if (!is_perm) { > > + int min_pages = kvm_mmu_cache_min_pages(vcpu->arch.hw_mmu); > > + > > + if (!is_protected_kvm_enabled()) { > > + memcache = &vcpu->arch.mmu_page_cache; > > + ret = kvm_mmu_topup_memory_cache(memcache, min_pages); > > + } else { > > + memcache = &vcpu->arch.pkvm_memcache; > > + ret = topup_hyp_memcache(memcache, min_pages); > > + } > > + if (ret) > > + return ret; > > + } > > + > > + kvm = vcpu->kvm; > > + gfn = fault_ipa >> PAGE_SHIFT; > > These two can be initialized directly above. > I was trying to go with reverse christmas tree order of declarations, but I'll do that. > > + > > + logging = memslot_is_logging(memslot); > > + write_fault = kvm_is_write_fault(vcpu); > > + exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu); > > + VM_BUG_ON(write_fault && exec_fault); > > No VM_BUG_ON please. > > VM_WARN_ON_ONCE() maybe. Or just handle it along the "Unexpected L2 read > permission error" below cleanly. I'm following the same pattern as the existing user_mem_abort(), but I'll change it. 
> > + > > + if (is_perm && !write_fault && !exec_fault) { > > + kvm_err("Unexpected L2 read permission error\n"); > > + return -EFAULT; > > + } > > + > > + ret = kvm_gmem_get_pfn(vcpu->kvm, memslot, gfn, &pfn, &page, NULL); > > + if (ret) { > > + kvm_prepare_memory_fault_exit(vcpu, fault_ipa, PAGE_SIZE, > > + write_fault, exec_fault, false); > > + return ret; > > + } > > + > > + writable = !(memslot->flags & KVM_MEM_READONLY) && > > + (!logging || write_fault); > > + > > + if (writable) > > + prot |= KVM_PGTABLE_PROT_W; > > + > > + if (exec_fault || cpus_have_final_cap(ARM64_HAS_CACHE_DIC)) > > + prot |= KVM_PGTABLE_PROT_X; > > + > > + pgt = vcpu->arch.hw_mmu->pgt; > > Can probably also initialize directly above. Ack. > > + > > + kvm_fault_lock(kvm); > > + if (is_perm) { > > + /* > > + * Drop the SW bits in favour of those stored in the > > + * PTE, which will be preserved. > > + */ > > + prot &= ~KVM_NV_GUEST_MAP_SZ; > > + ret = KVM_PGT_FN(kvm_pgtable_stage2_relax_perms)(pgt, fault_ipa, prot, flags); > > + } else { > > + ret = KVM_PGT_FN(kvm_pgtable_stage2_map)(pgt, fault_ipa, PAGE_SIZE, > > + __pfn_to_phys(pfn), prot, > > + memcache, flags); > > + } > > + kvm_release_faultin_page(kvm, page, !!ret, writable); > > + kvm_fault_unlock(kvm); > > + > > + if (writable && !ret) > > + mark_page_dirty_in_slot(kvm, memslot, gfn); > > + > > + return ret != -EAGAIN ? ret : 0; > > +} > > + > > Nothing else jumped at me. But just like on the x86 code, I think we > need some arch experts take a look at this one ... Thanks! /fuad > -- > Cheers, > > David / dhildenb >
On 04.06.25 15:30, Fuad Tabba wrote: > Hi David > > On Wed, 4 Jun 2025 at 14:17, David Hildenbrand <david@redhat.com> wrote: >> >> On 27.05.25 20:02, Fuad Tabba wrote: >>> Add arm64 support for handling guest page faults on guest_memfd backed >>> memslots. Until guest_memfd supports huge pages, the fault granule is >>> restricted to PAGE_SIZE. >>> >>> Signed-off-by: Fuad Tabba <tabba@google.com> >>> >>> --- >>> >>> Note: This patch introduces a new function, gmem_abort() rather than >>> previous attempts at trying to expand user_mem_abort(). This is because >>> there are many differences in how faults are handled when backed by >>> guest_memfd vs regular memslots with anonymous memory, e.g., lack of >>> VMA, and for now, lack of huge page support for guest_memfd. The >>> function user_mem_abort() is already big and unwieldy, adding more >>> complexity to it made things more difficult to understand. >>> >>> Once larger page size support is added to guest_memfd, we could factor >>> out the common code between these two functions. >>> >>> --- >>> arch/arm64/kvm/mmu.c | 89 +++++++++++++++++++++++++++++++++++++++++++- >>> 1 file changed, 87 insertions(+), 2 deletions(-) >>> >>> diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c >>> index 9865ada04a81..896c56683d88 100644 >>> --- a/arch/arm64/kvm/mmu.c >>> +++ b/arch/arm64/kvm/mmu.c >>> @@ -1466,6 +1466,87 @@ static bool kvm_vma_mte_allowed(struct vm_area_struct *vma) >>> return vma->vm_flags & VM_MTE_ALLOWED; >>> } >>> >>> +static int gmem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, >>> + struct kvm_memory_slot *memslot, bool is_perm) >> >> TBH, I have no idea why the existing function is called "_abort". I am >> sure there is a good reason :) >> > > The reason is ARM. 
They're called "memory aborts", see D8.15 Memory > aborts in the ARM ARM: > > https://developer.arm.com/documentation/ddi0487/latest/ > > Warning: PDF is 100mb+ with almost 15k pages :) > >>> +{ >>> + enum kvm_pgtable_walk_flags flags = KVM_PGTABLE_WALK_HANDLE_FAULT | KVM_PGTABLE_WALK_SHARED; >>> + enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R; >>> + bool logging, write_fault, exec_fault, writable; >>> + struct kvm_pgtable *pgt; >>> + struct page *page; >>> + struct kvm *kvm; >>> + void *memcache; >>> + kvm_pfn_t pfn; >>> + gfn_t gfn; >>> + int ret; >>> + >>> + if (!is_perm) { >>> + int min_pages = kvm_mmu_cache_min_pages(vcpu->arch.hw_mmu); >>> + >>> + if (!is_protected_kvm_enabled()) { >>> + memcache = &vcpu->arch.mmu_page_cache; >>> + ret = kvm_mmu_topup_memory_cache(memcache, min_pages); >>> + } else { >>> + memcache = &vcpu->arch.pkvm_memcache; >>> + ret = topup_hyp_memcache(memcache, min_pages); >>> + } >>> + if (ret) >>> + return ret; >>> + } >>> + >>> + kvm = vcpu->kvm; >>> + gfn = fault_ipa >> PAGE_SHIFT; >> >> These two can be initialized directly above. >> > > I was trying to go with reverse christmas tree order of declarations, > but I'll do that. Can still do that, no? vcpu and fault_ipa are input parameters, so no dependency between them. > >>> + >>> + logging = memslot_is_logging(memslot); >>> + write_fault = kvm_is_write_fault(vcpu); >>> + exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu); >> > + VM_BUG_ON(write_fault && exec_fault); >> >> No VM_BUG_ON please. >> >> VM_WARN_ON_ONCE() maybe. Or just handle it along the "Unexpected L2 read >> permission error" below cleanly. > > I'm following the same pattern as the existing user_mem_abort(), but > I'll change it. Yeah, there are a lot of BUG_ON thingies that should be reworked.