Message ID: 20250513163438.3942405-11-tabba@google.com
State: New
Series: KVM: Mapping guest_memfd backed memory at the host for software protected VMs
On 5/13/2025 10:04 PM, Fuad Tabba wrote: > From: Ackerley Tng <ackerleytng@google.com> > > This patch adds kvm_gmem_max_mapping_level(), which always returns > PG_LEVEL_4K since guest_memfd only supports 4K pages for now. > > When guest_memfd supports shared memory, max_mapping_level (especially > when recovering huge pages - see call to __kvm_mmu_max_mapping_level() > from recover_huge_pages_range()) should take input from > guest_memfd. > > Input from guest_memfd should be taken in these cases: > > + if the memslot supports shared memory (guest_memfd is used for > shared memory, or in future both shared and private memory) or > + if the memslot is only used for private memory and that gfn is > private. > > If the memslot doesn't use guest_memfd, figure out the > max_mapping_level using the host page tables like before. > > This patch also refactors and inlines the other call to > __kvm_mmu_max_mapping_level(). > > In kvm_mmu_hugepage_adjust(), guest_memfd's input is already > provided (if applicable) in fault->max_level. Hence, there is no need > to query guest_memfd. > > lpage_info is queried like before, and then if the fault is not from > guest_memfd, adjust fault->req_level based on input from host page > tables. > > Signed-off-by: Ackerley Tng <ackerleytng@google.com> > Signed-off-by: Fuad Tabba <tabba@google.com> > --- > arch/x86/kvm/mmu/mmu.c | 92 ++++++++++++++++++++++++++-------------- > include/linux/kvm_host.h | 7 +++ > virt/kvm/guest_memfd.c | 12 ++++++ > 3 files changed, 79 insertions(+), 32 deletions(-) > > diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c > index cfbb471f7c70..9e0bc8114859 100644 > --- a/arch/x86/kvm/mmu/mmu.c > +++ b/arch/x86/kvm/mmu/mmu.c > @@ -3256,12 +3256,11 @@ static int host_pfn_mapping_level(struct kvm *kvm, gfn_t gfn, > return level; > } > > -static int __kvm_mmu_max_mapping_level(struct kvm *kvm, > - const struct kvm_memory_slot *slot, > - gfn_t gfn, int max_level, bool is_private) > +static int kvm_lpage_info_max_mapping_level(struct kvm *kvm, > + const struct kvm_memory_slot *slot, > + gfn_t gfn, int max_level) > { > struct kvm_lpage_info *linfo; > - int host_level; > > max_level = min(max_level, max_huge_page_level); > for ( ; max_level > PG_LEVEL_4K; max_level--) { > @@ -3270,23 +3269,61 @@ static int __kvm_mmu_max_mapping_level(struct kvm *kvm, > break; > } > > - if (is_private) > - return max_level; > + return max_level; > +} > + > +static inline u8 kvm_max_level_for_order(int order) > +{ > + BUILD_BUG_ON(KVM_MAX_HUGEPAGE_LEVEL > PG_LEVEL_1G); > + > + KVM_MMU_WARN_ON(order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_1G) && > + order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M) && > + order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_4K)); > + > + if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_1G)) > + return PG_LEVEL_1G; > + > + if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M)) > + return PG_LEVEL_2M; > + > + return PG_LEVEL_4K; > +} > + > +static inline int kvm_gmem_max_mapping_level(const struct kvm_memory_slot *slot, > + gfn_t gfn, int max_level) > +{ > + int max_order; > > if (max_level == PG_LEVEL_4K) > return PG_LEVEL_4K; > > - host_level = host_pfn_mapping_level(kvm, gfn, slot); > - return min(host_level, max_level); > + max_order = kvm_gmem_mapping_order(slot, gfn); > + return min(max_level, kvm_max_level_for_order(max_order)); > } > > int kvm_mmu_max_mapping_level(struct kvm *kvm, > const struct kvm_memory_slot *slot, gfn_t gfn) > { > - bool is_private = kvm_slot_has_gmem(slot) && > - kvm_mem_is_private(kvm, gfn); > + int max_level; > + > + max_level = 
kvm_lpage_info_max_mapping_level(kvm, slot, gfn, PG_LEVEL_NUM); > + if (max_level == PG_LEVEL_4K) > + return PG_LEVEL_4K; > > - return __kvm_mmu_max_mapping_level(kvm, slot, gfn, PG_LEVEL_NUM, is_private); > + if (kvm_slot_has_gmem(slot) && > + (kvm_gmem_memslot_supports_shared(slot) || > + kvm_get_memory_attributes(kvm, gfn) & KVM_MEMORY_ATTRIBUTE_PRIVATE)) { > + return kvm_gmem_max_mapping_level(slot, gfn, max_level); > + } > + > + return min(max_level, host_pfn_mapping_level(kvm, gfn, slot)); > +} > + > +static inline bool fault_from_gmem(struct kvm_page_fault *fault) > +{ > + return fault->is_private || > + (kvm_slot_has_gmem(fault->slot) && > + kvm_gmem_memslot_supports_shared(fault->slot)); > } > > void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) > @@ -3309,12 +3346,20 @@ void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault > * Enforce the iTLB multihit workaround after capturing the requested > * level, which will be used to do precise, accurate accounting. > */ > - fault->req_level = __kvm_mmu_max_mapping_level(vcpu->kvm, slot, > - fault->gfn, fault->max_level, > - fault->is_private); > + fault->req_level = kvm_lpage_info_max_mapping_level(vcpu->kvm, slot, > + fault->gfn, fault->max_level); > if (fault->req_level == PG_LEVEL_4K || fault->huge_page_disallowed) > return; > > + if (!fault_from_gmem(fault)) { > + int host_level; > + > + host_level = host_pfn_mapping_level(vcpu->kvm, fault->gfn, slot); > + fault->req_level = min(fault->req_level, host_level); > + if (fault->req_level == PG_LEVEL_4K) > + return; > + } > + > /* > * mmu_invalidate_retry() was successful and mmu_lock is held, so > * the pmd can't be split from under us. > @@ -4448,23 +4493,6 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work) > vcpu->stat.pf_fixed++; > } > > -static inline u8 kvm_max_level_for_order(int order) > -{ > - BUILD_BUG_ON(KVM_MAX_HUGEPAGE_LEVEL > PG_LEVEL_1G); > - > - KVM_MMU_WARN_ON(order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_1G) && > - order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M) && > - order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_4K)); > - > - if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_1G)) > - return PG_LEVEL_1G; > - > - if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M)) > - return PG_LEVEL_2M; > - > - return PG_LEVEL_4K; > -} > - > static u8 kvm_max_level_for_fault_and_order(struct kvm *kvm, > struct kvm_page_fault *fault, > int order) > @@ -4523,7 +4551,7 @@ static int __kvm_mmu_faultin_pfn(struct kvm_vcpu *vcpu, > { > unsigned int foll = fault->write ? 
FOLL_WRITE : 0; > > - if (fault->is_private || kvm_gmem_memslot_supports_shared(fault->slot)) > + if (fault_from_gmem(fault)) > return kvm_mmu_faultin_pfn_gmem(vcpu, fault); > > foll |= FOLL_NOWAIT; > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h > index de7b46ee1762..f9bb025327c3 100644 > --- a/include/linux/kvm_host.h > +++ b/include/linux/kvm_host.h > @@ -2560,6 +2560,7 @@ static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn) > int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot, > gfn_t gfn, kvm_pfn_t *pfn, struct page **page, > int *max_order); > +int kvm_gmem_mapping_order(const struct kvm_memory_slot *slot, gfn_t gfn); > #else > static inline int kvm_gmem_get_pfn(struct kvm *kvm, > struct kvm_memory_slot *slot, gfn_t gfn, > @@ -2569,6 +2570,12 @@ static inline int kvm_gmem_get_pfn(struct kvm *kvm, > KVM_BUG_ON(1, kvm); > return -EIO; > } > +static inline int kvm_gmem_mapping_order(const struct kvm_memory_slot *slot, > + gfn_t gfn) > +{ > + BUG(); > + return 0; > +} > #endif /* CONFIG_KVM_GMEM */ > > #ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE > diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c > index fe0245335c96..b8e247063b20 100644 > --- a/virt/kvm/guest_memfd.c > +++ b/virt/kvm/guest_memfd.c > @@ -774,6 +774,18 @@ int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot, > } > EXPORT_SYMBOL_GPL(kvm_gmem_get_pfn); > > +/** > + * Returns the mapping order for this @gfn in @slot. > + * > + * This is equal to max_order that would be returned if kvm_gmem_get_pfn() were > + * called now. > + */ make W=1 ./ -s generates following warnings- warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Returns the mapping order for this @gfn in @slot This will fix it. Subject: [PATCH] tmp Signed-off-by: Shivank Garg <shivankg@amd.com> --- virt/kvm/guest_memfd.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c index b8e247063b20..d880b9098cc0 100644 --- a/virt/kvm/guest_memfd.c +++ b/virt/kvm/guest_memfd.c @@ -775,10 +775,12 @@ int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot, EXPORT_SYMBOL_GPL(kvm_gmem_get_pfn); /** - * Returns the mapping order for this @gfn in @slot. + * kvm_gmem_mapping_order - Get the mapping order for a GFN. + * @slot: The KVM memory slot containing the @gfn. + * @gfn: The guest frame number to check. * - * This is equal to max_order that would be returned if kvm_gmem_get_pfn() were - * called now. + * Returns: The mapping order for a @gfn in @slot. This is equal to max_order + * that kvm_gmem_get_pfn() would return for this @gfn. */ int kvm_gmem_mapping_order(const struct kvm_memory_slot *slot, gfn_t gfn) {
On Wed, 14 May 2025 at 08:14, Shivank Garg <shivankg@amd.com> wrote: > > On 5/13/2025 10:04 PM, Fuad Tabba wrote: > > From: Ackerley Tng <ackerleytng@google.com> > > > > This patch adds kvm_gmem_max_mapping_level(), which always returns > > PG_LEVEL_4K since guest_memfd only supports 4K pages for now. > > > > When guest_memfd supports shared memory, max_mapping_level (especially > > when recovering huge pages - see call to __kvm_mmu_max_mapping_level() > > from recover_huge_pages_range()) should take input from > > guest_memfd. > > > > Input from guest_memfd should be taken in these cases: > > > > + if the memslot supports shared memory (guest_memfd is used for > > shared memory, or in future both shared and private memory) or > > + if the memslot is only used for private memory and that gfn is > > private. > > > > If the memslot doesn't use guest_memfd, figure out the > > max_mapping_level using the host page tables like before. > > > > This patch also refactors and inlines the other call to > > __kvm_mmu_max_mapping_level(). > > > > In kvm_mmu_hugepage_adjust(), guest_memfd's input is already > > provided (if applicable) in fault->max_level. Hence, there is no need > > to query guest_memfd. > > > > lpage_info is queried like before, and then if the fault is not from > > guest_memfd, adjust fault->req_level based on input from host page > > tables. > > > > Signed-off-by: Ackerley Tng <ackerleytng@google.com> > > Signed-off-by: Fuad Tabba <tabba@google.com> > > --- > > arch/x86/kvm/mmu/mmu.c | 92 ++++++++++++++++++++++++++-------------- > > include/linux/kvm_host.h | 7 +++ > > virt/kvm/guest_memfd.c | 12 ++++++ > > 3 files changed, 79 insertions(+), 32 deletions(-) > > > > diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c > > index cfbb471f7c70..9e0bc8114859 100644 > > --- a/arch/x86/kvm/mmu/mmu.c > > +++ b/arch/x86/kvm/mmu/mmu.c > > @@ -3256,12 +3256,11 @@ static int host_pfn_mapping_level(struct kvm *kvm, gfn_t gfn, > > return level; > > } > > > > -static int __kvm_mmu_max_mapping_level(struct kvm *kvm, > > - const struct kvm_memory_slot *slot, > > - gfn_t gfn, int max_level, bool is_private) > > +static int kvm_lpage_info_max_mapping_level(struct kvm *kvm, > > + const struct kvm_memory_slot *slot, > > + gfn_t gfn, int max_level) > > { > > struct kvm_lpage_info *linfo; > > - int host_level; > > > > max_level = min(max_level, max_huge_page_level); > > for ( ; max_level > PG_LEVEL_4K; max_level--) { > > @@ -3270,23 +3269,61 @@ static int __kvm_mmu_max_mapping_level(struct kvm *kvm, > > break; > > } > > > > - if (is_private) > > - return max_level; > > + return max_level; > > +} > > + > > +static inline u8 kvm_max_level_for_order(int order) > > +{ > > + BUILD_BUG_ON(KVM_MAX_HUGEPAGE_LEVEL > PG_LEVEL_1G); > > + > > + KVM_MMU_WARN_ON(order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_1G) && > > + order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M) && > > + order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_4K)); > > + > > + if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_1G)) > > + return PG_LEVEL_1G; > > + > > + if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M)) > > + return PG_LEVEL_2M; > > + > > + return PG_LEVEL_4K; > > +} > > + > > +static inline int kvm_gmem_max_mapping_level(const struct kvm_memory_slot *slot, > > + gfn_t gfn, int max_level) > > +{ > > + int max_order; > > > > if (max_level == PG_LEVEL_4K) > > return PG_LEVEL_4K; > > > > - host_level = host_pfn_mapping_level(kvm, gfn, slot); > > - return min(host_level, max_level); > > + max_order = kvm_gmem_mapping_order(slot, gfn); > > + return min(max_level, 
kvm_max_level_for_order(max_order)); > > } > > > > int kvm_mmu_max_mapping_level(struct kvm *kvm, > > const struct kvm_memory_slot *slot, gfn_t gfn) > > { > > - bool is_private = kvm_slot_has_gmem(slot) && > > - kvm_mem_is_private(kvm, gfn); > > + int max_level; > > + > > + max_level = kvm_lpage_info_max_mapping_level(kvm, slot, gfn, PG_LEVEL_NUM); > > + if (max_level == PG_LEVEL_4K) > > + return PG_LEVEL_4K; > > > > - return __kvm_mmu_max_mapping_level(kvm, slot, gfn, PG_LEVEL_NUM, is_private); > > + if (kvm_slot_has_gmem(slot) && > > + (kvm_gmem_memslot_supports_shared(slot) || > > + kvm_get_memory_attributes(kvm, gfn) & KVM_MEMORY_ATTRIBUTE_PRIVATE)) { > > + return kvm_gmem_max_mapping_level(slot, gfn, max_level); > > + } > > + > > + return min(max_level, host_pfn_mapping_level(kvm, gfn, slot)); > > +} > > + > > +static inline bool fault_from_gmem(struct kvm_page_fault *fault) > > +{ > > + return fault->is_private || > > + (kvm_slot_has_gmem(fault->slot) && > > + kvm_gmem_memslot_supports_shared(fault->slot)); > > } > > > > void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) > > @@ -3309,12 +3346,20 @@ void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault > > * Enforce the iTLB multihit workaround after capturing the requested > > * level, which will be used to do precise, accurate accounting. > > */ > > - fault->req_level = __kvm_mmu_max_mapping_level(vcpu->kvm, slot, > > - fault->gfn, fault->max_level, > > - fault->is_private); > > + fault->req_level = kvm_lpage_info_max_mapping_level(vcpu->kvm, slot, > > + fault->gfn, fault->max_level); > > if (fault->req_level == PG_LEVEL_4K || fault->huge_page_disallowed) > > return; > > > > + if (!fault_from_gmem(fault)) { > > + int host_level; > > + > > + host_level = host_pfn_mapping_level(vcpu->kvm, fault->gfn, slot); > > + fault->req_level = min(fault->req_level, host_level); > > + if (fault->req_level == PG_LEVEL_4K) > > + return; > > + } > > + > > /* > > * mmu_invalidate_retry() was successful and mmu_lock is held, so > > * the pmd can't be split from under us. > > @@ -4448,23 +4493,6 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work) > > vcpu->stat.pf_fixed++; > > } > > > > -static inline u8 kvm_max_level_for_order(int order) > > -{ > > - BUILD_BUG_ON(KVM_MAX_HUGEPAGE_LEVEL > PG_LEVEL_1G); > > - > > - KVM_MMU_WARN_ON(order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_1G) && > > - order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M) && > > - order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_4K)); > > - > > - if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_1G)) > > - return PG_LEVEL_1G; > > - > > - if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M)) > > - return PG_LEVEL_2M; > > - > > - return PG_LEVEL_4K; > > -} > > - > > static u8 kvm_max_level_for_fault_and_order(struct kvm *kvm, > > struct kvm_page_fault *fault, > > int order) > > @@ -4523,7 +4551,7 @@ static int __kvm_mmu_faultin_pfn(struct kvm_vcpu *vcpu, > > { > > unsigned int foll = fault->write ? 
FOLL_WRITE : 0; > > > > - if (fault->is_private || kvm_gmem_memslot_supports_shared(fault->slot)) > > + if (fault_from_gmem(fault)) > > return kvm_mmu_faultin_pfn_gmem(vcpu, fault); > > > > foll |= FOLL_NOWAIT; > > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h > > index de7b46ee1762..f9bb025327c3 100644 > > --- a/include/linux/kvm_host.h > > +++ b/include/linux/kvm_host.h > > @@ -2560,6 +2560,7 @@ static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn) > > int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot, > > gfn_t gfn, kvm_pfn_t *pfn, struct page **page, > > int *max_order); > > +int kvm_gmem_mapping_order(const struct kvm_memory_slot *slot, gfn_t gfn); > > #else > > static inline int kvm_gmem_get_pfn(struct kvm *kvm, > > struct kvm_memory_slot *slot, gfn_t gfn, > > @@ -2569,6 +2570,12 @@ static inline int kvm_gmem_get_pfn(struct kvm *kvm, > > KVM_BUG_ON(1, kvm); > > return -EIO; > > } > > +static inline int kvm_gmem_mapping_order(const struct kvm_memory_slot *slot, > > + gfn_t gfn) > > +{ > > + BUG(); > > + return 0; > > +} > > #endif /* CONFIG_KVM_GMEM */ > > > > #ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE > > diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c > > index fe0245335c96..b8e247063b20 100644 > > --- a/virt/kvm/guest_memfd.c > > +++ b/virt/kvm/guest_memfd.c > > @@ -774,6 +774,18 @@ int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot, > > } > > EXPORT_SYMBOL_GPL(kvm_gmem_get_pfn); > > > > +/** > > + * Returns the mapping order for this @gfn in @slot. > > + * > > + * This is equal to max_order that would be returned if kvm_gmem_get_pfn() were > > + * called now. > > + */ > make W=1 ./ -s generates following warnings- > > warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst > * Returns the mapping order for this @gfn in @slot > > This will fix it. Thank you! /fuad > Subject: [PATCH] tmp > > Signed-off-by: Shivank Garg <shivankg@amd.com> > --- > virt/kvm/guest_memfd.c | 8 +++++--- > 1 file changed, 5 insertions(+), 3 deletions(-) > > diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c > index b8e247063b20..d880b9098cc0 100644 > --- a/virt/kvm/guest_memfd.c > +++ b/virt/kvm/guest_memfd.c > @@ -775,10 +775,12 @@ int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot, > EXPORT_SYMBOL_GPL(kvm_gmem_get_pfn); > > /** > - * Returns the mapping order for this @gfn in @slot. > + * kvm_gmem_mapping_order - Get the mapping order for a GFN. > + * @slot: The KVM memory slot containing the @gfn. > + * @gfn: The guest frame number to check. > * > - * This is equal to max_order that would be returned if kvm_gmem_get_pfn() were > - * called now. > + * Returns: The mapping order for a @gfn in @slot. This is equal to max_order > + * that kvm_gmem_get_pfn() would return for this @gfn. > */ > int kvm_gmem_mapping_order(const struct kvm_memory_slot *slot, gfn_t gfn) > { > -- > 2.34.1 > > Thanks, > Shivank > > > > +int kvm_gmem_mapping_order(const struct kvm_memory_slot *slot, gfn_t gfn) > > +{ > > + return 0; > > +} > > +EXPORT_SYMBOL_GPL(kvm_gmem_mapping_order); > > + > > #ifdef CONFIG_KVM_GENERIC_GMEM_POPULATE > > long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long npages, > > kvm_gmem_populate_cb post_populate, void *opaque) >
Hi Fuad, kernel test robot noticed the following build errors: [auto build test ERROR on 82f2b0b97b36ee3fcddf0f0780a9a0825d52fec3] url: https://github.com/intel-lab-lkp/linux/commits/Fuad-Tabba/KVM-Rename-CONFIG_KVM_PRIVATE_MEM-to-CONFIG_KVM_GMEM/20250514-003900 base: 82f2b0b97b36ee3fcddf0f0780a9a0825d52fec3 patch link: https://lore.kernel.org/r/20250513163438.3942405-11-tabba%40google.com patch subject: [PATCH v9 10/17] KVM: x86: Compute max_mapping_level with input from guest_memfd config: x86_64-buildonly-randconfig-002-20250514 (https://download.01.org/0day-ci/archive/20250514/202505142334.6dQb5Sei-lkp@intel.com/config) compiler: gcc-12 (Debian 12.2.0-14) 12.2.0 reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250514/202505142334.6dQb5Sei-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202505142334.6dQb5Sei-lkp@intel.com/ All errors (new ones prefixed by >>): arch/x86/kvm/mmu/mmu.c: In function 'kvm_mmu_max_mapping_level': >> arch/x86/kvm/mmu/mmu.c:3315:14: error: implicit declaration of function 'kvm_get_memory_attributes' [-Werror=implicit-function-declaration] 3315 | kvm_get_memory_attributes(kvm, gfn) & KVM_MEMORY_ATTRIBUTE_PRIVATE)) { | ^~~~~~~~~~~~~~~~~~~~~~~~~ cc1: some warnings being treated as errors vim +/kvm_get_memory_attributes +3315 arch/x86/kvm/mmu/mmu.c 3303 3304 int kvm_mmu_max_mapping_level(struct kvm *kvm, 3305 const struct kvm_memory_slot *slot, gfn_t gfn) 3306 { 3307 int max_level; 3308 3309 max_level = kvm_lpage_info_max_mapping_level(kvm, slot, gfn, PG_LEVEL_NUM); 3310 if (max_level == PG_LEVEL_4K) 3311 return PG_LEVEL_4K; 3312 3313 if (kvm_slot_has_gmem(slot) && 3314 (kvm_gmem_memslot_supports_shared(slot) || > 3315 kvm_get_memory_attributes(kvm, gfn) & KVM_MEMORY_ATTRIBUTE_PRIVATE)) { 3316 return kvm_gmem_max_mapping_level(slot, gfn, max_level); 3317 } 3318 3319 return min(max_level, host_pfn_mapping_level(kvm, gfn, slot)); 3320 } 3321
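The failing randconfig presumably has CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES disabled, in which case kvm_get_memory_attributes() is not declared at all. One way the check could be made config-safe (a sketch of one option, not necessarily the fix applied in the respin) is to route the privacy test through kvm_mem_is_private(), which already has a stub that compiles to false when the attributes config is off:

	/*
	 * Sketch only: kvm_mem_is_private() falls back to "false" when
	 * CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES is not set, so this variant
	 * does not depend on kvm_get_memory_attributes() being declared.
	 */
	if (kvm_slot_has_gmem(slot) &&
	    (kvm_gmem_memslot_supports_shared(slot) ||
	     kvm_mem_is_private(kvm, gfn)))
		return kvm_gmem_max_mapping_level(slot, gfn, max_level);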
On 13.05.25 18:34, Fuad Tabba wrote: > From: Ackerley Tng <ackerleytng@google.com> > > This patch adds kvm_gmem_max_mapping_level(), which always returns > PG_LEVEL_4K since guest_memfd only supports 4K pages for now. > > When guest_memfd supports shared memory, max_mapping_level (especially > when recovering huge pages - see call to __kvm_mmu_max_mapping_level() > from recover_huge_pages_range()) should take input from > guest_memfd. > > Input from guest_memfd should be taken in these cases: > > + if the memslot supports shared memory (guest_memfd is used for > shared memory, or in future both shared and private memory) or > + if the memslot is only used for private memory and that gfn is > private. > > If the memslot doesn't use guest_memfd, figure out the > max_mapping_level using the host page tables like before. > > This patch also refactors and inlines the other call to > __kvm_mmu_max_mapping_level(). > > In kvm_mmu_hugepage_adjust(), guest_memfd's input is already > provided (if applicable) in fault->max_level. Hence, there is no need > to query guest_memfd. > > lpage_info is queried like before, and then if the fault is not from > guest_memfd, adjust fault->req_level based on input from host page > tables. > > Signed-off-by: Ackerley Tng <ackerleytng@google.com> > Signed-off-by: Fuad Tabba <tabba@google.com> > --- > arch/x86/kvm/mmu/mmu.c | 92 ++++++++++++++++++++++++++-------------- > include/linux/kvm_host.h | 7 +++ > virt/kvm/guest_memfd.c | 12 ++++++ > 3 files changed, 79 insertions(+), 32 deletions(-) > > diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c > index cfbb471f7c70..9e0bc8114859 100644 > --- a/arch/x86/kvm/mmu/mmu.c > +++ b/arch/x86/kvm/mmu/mmu.c > @@ -3256,12 +3256,11 @@ static int host_pfn_mapping_level(struct kvm *kvm, gfn_t gfn, > return level; > } [...] > static u8 kvm_max_level_for_fault_and_order(struct kvm *kvm, > struct kvm_page_fault *fault, > int order) > @@ -4523,7 +4551,7 @@ static int __kvm_mmu_faultin_pfn(struct kvm_vcpu *vcpu, > { > unsigned int foll = fault->write ? FOLL_WRITE : 0; > > - if (fault->is_private || kvm_gmem_memslot_supports_shared(fault->slot)) > + if (fault_from_gmem(fault)) Should this change rather have been done in the previous patch? (then only adjust fault_from_gmem() in this function as required) > return kvm_mmu_faultin_pfn_gmem(vcpu, fault); > > foll |= FOLL_NOWAIT; > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h > index de7b46ee1762..f9bb025327c3 100644 > --- a/include/linux/kvm_host.h > +++ b/include/linux/kvm_host.h > @@ -2560,6 +2560,7 @@ static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn) > int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot, > gfn_t gfn, kvm_pfn_t *pfn, struct page **page, > int *max_order); > +int kvm_gmem_mapping_order(const struct kvm_memory_slot *slot, gfn_t gfn); > #else > static inline int kvm_gmem_get_pfn(struct kvm *kvm, > struct kvm_memory_slot *slot, gfn_t gfn, > @@ -2569,6 +2570,12 @@ static inline int kvm_gmem_get_pfn(struct kvm *kvm, > KVM_BUG_ON(1, kvm); > return -EIO; > } > +static inline int kvm_gmem_mapping_order(const struct kvm_memory_slot *slot, > + gfn_t gfn) Probably should indent with two tabs here.
Hi David, On Wed, 21 May 2025 at 09:01, David Hildenbrand <david@redhat.com> wrote: > > On 13.05.25 18:34, Fuad Tabba wrote: > > From: Ackerley Tng <ackerleytng@google.com> > > > > This patch adds kvm_gmem_max_mapping_level(), which always returns > > PG_LEVEL_4K since guest_memfd only supports 4K pages for now. > > > > When guest_memfd supports shared memory, max_mapping_level (especially > > when recovering huge pages - see call to __kvm_mmu_max_mapping_level() > > from recover_huge_pages_range()) should take input from > > guest_memfd. > > > > Input from guest_memfd should be taken in these cases: > > > > + if the memslot supports shared memory (guest_memfd is used for > > shared memory, or in future both shared and private memory) or > > + if the memslot is only used for private memory and that gfn is > > private. > > > > If the memslot doesn't use guest_memfd, figure out the > > max_mapping_level using the host page tables like before. > > > > This patch also refactors and inlines the other call to > > __kvm_mmu_max_mapping_level(). > > > > In kvm_mmu_hugepage_adjust(), guest_memfd's input is already > > provided (if applicable) in fault->max_level. Hence, there is no need > > to query guest_memfd. > > > > lpage_info is queried like before, and then if the fault is not from > > guest_memfd, adjust fault->req_level based on input from host page > > tables. > > > > Signed-off-by: Ackerley Tng <ackerleytng@google.com> > > Signed-off-by: Fuad Tabba <tabba@google.com> > > --- > > arch/x86/kvm/mmu/mmu.c | 92 ++++++++++++++++++++++++++-------------- > > include/linux/kvm_host.h | 7 +++ > > virt/kvm/guest_memfd.c | 12 ++++++ > > 3 files changed, 79 insertions(+), 32 deletions(-) > > > > diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c > > index cfbb471f7c70..9e0bc8114859 100644 > > --- a/arch/x86/kvm/mmu/mmu.c > > +++ b/arch/x86/kvm/mmu/mmu.c > > @@ -3256,12 +3256,11 @@ static int host_pfn_mapping_level(struct kvm *kvm, gfn_t gfn, > > return level; > > } > [...] > > > static u8 kvm_max_level_for_fault_and_order(struct kvm *kvm, > > struct kvm_page_fault *fault, > > int order) > > @@ -4523,7 +4551,7 @@ static int __kvm_mmu_faultin_pfn(struct kvm_vcpu *vcpu, > > { > > unsigned int foll = fault->write ? FOLL_WRITE : 0; > > > > - if (fault->is_private || kvm_gmem_memslot_supports_shared(fault->slot)) > > + if (fault_from_gmem(fault)) > > Should this change rather have been done in the previous patch? > > (then only adjust fault_from_gmem() in this function as required) > > > return kvm_mmu_faultin_pfn_gmem(vcpu, fault); > > > > foll |= FOLL_NOWAIT; > > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h > > index de7b46ee1762..f9bb025327c3 100644 > > --- a/include/linux/kvm_host.h > > +++ b/include/linux/kvm_host.h > > @@ -2560,6 +2560,7 @@ static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn) > > int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot, > > gfn_t gfn, kvm_pfn_t *pfn, struct page **page, > > int *max_order); > > +int kvm_gmem_mapping_order(const struct kvm_memory_slot *slot, gfn_t gfn); > > #else > > static inline int kvm_gmem_get_pfn(struct kvm *kvm, > > struct kvm_memory_slot *slot, gfn_t gfn, > > @@ -2569,6 +2570,12 @@ static inline int kvm_gmem_get_pfn(struct kvm *kvm, > > KVM_BUG_ON(1, kvm); > > return -EIO; > > } > > +static inline int kvm_gmem_mapping_order(const struct kvm_memory_slot *slot, > > + gfn_t gfn) > > Probably should indent with two tabs here. 
(I'm fixing the patch before respinning, hence it's me asking) Not sure I understand. Indentation here matches the same style as that for kvm_gmem_get_pfn() right above it in the alignment of the parameters, i.e., the parameter `gfn_t gfn` is aligned with the parameter `const struct kvm_memory_slot *slot` (four tabs and a space). Thanks, /fuad > > > -- > Cheers, > > David / dhildenb >
On 22.05.25 09:22, Fuad Tabba wrote: > Hi David, > > On Wed, 21 May 2025 at 09:01, David Hildenbrand <david@redhat.com> wrote: >> >> On 13.05.25 18:34, Fuad Tabba wrote: >>> From: Ackerley Tng <ackerleytng@google.com> >>> >>> This patch adds kvm_gmem_max_mapping_level(), which always returns >>> PG_LEVEL_4K since guest_memfd only supports 4K pages for now. >>> >>> When guest_memfd supports shared memory, max_mapping_level (especially >>> when recovering huge pages - see call to __kvm_mmu_max_mapping_level() >>> from recover_huge_pages_range()) should take input from >>> guest_memfd. >>> >>> Input from guest_memfd should be taken in these cases: >>> >>> + if the memslot supports shared memory (guest_memfd is used for >>> shared memory, or in future both shared and private memory) or >>> + if the memslot is only used for private memory and that gfn is >>> private. >>> >>> If the memslot doesn't use guest_memfd, figure out the >>> max_mapping_level using the host page tables like before. >>> >>> This patch also refactors and inlines the other call to >>> __kvm_mmu_max_mapping_level(). >>> >>> In kvm_mmu_hugepage_adjust(), guest_memfd's input is already >>> provided (if applicable) in fault->max_level. Hence, there is no need >>> to query guest_memfd. >>> >>> lpage_info is queried like before, and then if the fault is not from >>> guest_memfd, adjust fault->req_level based on input from host page >>> tables. >>> >>> Signed-off-by: Ackerley Tng <ackerleytng@google.com> >>> Signed-off-by: Fuad Tabba <tabba@google.com> >>> --- >>> arch/x86/kvm/mmu/mmu.c | 92 ++++++++++++++++++++++++++-------------- >>> include/linux/kvm_host.h | 7 +++ >>> virt/kvm/guest_memfd.c | 12 ++++++ >>> 3 files changed, 79 insertions(+), 32 deletions(-) >>> >>> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c >>> index cfbb471f7c70..9e0bc8114859 100644 >>> --- a/arch/x86/kvm/mmu/mmu.c >>> +++ b/arch/x86/kvm/mmu/mmu.c >>> @@ -3256,12 +3256,11 @@ static int host_pfn_mapping_level(struct kvm *kvm, gfn_t gfn, >>> return level; >>> } >> [...] >> >>> static u8 kvm_max_level_for_fault_and_order(struct kvm *kvm, >>> struct kvm_page_fault *fault, >>> int order) >>> @@ -4523,7 +4551,7 @@ static int __kvm_mmu_faultin_pfn(struct kvm_vcpu *vcpu, >>> { >>> unsigned int foll = fault->write ? FOLL_WRITE : 0; >>> >>> - if (fault->is_private || kvm_gmem_memslot_supports_shared(fault->slot)) >>> + if (fault_from_gmem(fault)) >> >> Should this change rather have been done in the previous patch? >> >> (then only adjust fault_from_gmem() in this function as required) >> >>> return kvm_mmu_faultin_pfn_gmem(vcpu, fault); >>> >>> foll |= FOLL_NOWAIT; >>> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h >>> index de7b46ee1762..f9bb025327c3 100644 >>> --- a/include/linux/kvm_host.h >>> +++ b/include/linux/kvm_host.h >>> @@ -2560,6 +2560,7 @@ static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn) >>> int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot, >>> gfn_t gfn, kvm_pfn_t *pfn, struct page **page, >>> int *max_order); >>> +int kvm_gmem_mapping_order(const struct kvm_memory_slot *slot, gfn_t gfn); >>> #else >>> static inline int kvm_gmem_get_pfn(struct kvm *kvm, >>> struct kvm_memory_slot *slot, gfn_t gfn, >>> @@ -2569,6 +2570,12 @@ static inline int kvm_gmem_get_pfn(struct kvm *kvm, >>> KVM_BUG_ON(1, kvm); >>> return -EIO; >>> } >>> +static inline int kvm_gmem_mapping_order(const struct kvm_memory_slot *slot, >>> + gfn_t gfn) >> >> Probably should indent with two tabs here. 
> > (I'm fixing the patch before respinning, hence it's me asking) > > Not sure I understand. Indentation here matches the same style as that > for kvm_gmem_get_pfn() right above it in the alignment of the > parameters, i.e., the parameter `gfn_t gfn` is aligned with the > parameter `const struct kvm_memory_slot *slot` (four tabs and a > space). Yeah, that way of indenting is rather bad practice. Especially for new code we're adding or when we touch existing code, we should just use two tabs. That way, we can fit more stuff into a single line, and when doing simple changes, such as renaming the function or changing the return type, we won't have to touch all the parameters. Maybe KVM has its own rules on that ... that's why I said "probably" :)
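For anyone unfamiliar with the two styles being discussed, the difference is only in how the continuation line of the parameter list is indented; using the declaration from this patch as the example (redundant declarations shown purely for comparison):

/* Align-to-open-parenthesis style, as in the patch (matches kvm_gmem_get_pfn() above it): */
static inline int kvm_gmem_mapping_order(const struct kvm_memory_slot *slot,
					 gfn_t gfn);

/* Two-tab continuation style being suggested: */
static inline int kvm_gmem_mapping_order(const struct kvm_memory_slot *slot,
		gfn_t gfn);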
On Thu, 22 May 2025 at 09:56, David Hildenbrand <david@redhat.com> wrote: > > On 22.05.25 09:22, Fuad Tabba wrote: > > Hi David, > > > > On Wed, 21 May 2025 at 09:01, David Hildenbrand <david@redhat.com> wrote: > >> > >> On 13.05.25 18:34, Fuad Tabba wrote: > >>> From: Ackerley Tng <ackerleytng@google.com> > >>> > >>> This patch adds kvm_gmem_max_mapping_level(), which always returns > >>> PG_LEVEL_4K since guest_memfd only supports 4K pages for now. > >>> > >>> When guest_memfd supports shared memory, max_mapping_level (especially > >>> when recovering huge pages - see call to __kvm_mmu_max_mapping_level() > >>> from recover_huge_pages_range()) should take input from > >>> guest_memfd. > >>> > >>> Input from guest_memfd should be taken in these cases: > >>> > >>> + if the memslot supports shared memory (guest_memfd is used for > >>> shared memory, or in future both shared and private memory) or > >>> + if the memslot is only used for private memory and that gfn is > >>> private. > >>> > >>> If the memslot doesn't use guest_memfd, figure out the > >>> max_mapping_level using the host page tables like before. > >>> > >>> This patch also refactors and inlines the other call to > >>> __kvm_mmu_max_mapping_level(). > >>> > >>> In kvm_mmu_hugepage_adjust(), guest_memfd's input is already > >>> provided (if applicable) in fault->max_level. Hence, there is no need > >>> to query guest_memfd. > >>> > >>> lpage_info is queried like before, and then if the fault is not from > >>> guest_memfd, adjust fault->req_level based on input from host page > >>> tables. > >>> > >>> Signed-off-by: Ackerley Tng <ackerleytng@google.com> > >>> Signed-off-by: Fuad Tabba <tabba@google.com> > >>> --- > >>> arch/x86/kvm/mmu/mmu.c | 92 ++++++++++++++++++++++++++-------------- > >>> include/linux/kvm_host.h | 7 +++ > >>> virt/kvm/guest_memfd.c | 12 ++++++ > >>> 3 files changed, 79 insertions(+), 32 deletions(-) > >>> > >>> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c > >>> index cfbb471f7c70..9e0bc8114859 100644 > >>> --- a/arch/x86/kvm/mmu/mmu.c > >>> +++ b/arch/x86/kvm/mmu/mmu.c > >>> @@ -3256,12 +3256,11 @@ static int host_pfn_mapping_level(struct kvm *kvm, gfn_t gfn, > >>> return level; > >>> } > >> [...] > >> > >>> static u8 kvm_max_level_for_fault_and_order(struct kvm *kvm, > >>> struct kvm_page_fault *fault, > >>> int order) > >>> @@ -4523,7 +4551,7 @@ static int __kvm_mmu_faultin_pfn(struct kvm_vcpu *vcpu, > >>> { > >>> unsigned int foll = fault->write ? FOLL_WRITE : 0; > >>> > >>> - if (fault->is_private || kvm_gmem_memslot_supports_shared(fault->slot)) > >>> + if (fault_from_gmem(fault)) > >> > >> Should this change rather have been done in the previous patch? 
> >> > >> (then only adjust fault_from_gmem() in this function as required) > >> > >>> return kvm_mmu_faultin_pfn_gmem(vcpu, fault); > >>> > >>> foll |= FOLL_NOWAIT; > >>> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h > >>> index de7b46ee1762..f9bb025327c3 100644 > >>> --- a/include/linux/kvm_host.h > >>> +++ b/include/linux/kvm_host.h > >>> @@ -2560,6 +2560,7 @@ static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn) > >>> int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot, > >>> gfn_t gfn, kvm_pfn_t *pfn, struct page **page, > >>> int *max_order); > >>> +int kvm_gmem_mapping_order(const struct kvm_memory_slot *slot, gfn_t gfn); > >>> #else > >>> static inline int kvm_gmem_get_pfn(struct kvm *kvm, > >>> struct kvm_memory_slot *slot, gfn_t gfn, > >>> @@ -2569,6 +2570,12 @@ static inline int kvm_gmem_get_pfn(struct kvm *kvm, > >>> KVM_BUG_ON(1, kvm); > >>> return -EIO; > >>> } > >>> +static inline int kvm_gmem_mapping_order(const struct kvm_memory_slot *slot, > >>> + gfn_t gfn) > >> > >> Probably should indent with two tabs here. > > > > (I'm fixing the patch before respinning, hence it's me asking) > > > > Not sure I understand. Indentation here matches the same style as that > > for kvm_gmem_get_pfn() right above it in the alignment of the > > parameters, i.e., the parameter `gfn_t gfn` is aligned with the > > parameter `const struct kvm_memory_slot *slot` (four tabs and a > > space). > > Yeah, that way of indenting is rather bad practice. Especially for new > code we're adding or when we touch existing code, we should just use two > tabs. > > That way, we can fit more stuff into a single line, and when doing > simple changes, such as renaming the function or changing the return > type, we won't have to touch all the parameters. > > Maybe KVM has its own rules on that ... that's why I said "probably" :) :) I see, although I agree with you, I'd rather that indentation be consistent within the same file. Thanks, /fuad > -- > Cheers, > > David / dhildenb >
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index cfbb471f7c70..9e0bc8114859 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3256,12 +3256,11 @@ static int host_pfn_mapping_level(struct kvm *kvm, gfn_t gfn, return level; } -static int __kvm_mmu_max_mapping_level(struct kvm *kvm, - const struct kvm_memory_slot *slot, - gfn_t gfn, int max_level, bool is_private) +static int kvm_lpage_info_max_mapping_level(struct kvm *kvm, + const struct kvm_memory_slot *slot, + gfn_t gfn, int max_level) { struct kvm_lpage_info *linfo; - int host_level; max_level = min(max_level, max_huge_page_level); for ( ; max_level > PG_LEVEL_4K; max_level--) { @@ -3270,23 +3269,61 @@ static int __kvm_mmu_max_mapping_level(struct kvm *kvm, break; } - if (is_private) - return max_level; + return max_level; +} + +static inline u8 kvm_max_level_for_order(int order) +{ + BUILD_BUG_ON(KVM_MAX_HUGEPAGE_LEVEL > PG_LEVEL_1G); + + KVM_MMU_WARN_ON(order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_1G) && + order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M) && + order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_4K)); + + if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_1G)) + return PG_LEVEL_1G; + + if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M)) + return PG_LEVEL_2M; + + return PG_LEVEL_4K; +} + +static inline int kvm_gmem_max_mapping_level(const struct kvm_memory_slot *slot, + gfn_t gfn, int max_level) +{ + int max_order; if (max_level == PG_LEVEL_4K) return PG_LEVEL_4K; - host_level = host_pfn_mapping_level(kvm, gfn, slot); - return min(host_level, max_level); + max_order = kvm_gmem_mapping_order(slot, gfn); + return min(max_level, kvm_max_level_for_order(max_order)); } int kvm_mmu_max_mapping_level(struct kvm *kvm, const struct kvm_memory_slot *slot, gfn_t gfn) { - bool is_private = kvm_slot_has_gmem(slot) && - kvm_mem_is_private(kvm, gfn); + int max_level; + + max_level = kvm_lpage_info_max_mapping_level(kvm, slot, gfn, PG_LEVEL_NUM); + if (max_level == PG_LEVEL_4K) + return PG_LEVEL_4K; - return __kvm_mmu_max_mapping_level(kvm, slot, gfn, PG_LEVEL_NUM, is_private); + if (kvm_slot_has_gmem(slot) && + (kvm_gmem_memslot_supports_shared(slot) || + kvm_get_memory_attributes(kvm, gfn) & KVM_MEMORY_ATTRIBUTE_PRIVATE)) { + return kvm_gmem_max_mapping_level(slot, gfn, max_level); + } + + return min(max_level, host_pfn_mapping_level(kvm, gfn, slot)); +} + +static inline bool fault_from_gmem(struct kvm_page_fault *fault) +{ + return fault->is_private || + (kvm_slot_has_gmem(fault->slot) && + kvm_gmem_memslot_supports_shared(fault->slot)); } void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) @@ -3309,12 +3346,20 @@ void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault * Enforce the iTLB multihit workaround after capturing the requested * level, which will be used to do precise, accurate accounting. */ - fault->req_level = __kvm_mmu_max_mapping_level(vcpu->kvm, slot, - fault->gfn, fault->max_level, - fault->is_private); + fault->req_level = kvm_lpage_info_max_mapping_level(vcpu->kvm, slot, + fault->gfn, fault->max_level); if (fault->req_level == PG_LEVEL_4K || fault->huge_page_disallowed) return; + if (!fault_from_gmem(fault)) { + int host_level; + + host_level = host_pfn_mapping_level(vcpu->kvm, fault->gfn, slot); + fault->req_level = min(fault->req_level, host_level); + if (fault->req_level == PG_LEVEL_4K) + return; + } + /* * mmu_invalidate_retry() was successful and mmu_lock is held, so * the pmd can't be split from under us. 
@@ -4448,23 +4493,6 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work) vcpu->stat.pf_fixed++; } -static inline u8 kvm_max_level_for_order(int order) -{ - BUILD_BUG_ON(KVM_MAX_HUGEPAGE_LEVEL > PG_LEVEL_1G); - - KVM_MMU_WARN_ON(order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_1G) && - order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M) && - order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_4K)); - - if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_1G)) - return PG_LEVEL_1G; - - if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M)) - return PG_LEVEL_2M; - - return PG_LEVEL_4K; -} - static u8 kvm_max_level_for_fault_and_order(struct kvm *kvm, struct kvm_page_fault *fault, int order) @@ -4523,7 +4551,7 @@ static int __kvm_mmu_faultin_pfn(struct kvm_vcpu *vcpu, { unsigned int foll = fault->write ? FOLL_WRITE : 0; - if (fault->is_private || kvm_gmem_memslot_supports_shared(fault->slot)) + if (fault_from_gmem(fault)) return kvm_mmu_faultin_pfn_gmem(vcpu, fault); foll |= FOLL_NOWAIT; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index de7b46ee1762..f9bb025327c3 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2560,6 +2560,7 @@ static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn) int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn, kvm_pfn_t *pfn, struct page **page, int *max_order); +int kvm_gmem_mapping_order(const struct kvm_memory_slot *slot, gfn_t gfn); #else static inline int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn, @@ -2569,6 +2570,12 @@ static inline int kvm_gmem_get_pfn(struct kvm *kvm, KVM_BUG_ON(1, kvm); return -EIO; } +static inline int kvm_gmem_mapping_order(const struct kvm_memory_slot *slot, + gfn_t gfn) +{ + BUG(); + return 0; +} #endif /* CONFIG_KVM_GMEM */ #ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c index fe0245335c96..b8e247063b20 100644 --- a/virt/kvm/guest_memfd.c +++ b/virt/kvm/guest_memfd.c @@ -774,6 +774,18 @@ int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot, } EXPORT_SYMBOL_GPL(kvm_gmem_get_pfn); +/** + * Returns the mapping order for this @gfn in @slot. + * + * This is equal to max_order that would be returned if kvm_gmem_get_pfn() were + * called now. + */ +int kvm_gmem_mapping_order(const struct kvm_memory_slot *slot, gfn_t gfn) +{ + return 0; +} +EXPORT_SYMBOL_GPL(kvm_gmem_mapping_order); + #ifdef CONFIG_KVM_GENERIC_GMEM_POPULATE long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long npages, kvm_gmem_populate_cb post_populate, void *opaque)
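To summarise the decision order the reworked kvm_mmu_max_mapping_level() implements, here is a minimal stand-alone model; the helper roles mirror the patch but the bodies are stand-ins, so treat it as an illustration only:

#include <stdio.h>
#include <stdbool.h>

enum { PG_LEVEL_4K = 1, PG_LEVEL_2M = 2, PG_LEVEL_1G = 3 };
#define MIN(a, b)	((a) < (b) ? (a) : (b))

/* Toy model of the decision order in the reworked kvm_mmu_max_mapping_level(). */
static int model_max_mapping_level(bool slot_has_gmem, bool gmem_shared,
		bool gfn_private, int lpage_info_level,
		int gmem_level, int host_pt_level)
{
	int max_level = lpage_info_level;		/* 1. lpage_info / disallow_lpage cap */

	if (max_level == PG_LEVEL_4K)
		return PG_LEVEL_4K;

	if (slot_has_gmem && (gmem_shared || gfn_private))
		return MIN(max_level, gmem_level);	/* 2. take input from guest_memfd */

	return MIN(max_level, host_pt_level);		/* 3. fall back to host page tables */
}

int main(void)
{
	/* gmem slot, shared memory: gmem reports 4K today, so the result is 4K. */
	printf("%d\n", model_max_mapping_level(true, true, false,
					       PG_LEVEL_1G, PG_LEVEL_4K, PG_LEVEL_1G));
	/* non-gmem slot: the host page tables cap the level as before. */
	printf("%d\n", model_max_mapping_level(false, false, false,
					       PG_LEVEL_1G, PG_LEVEL_4K, PG_LEVEL_2M));
	return 0;
}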