
[v9,13/17] KVM: arm64: Handle guest_memfd()-backed guest page faults

Message ID 20250513163438.3942405-14-tabba@google.com
State New
Series KVM: Mapping guest_memfd backed memory at the host for software protected VMs

Commit Message

Fuad Tabba May 13, 2025, 4:34 p.m. UTC
Add arm64 support for handling guest page faults on guest_memfd
backed memslots.

For now, the fault granule is restricted to PAGE_SIZE.

Signed-off-by: Fuad Tabba <tabba@google.com>
---
 arch/arm64/kvm/mmu.c     | 94 +++++++++++++++++++++++++---------------
 include/linux/kvm_host.h |  5 +++
 virt/kvm/kvm_main.c      |  5 ---
 3 files changed, 64 insertions(+), 40 deletions(-)

Comments

David Hildenbrand May 21, 2025, 8:04 a.m. UTC | #1
On 13.05.25 18:34, Fuad Tabba wrote:
> Add arm64 support for handling guest page faults on guest_memfd
> backed memslots.
> 
> For now, the fault granule is restricted to PAGE_SIZE.
> 
> Signed-off-by: Fuad Tabba <tabba@google.com>
> ---

[...]

> +	if (!is_gmem) {

Should we add a comment somewhere, stating that we don't support VMs 
with private memory, so if we have a gmem, all faults are routed through 
that?
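
A rough sketch of the kind of comment being suggested, just above the
check (wording and placement are illustrative only, not taken from the
patch):

	/*
	 * VMs with private memory aren't supported (yet), so a gmem-backed
	 * slot is always shared with the host, and every fault on it is
	 * resolved through guest_memfd rather than the host VMA below.
	 */
	if (!is_gmem) {
		...
	}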

> +		mmap_read_lock(current->mm);
> +		vma = vma_lookup(current->mm, hva);
> +		if (unlikely(!vma)) {
> +			kvm_err("Failed to find VMA for hva 0x%lx\n", hva);
> +			mmap_read_unlock(current->mm);
> +			return -EFAULT;
> +		}
> +
> +		vfio_allow_any_uc = vma->vm_flags & VM_ALLOW_ANY_UNCACHED;
> +		mte_allowed = kvm_vma_mte_allowed(vma);
Fuad Tabba May 21, 2025, 11:10 a.m. UTC | #2
Hi David,

On Wed, 21 May 2025 at 09:04, David Hildenbrand <david@redhat.com> wrote:
>
> On 13.05.25 18:34, Fuad Tabba wrote:
> > Add arm64 support for handling guest page faults on guest_memfd
> > backed memslots.
> >
> > For now, the fault granule is restricted to PAGE_SIZE.
> >
> > Signed-off-by: Fuad Tabba <tabba@google.com>
> > ---
>
> [...]
>
> > +     if (!is_gmem) {
>
> Should we add a comment somewhere, stating that we don't support VMs
> with private memory, so if we have a gmem, all faults are routed through
> that?

I guess this is related to the other thread we had. This code would
handle private memory correctly; it's just that, for arm64 as it
stands, having private memory isn't that useful.

There might be a use case where a user would create a
guest_memfd-backed slot that supports private memory, and one that
doesn't, which only the guest would use. I doubt that's actually
useful, but it would work and behave as expected.
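
For reference, a minimal userspace sketch of creating and binding such
a guest_memfd-backed slot (vm_fd and host_va are stand-ins, the numbers
are made up, and error handling is elided):

	struct kvm_create_guest_memfd gmem = {
		.size = 512UL * 1024 * 1024,
	};
	int gmem_fd = ioctl(vm_fd, KVM_CREATE_GUEST_MEMFD, &gmem);

	struct kvm_userspace_memory_region2 region = {
		.slot			= 1,
		.flags			= KVM_MEM_GUEST_MEMFD,
		.guest_phys_addr	= 0x80000000UL,
		.memory_size		= gmem.size,
		/* shared view; with this series it could be an mmap() of gmem_fd */
		.userspace_addr		= (__u64)host_va,
		.guest_memfd		= gmem_fd,
		.guest_memfd_offset	= 0,
	};
	ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION2, &region);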

Cheers,
/fuad

> > +             mmap_read_lock(current->mm);
> > +             vma = vma_lookup(current->mm, hva);
> > +             if (unlikely(!vma)) {
> > +                     kvm_err("Failed to find VMA for hva 0x%lx\n", hva);
> > +                     mmap_read_unlock(current->mm);
> > +                     return -EFAULT;
> > +             }
> > +
> > +             vfio_allow_any_uc = vma->vm_flags & VM_ALLOW_ANY_UNCACHED;
> > +             mte_allowed = kvm_vma_mte_allowed(vma);
>
> --
> Cheers,
>
> David / dhildenb
>

Patch

diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index d756c2b5913f..9a48ef08491d 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1466,6 +1466,30 @@  static bool kvm_vma_mte_allowed(struct vm_area_struct *vma)
 	return vma->vm_flags & VM_MTE_ALLOWED;
 }
 
+static kvm_pfn_t faultin_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
+			     gfn_t gfn, bool write_fault, bool *writable,
+			     struct page **page, bool is_gmem)
+{
+	kvm_pfn_t pfn;
+	int ret;
+
+	if (!is_gmem)
+		return __kvm_faultin_pfn(slot, gfn, write_fault ? FOLL_WRITE : 0, writable, page);
+
+	*writable = false;
+
+	ret = kvm_gmem_get_pfn(kvm, slot, gfn, &pfn, page, NULL);
+	if (!ret) {
+		*writable = !memslot_is_readonly(slot);
+		return pfn;
+	}
+
+	if (ret == -EHWPOISON)
+		return KVM_PFN_ERR_HWPOISON;
+
+	return KVM_PFN_ERR_NOSLOT_MASK;
+}
+
 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			  struct kvm_s2_trans *nested,
 			  struct kvm_memory_slot *memslot, unsigned long hva,
@@ -1473,19 +1497,20 @@  static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 {
 	int ret = 0;
 	bool write_fault, writable;
-	bool exec_fault, mte_allowed;
+	bool exec_fault, mte_allowed = false;
 	bool device = false, vfio_allow_any_uc = false;
 	unsigned long mmu_seq;
 	phys_addr_t ipa = fault_ipa;
 	struct kvm *kvm = vcpu->kvm;
-	struct vm_area_struct *vma;
-	short page_shift;
+	struct vm_area_struct *vma = NULL;
+	short page_shift = PAGE_SHIFT;
 	void *memcache;
-	gfn_t gfn;
+	gfn_t gfn = ipa >> PAGE_SHIFT;
 	kvm_pfn_t pfn;
 	bool logging_active = memslot_is_logging(memslot);
-	bool force_pte = logging_active || is_protected_kvm_enabled();
-	long page_size, fault_granule;
+	bool is_gmem = kvm_slot_has_gmem(memslot);
+	bool force_pte = logging_active || is_gmem || is_protected_kvm_enabled();
+	long page_size, fault_granule = PAGE_SIZE;
 	enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
 	struct kvm_pgtable *pgt;
 	struct page *page;
@@ -1529,17 +1554,20 @@  static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	 * Let's check if we will get back a huge page backed by hugetlbfs, or
 	 * get block mapping for device MMIO region.
 	 */
-	mmap_read_lock(current->mm);
-	vma = vma_lookup(current->mm, hva);
-	if (unlikely(!vma)) {
-		kvm_err("Failed to find VMA for hva 0x%lx\n", hva);
-		mmap_read_unlock(current->mm);
-		return -EFAULT;
+	if (!is_gmem) {
+		mmap_read_lock(current->mm);
+		vma = vma_lookup(current->mm, hva);
+		if (unlikely(!vma)) {
+			kvm_err("Failed to find VMA for hva 0x%lx\n", hva);
+			mmap_read_unlock(current->mm);
+			return -EFAULT;
+		}
+
+		vfio_allow_any_uc = vma->vm_flags & VM_ALLOW_ANY_UNCACHED;
+		mte_allowed = kvm_vma_mte_allowed(vma);
 	}
 
-	if (force_pte)
-		page_shift = PAGE_SHIFT;
-	else
+	if (!force_pte)
 		page_shift = get_vma_page_shift(vma, hva);
 
 	switch (page_shift) {
@@ -1605,27 +1633,23 @@  static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 		ipa &= ~(page_size - 1);
 	}
 
-	gfn = ipa >> PAGE_SHIFT;
-	mte_allowed = kvm_vma_mte_allowed(vma);
-
-	vfio_allow_any_uc = vma->vm_flags & VM_ALLOW_ANY_UNCACHED;
-
-	/* Don't use the VMA after the unlock -- it may have vanished */
-	vma = NULL;
+	if (!is_gmem) {
+		/* Don't use the VMA after the unlock -- it may have vanished */
+		vma = NULL;
 
-	/*
-	 * Read mmu_invalidate_seq so that KVM can detect if the results of
-	 * vma_lookup() or __kvm_faultin_pfn() become stale prior to
-	 * acquiring kvm->mmu_lock.
-	 *
-	 * Rely on mmap_read_unlock() for an implicit smp_rmb(), which pairs
-	 * with the smp_wmb() in kvm_mmu_invalidate_end().
-	 */
-	mmu_seq = vcpu->kvm->mmu_invalidate_seq;
-	mmap_read_unlock(current->mm);
+		/*
+		 * Read mmu_invalidate_seq so that KVM can detect if the results
+		 * of vma_lookup() or faultin_pfn() become stale prior to
+		 * acquiring kvm->mmu_lock.
+		 *
+		 * Rely on mmap_read_unlock() for an implicit smp_rmb(), which
+		 * pairs with the smp_wmb() in kvm_mmu_invalidate_end().
+		 */
+		mmu_seq = vcpu->kvm->mmu_invalidate_seq;
+		mmap_read_unlock(current->mm);
+	}
 
-	pfn = __kvm_faultin_pfn(memslot, gfn, write_fault ? FOLL_WRITE : 0,
-				&writable, &page);
+	pfn = faultin_pfn(kvm, memslot, gfn, write_fault, &writable, &page, is_gmem);
 	if (pfn == KVM_PFN_ERR_HWPOISON) {
 		kvm_send_hwpoison_signal(hva, page_shift);
 		return 0;
@@ -1677,7 +1701,7 @@  static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 
 	kvm_fault_lock(kvm);
 	pgt = vcpu->arch.hw_mmu->pgt;
-	if (mmu_invalidate_retry(kvm, mmu_seq)) {
+	if (!is_gmem && mmu_invalidate_retry(kvm, mmu_seq)) {
 		ret = -EAGAIN;
 		goto out_unlock;
 	}
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index f9bb025327c3..b317392453a5 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1884,6 +1884,11 @@  static inline int memslot_id(struct kvm *kvm, gfn_t gfn)
 	return gfn_to_memslot(kvm, gfn)->id;
 }
 
+static inline bool memslot_is_readonly(const struct kvm_memory_slot *slot)
+{
+	return slot->flags & KVM_MEM_READONLY;
+}
+
 static inline gfn_t
 hva_to_gfn_memslot(unsigned long hva, struct kvm_memory_slot *slot)
 {
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 6289ea1685dd..6261d8638cd2 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2640,11 +2640,6 @@  unsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn)
 	return size;
 }
 
-static bool memslot_is_readonly(const struct kvm_memory_slot *slot)
-{
-	return slot->flags & KVM_MEM_READONLY;
-}
-
 static unsigned long __gfn_to_hva_many(const struct kvm_memory_slot *slot, gfn_t gfn,
 				       gfn_t *nr_pages, bool write)
 {