@@ -37,3 +37,31 @@ bool gzvm_handle_guest_exception(struct gzvm_vcpu *vcpu)
else
return false;
}
+
+/**
+ * gzvm_handle_guest_hvc() - Handle guest hvc
+ * @vcpu: Pointer to struct gzvm_vcpu_run in userspace
+ * Return:
+ * * true - This hvc has been processed, no need to back to VMM.
+ * * false - This hvc has not been processed, require userspace.
+ */
+bool gzvm_handle_guest_hvc(struct gzvm_vcpu *vcpu)
+{
+ unsigned long ipa;
+ int ret;
+
+ switch (vcpu->run->hypercall.args[0]) {
+ case GZVM_HVC_MEM_RELINQUISH:
+ ipa = vcpu->run->hypercall.args[1];
+ ret = gzvm_handle_relinquish(vcpu, ipa);
+ break;
+ default:
+ ret = false;
+ break;
+ }
+
+ if (!ret)
+ return true;
+ else
+ return false;
+}
@@ -107,6 +107,78 @@ int gzvm_gfn_to_pfn_memslot(struct gzvm_memslot *memslot, u64 gfn,
return 0;
}
+static int cmp_ppages(struct rb_node *node, const struct rb_node *parent)
+{
+ struct gzvm_pinned_page *a = container_of(node,
+ struct gzvm_pinned_page,
+ node);
+ struct gzvm_pinned_page *b = container_of(parent,
+ struct gzvm_pinned_page,
+ node);
+
+ if (a->ipa < b->ipa)
+ return -1;
+ if (a->ipa > b->ipa)
+ return 1;
+ return 0;
+}
+
+static int rb_ppage_cmp(const void *key, const struct rb_node *node)
+{
+ struct gzvm_pinned_page *p = container_of(node,
+ struct gzvm_pinned_page,
+ node);
+ phys_addr_t ipa = (phys_addr_t)key;
+
+ return (ipa < p->ipa) ? -1 : (ipa > p->ipa);
+}
+
+static int gzvm_insert_ppage(struct gzvm *vm, struct gzvm_pinned_page *ppage)
+{
+ if (rb_find_add(&ppage->node, &vm->pinned_pages, cmp_ppages))
+ return -EEXIST;
+ return 0;
+}
+
+static int pin_one_page(unsigned long hva, struct page **page)
+{
+ struct mm_struct *mm = current->mm;
+ unsigned int flags = FOLL_HWPOISON | FOLL_LONGTERM | FOLL_WRITE;
+
+ mmap_read_lock(mm);
+ pin_user_pages(hva, 1, flags, page);
+ mmap_read_unlock(mm);
+
+ return 0;
+}
+
+/**
+ * gzvm_handle_relinquish() - Handle memory relinquish request from hypervisor
+ *
+ * @vcpu: Pointer to struct gzvm_vcpu_run in userspace
+ * @ipa: Start address(gpa) of a reclaimed page
+ *
+ * Return: Always return 0 because there are no cases of failure
+ */
+int gzvm_handle_relinquish(struct gzvm_vcpu *vcpu, phys_addr_t ipa)
+{
+ struct gzvm_pinned_page *ppage;
+ struct rb_node *node;
+ struct gzvm *vm = vcpu->gzvm;
+
+ node = rb_find((void *)ipa, &vm->pinned_pages, rb_ppage_cmp);
+
+ if (node)
+ rb_erase(node, &vm->pinned_pages);
+ else
+ return 0;
+
+ ppage = container_of(node, struct gzvm_pinned_page, node);
+ unpin_user_pages_dirty_lock(&ppage->page, 1, true);
+ kfree(ppage);
+ return 0;
+}
+
/**
* gzvm_handle_page_fault() - Handle guest page fault, find corresponding page
* for the faulting gpa
@@ -118,7 +190,10 @@ int gzvm_gfn_to_pfn_memslot(struct gzvm_memslot *memslot, u64 gfn,
*/
int gzvm_handle_page_fault(struct gzvm_vcpu *vcpu)
{
+ struct gzvm_pinned_page *ppage = NULL;
struct gzvm *vm = vcpu->gzvm;
+ struct page *page = NULL;
+ unsigned long hva;
u64 pfn, gfn;
int memslot_id;
int ret;
@@ -136,5 +211,19 @@ int gzvm_handle_page_fault(struct gzvm_vcpu *vcpu)
if (unlikely(ret))
return -EFAULT;
+ hva = gzvm_gfn_to_hva_memslot(&vm->memslot[memslot_id], gfn);
+ pin_one_page(hva, &page);
+
+ if (!page)
+ return -EFAULT;
+
+ ppage = kmalloc(sizeof(*ppage), GFP_KERNEL_ACCOUNT);
+ if (!ppage)
+ return -ENOMEM;
+
+ ppage->page = page;
+ ppage->ipa = vcpu->run->exception.fault_gpa;
+ gzvm_insert_ppage(vm, ppage);
+
return 0;
}
@@ -36,8 +36,7 @@ static long gzvm_vcpu_update_one_reg(struct gzvm_vcpu *vcpu,
return -EINVAL;
if (is_write) {
- if (vcpu->vcpuid > 0)
- return -EPERM;
+ /* GZ hypervisor would filter out invalid vcpu register access */
if (copy_from_user(&data, reg_addr, reg_size))
return -EFAULT;
} else {
@@ -119,7 +118,9 @@ static long gzvm_vcpu_run(struct gzvm_vcpu *vcpu, void * __user argp)
need_userspace = true;
break;
case GZVM_EXIT_HYPERCALL:
- fallthrough;
+ if (!gzvm_handle_guest_hvc(vcpu))
+ need_userspace = true;
+ break;
case GZVM_EXIT_DEBUG:
fallthrough;
case GZVM_EXIT_FAIL_ENTRY:
@@ -292,6 +292,21 @@ static long gzvm_vm_ioctl(struct file *filp, unsigned int ioctl,
return ret;
}
+static void gzvm_destroy_ppage(struct gzvm *gzvm)
+{
+ struct gzvm_pinned_page *ppage;
+ struct rb_node *node;
+
+ node = rb_first(&gzvm->pinned_pages);
+ while (node) {
+ ppage = rb_entry(node, struct gzvm_pinned_page, node);
+ unpin_user_pages_dirty_lock(&ppage->page, 1, true);
+ node = rb_next(node);
+ rb_erase(&ppage->node, &gzvm->pinned_pages);
+ kfree(ppage);
+ }
+}
+
static void gzvm_destroy_vm(struct gzvm *gzvm)
{
pr_debug("VM-%u is going to be destroyed\n", gzvm->vm_id);
@@ -308,6 +323,8 @@ static void gzvm_destroy_vm(struct gzvm *gzvm)
mutex_unlock(&gzvm->lock);
+ gzvm_destroy_ppage(gzvm);
+
kfree(gzvm);
}
@@ -343,6 +360,7 @@ static struct gzvm *gzvm_create_vm(unsigned long vm_type)
gzvm->vm_id = ret;
gzvm->mm = current->mm;
mutex_init(&gzvm->lock);
+ gzvm->pinned_pages = RB_ROOT;
ret = gzvm_vm_irqfd_init(gzvm);
if (ret) {
@@ -12,6 +12,8 @@
#include <linux/mutex.h>
#include <linux/gzvm.h>
#include <linux/srcu.h>
+#include <linux/rbtree.h>
+#include <linux/mm.h>
/*
* For the normal physical address, the highest 12 bits should be zero, so we
@@ -80,6 +82,12 @@ struct gzvm_vcpu {
struct gzvm_vcpu_hwstate *hwstate;
};
+struct gzvm_pinned_page {
+ struct rb_node node;
+ struct page *page;
+ u64 ipa;
+};
+
struct gzvm {
struct gzvm_vcpu *vcpus[GZVM_MAX_VCPUS];
/* userspace tied to this vm */
@@ -106,6 +114,9 @@ struct gzvm {
struct srcu_struct irq_srcu;
/* lock for irq injection */
struct mutex irq_lock;
+
+ /* Use rb-tree to record pin/unpin page */
+ struct rb_root pinned_pages;
};
long gzvm_dev_ioctl_check_extension(struct gzvm *gzvm, unsigned long args);
@@ -147,6 +158,8 @@ int gzvm_arch_inform_exit(u16 vm_id);
int gzvm_find_memslot(struct gzvm *vm, u64 gpa);
int gzvm_handle_page_fault(struct gzvm_vcpu *vcpu);
bool gzvm_handle_guest_exception(struct gzvm_vcpu *vcpu);
+bool gzvm_handle_guest_hvc(struct gzvm_vcpu *vcpu);
+int gzvm_handle_relinquish(struct gzvm_vcpu *vcpu, phys_addr_t ipa);
int gzvm_arch_create_device(u16 vm_id, struct gzvm_create_device *gzvm_dev);
int gzvm_arch_inject_irq(struct gzvm *gzvm, unsigned int vcpu_idx,
@@ -191,6 +191,11 @@ enum {
GZVM_EXCEPTION_PAGE_FAULT = 0x1,
};
+/* hypercall definitions of GZVM_EXIT_HYPERCALL */
+enum {
+ GZVM_HVC_MEM_RELINQUISH = 0xc6000009,
+};
+
/**
* struct gzvm_vcpu_run: Same purpose as kvm_run, this struct is
* shared between userspace, kernel and