@@ -102,6 +102,8 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
void kvm_mmu_sync_prev_roots(struct kvm_vcpu *vcpu);
void kvm_mmu_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
int bytes);
+int kvm_mmu_move_mirror_pages_from(struct kvm_vcpu *vcpu,
+ struct kvm_vcpu *src_vcpu);
static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)
{
@@ -3943,6 +3943,72 @@ static int mmu_first_shadow_root_alloc(struct kvm *kvm)
return r;
}
+int kvm_mmu_move_mirror_pages_from(struct kvm_vcpu *vcpu,
+ struct kvm_vcpu *src_vcpu)
+{
+ struct kvm_mmu *mmu = vcpu->arch.mmu;
+ struct kvm_mmu *src_mmu = src_vcpu->arch.mmu;
+ gfn_t gfn_shared = kvm_gfn_direct_bits(vcpu->kvm);
+ hpa_t mirror_root_hpa;
+ int r;
+
+ if (!gfn_shared)
+ return -EINVAL;
+
+ r = mmu_topup_memory_caches(vcpu, !mmu->root_role.direct);
+ if (r)
+ return r;
+
+ /* Hold locks for both src and dst. Always take the src lock first. */
+ read_lock(&src_vcpu->kvm->mmu_lock);
+ write_lock_nested(&vcpu->kvm->mmu_lock, SINGLE_DEPTH_NESTING);
+
+ WARN_ON_ONCE(!is_tdp_mmu_active(vcpu));
+ WARN_ON_ONCE(!is_tdp_mmu_active(src_vcpu));
+
+ /*
+ * The mirror root is moved from the src to the dst; the src's
+ * reference is dropped below and its mirror_root_hpa invalidated.
+ */
+ mirror_root_hpa = kvm_tdp_mmu_move_mirror_pages_from(vcpu, src_vcpu);
+ if (mirror_root_hpa == INVALID_PAGE) {
+ struct kvm_mmu_page *mirror_root;
+ union kvm_mmu_page_role role = vcpu->arch.mmu->root_role;
+
+ /*
+ * This likely means that the mirror root was already moved by
+ * another vCPU.
+ */
+ role.is_mirror = true;
+ mirror_root = kvm_tdp_mmu_get_vcpu_root(vcpu, role);
+ if (!mirror_root) {
+ r = -EINVAL;
+ goto out_unlock;
+ }
+ mirror_root_hpa = __pa(mirror_root->spt);
+ }
+
+ mmu->mirror_root_hpa = mirror_root_hpa;
+ mmu_free_root_page(src_vcpu->kvm, &src_mmu->mirror_root_hpa, NULL);
+ write_unlock(&vcpu->kvm->mmu_lock);
+ read_unlock(&src_vcpu->kvm->mmu_lock);
+
+ /* The direct root is allocated normally and is not moved from src. */
+ kvm_tdp_mmu_alloc_root(vcpu, false);
+
+ kvm_mmu_load_pgd(vcpu);
+ kvm_x86_call(flush_tlb_current)(vcpu);
+
+ return r;
+
+out_unlock:
+ write_unlock(&vcpu->kvm->mmu_lock);
+ read_unlock(&src_vcpu->kvm->mmu_lock);
+
+ return r;
+}
+EXPORT_SYMBOL_GPL(kvm_mmu_move_mirror_pages_from);
+
static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
{
struct kvm_mmu *mmu = vcpu->arch.mmu;
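Illustration only, not part of the patch: a plausible caller would pair each destination vCPU with the source vCPU of the same index and invoke kvm_mmu_move_mirror_pages_from() once per pair; both MMU locks are taken inside the helper. The function name move_mirror_roots() and the surrounding call site are assumptions here, while kvm_for_each_vcpu() and kvm_get_vcpu() are existing KVM helpers.

static int move_mirror_roots(struct kvm *dst_kvm, struct kvm *src_kvm)
{
	struct kvm_vcpu *dst_vcpu, *src_vcpu;
	unsigned long i;
	int r;

	kvm_for_each_vcpu(i, dst_vcpu, dst_kvm) {
		/* Pair each dst vCPU with the src vCPU of the same index. */
		src_vcpu = kvm_get_vcpu(src_kvm, i);
		if (!src_vcpu)
			return -EINVAL;

		/* Takes both MMU locks internally, src lock first. */
		r = kvm_mmu_move_mirror_pages_from(dst_vcpu, src_vcpu);
		if (r)
			return r;
	}

	return 0;
}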
@@ -251,6 +251,22 @@ static void tdp_mmu_init_child_sp(struct kvm_mmu_page *child_sp,
tdp_mmu_init_sp(child_sp, iter->sptep, iter->gfn, role);
}
+struct kvm_mmu_page *
+kvm_tdp_mmu_get_vcpu_root(struct kvm_vcpu *vcpu,
+ union kvm_mmu_page_role role)
+{
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_mmu_page *root;
+
+ lockdep_assert_held(&kvm->mmu_lock);
+ list_for_each_entry(root, &kvm->arch.tdp_mmu_roots, link) {
+ if (root->role.word == role.word &&
+ !WARN_ON_ONCE(!kvm_tdp_mmu_get_root(root)))
+ return root;
+ }
+ return NULL;
+}
+
void kvm_tdp_mmu_alloc_root(struct kvm_vcpu *vcpu, bool mirror)
{
struct kvm_mmu *mmu = vcpu->arch.mmu;
@@ -285,11 +301,9 @@ void kvm_tdp_mmu_alloc_root(struct kvm_vcpu *vcpu, bool mirror)
* fails, as the last reference to a root can only be put *after* the
* root has been invalidated, which requires holding mmu_lock for write.
*/
- list_for_each_entry(root, &kvm->arch.tdp_mmu_roots, link) {
- if (root->role.word == role.word &&
- !WARN_ON_ONCE(!kvm_tdp_mmu_get_root(root)))
- goto out_spin_unlock;
- }
+ root = kvm_tdp_mmu_get_vcpu_root(vcpu, role);
+ if (root)
+ goto out_spin_unlock;
root = tdp_mmu_alloc_sp(vcpu);
tdp_mmu_init_sp(root, NULL, 0, role);
@@ -321,6 +335,45 @@ void kvm_tdp_mmu_alloc_root(struct kvm_vcpu *vcpu, bool mirror)
}
}
+hpa_t kvm_tdp_mmu_move_mirror_pages_from(struct kvm_vcpu *vcpu,
+ struct kvm_vcpu *src_vcpu)
+{
+ union kvm_mmu_page_role role = vcpu->arch.mmu->root_role;
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm *src_kvm = src_vcpu->kvm;
+ struct kvm_mmu_page *mirror_root;
+#ifdef CONFIG_KVM_PROVE_MMU
+ s64 num_mirror_pages, old;
+#endif
+
+ lockdep_assert_held_read(&src_kvm->mmu_lock);
+ lockdep_assert_held_write(&kvm->mmu_lock);
+
+ /* Find the mirror root of the source. */
+ role.is_mirror = true;
+ mirror_root = kvm_tdp_mmu_get_vcpu_root(src_vcpu, role);
+ if (!mirror_root)
+ return INVALID_PAGE;
+
+ /* Remove the mirror root from the src kvm and add it to dst kvm. */
+ spin_lock(&src_kvm->arch.tdp_mmu_pages_lock);
+ list_del_rcu(&mirror_root->link);
+ spin_unlock(&src_kvm->arch.tdp_mmu_pages_lock);
+
+ /* The destination holds a write lock so no spin_lock required. */
+ list_add_rcu(&mirror_root->link, &kvm->arch.tdp_mmu_roots);
+
+#ifdef CONFIG_KVM_PROVE_MMU
+ num_mirror_pages = atomic64_read(&src_kvm->arch.tdp_mirror_mmu_pages);
+ old = atomic64_cmpxchg(&kvm->arch.tdp_mirror_mmu_pages, 0,
+ num_mirror_pages);
+ /* The destination VM should have no mirror pages at this point. */
+ WARN_ON(old);
+ atomic64_set(&src_kvm->arch.tdp_mirror_mmu_pages, 0);
+#endif
+ return __pa(mirror_root->spt);
+}
+
static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
u64 old_spte, u64 new_spte, int level,
bool shared);
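Side note, illustration only: list_del_rcu()/list_add_rcu() are used in the hand-off above because TDP MMU code may walk kvm->arch.tdp_mmu_roots under RCU rather than under tdp_mmu_pages_lock. A minimal sketch of such an RCU-side reader follows; the helper name count_tdp_mmu_roots() is an assumption, not part of the patch.

static int count_tdp_mmu_roots(struct kvm *kvm)
{
	struct kvm_mmu_page *root;
	int n = 0;

	/* Walk the roots list under RCU, without taking any MMU lock. */
	rcu_read_lock();
	list_for_each_entry_rcu(root, &kvm->arch.tdp_mmu_roots, link)
		n++;
	rcu_read_unlock();

	return n;
}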
@@ -63,6 +63,12 @@ static inline struct kvm_mmu_page *tdp_mmu_get_root(struct kvm_vcpu *vcpu,
return root_to_sp(vcpu->arch.mmu->root.hpa);
}
+struct kvm_mmu_page *
+kvm_tdp_mmu_get_vcpu_root(struct kvm_vcpu *vcpu,
+ union kvm_mmu_page_role role);
+hpa_t kvm_tdp_mmu_move_mirror_pages_from(struct kvm_vcpu *vcpu,
+ struct kvm_vcpu *src_vcpu);
+
bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, gfn_t start, gfn_t end, bool flush);
bool kvm_tdp_mmu_zap_sp(struct kvm *kvm, struct kvm_mmu_page *sp);
void kvm_tdp_mmu_zap_all(struct kvm *kvm);