Message ID | 20241129123929.64790-2-kalyazin@amazon.com |
---|---|
State | New |
Headers | show |
Series | [RFC,v2,1/2] KVM: guest_memfd: add generic population via write | expand |
On 11/29/24 06:39, Nikita Kalyazin wrote: > > +#if defined(CONFIG_KVM_GENERIC_PRIVATE_MEM) && !defined(CONFIG_KVM_AMD_SEV) Another option is to use the confidential computing (coco) attributes to keep the write operation limited to clear-text guests (diff against patch 1/2): There are a couple of benefits and shortcomings that I've listed below the diff. diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c index 9aba0ba25276..b7a0c7f2f82d 100644 --- a/virt/kvm/guest_memfd.c +++ b/virt/kvm/guest_memfd.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/backing-dev.h> +#include <linux/cc_platform.h> #include <linux/falloc.h> #include <linux/kvm_host.h> #include <linux/pagemap.h> @@ -274,7 +275,14 @@ static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index) return filemap_grab_folio(inode->i_mapping, index); } -#if defined(CONFIG_KVM_GENERIC_PRIVATE_MEM) && !defined(CONFIG_KVM_AMD_SEV) +static bool kvm_has_cc(void) +{ + if (cc_platform_has(CC_ATTR_MEM_ENCRYPT)) + return true; + return false; +} + +#if defined(CONFIG_KVM_GENERIC_PRIVATE_MEM) static ssize_t kvm_kmem_gmem_write(struct file *file, const char __user *buf, size_t count, loff_t *offset) { @@ -290,6 +298,9 @@ static ssize_t kvm_kmem_gmem_write(struct file *file, const char __user *buf, if (!buf) return -EINVAL; + if (kvm_has_cc()) + return -EIO; + start = *offset >> PAGE_SHIFT; end = (*offset + count) >> PAGE_SHIFT; @@ -564,7 +575,7 @@ static pgoff_t kvm_gmem_get_index(struct kvm_memory_slot *slot, gfn_t gfn) } static struct file_operations kvm_gmem_fops = { -#if defined(CONFIG_KVM_GENERIC_PRIVATE_MEM) && !defined(CONFIG_KVM_AMD_SEV) +#if defined(CONFIG_KVM_GENERIC_PRIVATE_MEM) .llseek = default_llseek, .write = kvm_kmem_gmem_write, #endif Advantages: * works with multiple architectures (powerpc and x86 so far) * enumerates specific types of coco attributes Disadvantages: * The platform can have an encryption attribute but still be running a guest in clear text * 
Some guests could be encrypted while others are clear text. To remedy these disadvantages, the write function would need to check whether guest encryption is currently active for the specific guest. Mike > +static ssize_t kvm_kmem_gmem_write(struct file *file, const char __user *buf, > + size_t count, loff_t *offset) > +{ > + pgoff_t start, end, index; > + ssize_t ret = 0;
diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c index 47a9f68f7b24..e80566ef56e9 100644 --- a/virt/kvm/guest_memfd.c +++ b/virt/kvm/guest_memfd.c @@ -102,6 +102,80 @@ static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index) return filemap_grab_folio(inode->i_mapping, index); } +#if defined(CONFIG_KVM_GENERIC_PRIVATE_MEM) && !defined(CONFIG_KVM_AMD_SEV) +static ssize_t kvm_kmem_gmem_write(struct file *file, const char __user *buf, + size_t count, loff_t *offset) +{ + pgoff_t start, end, index; + ssize_t ret = 0; + + if (!PAGE_ALIGNED(*offset) || !PAGE_ALIGNED(count)) + return -EINVAL; + + if (*offset + count > i_size_read(file_inode(file))) + return -EINVAL; + + if (!buf) + return -EINVAL; + + start = *offset >> PAGE_SHIFT; + end = (*offset + count) >> PAGE_SHIFT; + + filemap_invalidate_lock(file->f_mapping); + + for (index = start; index < end; ) { + struct folio *folio; + void *vaddr; + pgoff_t buf_offset = (index - start) << PAGE_SHIFT; + + if (signal_pending(current)) { + ret = -EINTR; + goto out; + } + + folio = kvm_gmem_get_folio(file_inode(file), index); + if (IS_ERR(folio)) { + ret = -EFAULT; + goto out; + } + + if (folio_test_hwpoison(folio)) { + folio_unlock(folio); + folio_put(folio); + ret = -EFAULT; + goto out; + } + + if (folio_test_uptodate(folio)) { + folio_unlock(folio); + folio_put(folio); + ret = -ENOSPC; + goto out; + } + + folio_unlock(folio); + + vaddr = kmap_local_folio(folio, 0); + ret = copy_from_user(vaddr, buf + buf_offset, PAGE_SIZE); + if (ret) + ret = -EINVAL; + kunmap_local(vaddr); + + kvm_gmem_mark_prepared(folio); + folio_put(folio); + + index = folio_next_index(folio); + *offset += PAGE_SIZE; + } + +out: + filemap_invalidate_unlock(file->f_mapping); + + return ret && start == (*offset >> PAGE_SHIFT) ? 
+ ret : *offset - (start << PAGE_SHIFT); +} +#endif + static void kvm_gmem_invalidate_begin(struct kvm_gmem *gmem, pgoff_t start, pgoff_t end) { @@ -308,6 +382,10 @@ static pgoff_t kvm_gmem_get_index(struct kvm_memory_slot *slot, gfn_t gfn) } static struct file_operations kvm_gmem_fops = { +#if defined(CONFIG_KVM_GENERIC_PRIVATE_MEM) && !defined(CONFIG_KVM_AMD_SEV) + .llseek = default_llseek, + .write = kvm_kmem_gmem_write, +#endif .open = generic_file_open, .release = kvm_gmem_release, .fallocate = kvm_gmem_fallocate, @@ -423,6 +501,7 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags) } file->f_flags |= O_LARGEFILE; + file->f_mode |= FMODE_LSEEK | FMODE_PWRITE; inode = file->f_inode; WARN_ON(file->f_mapping != inode->i_mapping);
The write syscall populates guest_memfd with user-supplied data in a generic way, i.e. no vendor-specific preparation is performed. This is intended for use in non-CoCo setups where guest memory is not hardware-encrypted. The following behaviour is implemented: - only page-aligned count and offset are allowed - if the memory is already allocated, the call will successfully populate it - if the memory is not allocated, the call will both allocate and populate it - if the memory is already populated, the call will not repopulate it Signed-off-by: Nikita Kalyazin <kalyazin@amazon.com> --- virt/kvm/guest_memfd.c | 79 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+)