linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
* [RFC PATCH v2 0/2] KVM: guest_memfd: use write for population
@ 2024-11-29 12:39 Nikita Kalyazin
  2024-11-29 12:39 ` [RFC PATCH v2 1/2] KVM: guest_memfd: add generic population via write Nikita Kalyazin
  2024-11-29 12:39 ` [RFC PATCH v2 2/2] KVM: selftests: update guest_memfd write tests Nikita Kalyazin
  0 siblings, 2 replies; 4+ messages in thread
From: Nikita Kalyazin @ 2024-11-29 12:39 UTC (permalink / raw)
  To: pbonzini, shuah, kvm, linux-kselftest, linux-kernel
  Cc: linux-mm, michael.day, david, quic_eberman, jthoughton,
	brijesh.singh, michael.roth, graf, jgowans, roypat, derekmn,
	nsaenz, xmarcalx, kalyazin

As discussed in v1 [1], with guest_memfd moving from KVM to mm, it
is more practical to have a non-KVM-specific API to populate guest
memory in a generic way.  This series proposes using the write syscall
for this purpose instead of a KVM ioctl as in v1.  The approach also
has the advantage that the guest_memfd handle can be sent to another
process that would be responsible for population.  I also included a
suggestion from Mike Day to exclude the code from compilation if AMD
SEV is configured.

There is potential to refactor kvm_gmem_populate to extract the parts
it has in common with write.  I have not done that in this series yet,
to keep it clear what write does and to get feedback on whether its
behaviour is sensible.

Nikita

[1]: https://lore.kernel.org/kvm/20241024095429.54052-1-kalyazin@amazon.com/T/

Nikita Kalyazin (2):
  KVM: guest_memfd: add generic population via write
  KVM: selftests: update guest_memfd write tests

 .../testing/selftests/kvm/guest_memfd_test.c  | 85 +++++++++++++++++--
 virt/kvm/guest_memfd.c                        | 79 +++++++++++++++++
 2 files changed, 158 insertions(+), 6 deletions(-)


base-commit: 1508bae37044ebffd7c7e09915f041936f338123
-- 
2.40.1



^ permalink raw reply	[flat|nested] 4+ messages in thread

* [RFC PATCH v2 1/2] KVM: guest_memfd: add generic population via write
  2024-11-29 12:39 [RFC PATCH v2 0/2] KVM: guest_memfd: use write for population Nikita Kalyazin
@ 2024-11-29 12:39 ` Nikita Kalyazin
  2024-12-03 14:55   ` Mike Day
  2024-11-29 12:39 ` [RFC PATCH v2 2/2] KVM: selftests: update guest_memfd write tests Nikita Kalyazin
  1 sibling, 1 reply; 4+ messages in thread
From: Nikita Kalyazin @ 2024-11-29 12:39 UTC (permalink / raw)
  To: pbonzini, shuah, kvm, linux-kselftest, linux-kernel
  Cc: linux-mm, michael.day, david, quic_eberman, jthoughton,
	brijesh.singh, michael.roth, graf, jgowans, roypat, derekmn,
	nsaenz, xmarcalx, kalyazin

The write syscall populates guest_memfd with user-supplied data in a
generic way, i.e. no vendor-specific preparation is performed.  This is
intended to be used in non-CoCo setups where guest memory is not
hardware-encrypted.

The following behaviour is implemented:
 - only page-aligned count and offset are allowed
 - if the memory is already allocated, the call will successfully
   populate it
 - if the memory is not allocated, the call will both allocate and
   populate
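 - if the memory is already populated, the call will not repopulate it:
   it fails with ENOSPC if no page was written, or returns a short
   count if earlier pages in the range were written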

Signed-off-by: Nikita Kalyazin <kalyazin@amazon.com>
---
 virt/kvm/guest_memfd.c | 79 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 79 insertions(+)

diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index 47a9f68f7b24..e80566ef56e9 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -102,6 +102,80 @@ static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index)
 	return filemap_grab_folio(inode->i_mapping, index);
 }
 
+#if defined(CONFIG_KVM_GENERIC_PRIVATE_MEM) && !defined(CONFIG_KVM_AMD_SEV)
+static ssize_t kvm_kmem_gmem_write(struct file *file, const char __user *buf,
+				   size_t count, loff_t *offset)
+{
+	pgoff_t start, end, index;
+	ssize_t ret = 0;
+
+	if (!PAGE_ALIGNED(*offset) || !PAGE_ALIGNED(count))
+		return -EINVAL;
+
+	if (*offset + count > i_size_read(file_inode(file)))
+		return -EINVAL;
+
+	if (!buf)
+		return -EINVAL;
+
+	start = *offset >> PAGE_SHIFT;
+        end = (*offset + count) >> PAGE_SHIFT;
+
+	filemap_invalidate_lock(file->f_mapping);
+
+	for (index = start; index < end; ) {
+		struct folio *folio;
+		void *vaddr;
+		pgoff_t buf_offset = (index - start) << PAGE_SHIFT;
+
+		if (signal_pending(current)) {
+			ret = -EINTR;
+			goto out;
+		}
+
+		folio = kvm_gmem_get_folio(file_inode(file), index);
+		if (IS_ERR(folio)) {
+			ret = -EFAULT;
+			goto out;
+		}
+
+		if (folio_test_hwpoison(folio)) {
+			folio_unlock(folio);
+			folio_put(folio);
+			ret = -EFAULT;
+			goto out;
+		}
+
+		if (folio_test_uptodate(folio)) {
+			folio_unlock(folio);
+			folio_put(folio);
+			ret = -ENOSPC;
+			goto out;
+		}
+
+		folio_unlock(folio);
+
+		vaddr = kmap_local_folio(folio, 0);
+		ret = copy_from_user(vaddr, buf + buf_offset, PAGE_SIZE);
+		if (ret)
+			ret = -EINVAL;
+		kunmap_local(vaddr);
+
+		kvm_gmem_mark_prepared(folio);
+		folio_put(folio);
+
+		index = folio_next_index(folio);
+		*offset += PAGE_SIZE;
+	}
+
+out:
+	filemap_invalidate_unlock(file->f_mapping);
+
+	return ret && start == (*offset >> PAGE_SHIFT) ?
+		ret : *offset - (start << PAGE_SHIFT);
+}
+#endif
+
 static void kvm_gmem_invalidate_begin(struct kvm_gmem *gmem, pgoff_t start,
 				      pgoff_t end)
 {
@@ -308,6 +382,10 @@ static pgoff_t kvm_gmem_get_index(struct kvm_memory_slot *slot, gfn_t gfn)
 }
 
 static struct file_operations kvm_gmem_fops = {
+#if defined(CONFIG_KVM_GENERIC_PRIVATE_MEM) && !defined(CONFIG_KVM_AMD_SEV)
+	.llseek         = default_llseek,
+	.write          = kvm_kmem_gmem_write,
+#endif
 	.open		= generic_file_open,
 	.release	= kvm_gmem_release,
 	.fallocate	= kvm_gmem_fallocate,
@@ -423,6 +501,7 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
 	}
 
 	file->f_flags |= O_LARGEFILE;
+	file->f_mode |= FMODE_LSEEK | FMODE_PWRITE;
 
 	inode = file->f_inode;
 	WARN_ON(file->f_mapping != inode->i_mapping);
-- 
2.40.1



^ permalink raw reply	[flat|nested] 4+ messages in thread

* [RFC PATCH v2 2/2] KVM: selftests: update guest_memfd write tests
  2024-11-29 12:39 [RFC PATCH v2 0/2] KVM: guest_memfd: use write for population Nikita Kalyazin
  2024-11-29 12:39 ` [RFC PATCH v2 1/2] KVM: guest_memfd: add generic population via write Nikita Kalyazin
@ 2024-11-29 12:39 ` Nikita Kalyazin
  1 sibling, 0 replies; 4+ messages in thread
From: Nikita Kalyazin @ 2024-11-29 12:39 UTC (permalink / raw)
  To: pbonzini, shuah, kvm, linux-kselftest, linux-kernel
  Cc: linux-mm, michael.day, david, quic_eberman, jthoughton,
	brijesh.singh, michael.roth, graf, jgowans, roypat, derekmn,
	nsaenz, xmarcalx, kalyazin

This is to reflect that the write syscall is now implemented for
guest_memfd.

Signed-off-by: Nikita Kalyazin <kalyazin@amazon.com>
---
 .../testing/selftests/kvm/guest_memfd_test.c  | 85 +++++++++++++++++--
 1 file changed, 79 insertions(+), 6 deletions(-)

diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c
index ce687f8d248f..e10d0f51da93 100644
--- a/tools/testing/selftests/kvm/guest_memfd_test.c
+++ b/tools/testing/selftests/kvm/guest_memfd_test.c
@@ -20,18 +20,90 @@
 #include "kvm_util.h"
 #include "test_util.h"
 
-static void test_file_read_write(int fd)
+static void test_file_read(int fd)
 {
 	char buf[64];
 
 	TEST_ASSERT(read(fd, buf, sizeof(buf)) < 0,
 		    "read on a guest_mem fd should fail");
-	TEST_ASSERT(write(fd, buf, sizeof(buf)) < 0,
-		    "write on a guest_mem fd should fail");
 	TEST_ASSERT(pread(fd, buf, sizeof(buf), 0) < 0,
 		    "pread on a guest_mem fd should fail");
-	TEST_ASSERT(pwrite(fd, buf, sizeof(buf), 0) < 0,
-		    "pwrite on a guest_mem fd should fail");
+}
+
+static void test_file_write(int fd, size_t total_size)
+{
+	size_t page_size = getpagesize();
+	void *buf = NULL;
+	int ret;
+
+	ret = posix_memalign(&buf, page_size, total_size);
+	TEST_ASSERT_EQ(ret, 0);
+
+	/* Check arguments correctness checks work as expected */
+
+	ret = pwrite(fd, buf, page_size - 1, 0);
+	TEST_ASSERT(ret == -1, "write unaligned count on a guest_mem fd should fail");
+	TEST_ASSERT_EQ(errno, EINVAL);
+
+	ret = pwrite(fd, buf, page_size, 1);
+	TEST_ASSERT(ret == -1, "write unaligned offset on a guest_mem fd should fail");
+	TEST_ASSERT_EQ(errno, EINVAL);
+
+	ret = pwrite(fd, buf, page_size, total_size);
+	TEST_ASSERT(ret == -1, "writing past the file size on a guest_mem fd should fail");
+	TEST_ASSERT_EQ(errno, EINVAL);
+
+	ret = pwrite(fd, NULL, page_size, 0);
+	TEST_ASSERT(ret == -1, "supplying a NULL buffer when writing a guest_mem fd should fail");
+	TEST_ASSERT_EQ(errno, EINVAL);
+
+	/* Check double population is not allowed */
+
+	ret = pwrite(fd, buf, page_size, 0);
+	TEST_ASSERT(ret == page_size, "page-aligned write on a guest_mem fd should succeed");
+
+	ret = pwrite(fd, buf, page_size, 0);
+	TEST_ASSERT(ret == -1, "write on already populated guest_mem fd should fail");
+	TEST_ASSERT_EQ(errno, ENOSPC);
+
+	ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, page_size);
+	TEST_ASSERT(!ret, "fallocate(PUNCH_HOLE) should succeed");
+
+	/* Check population is allowed again after punching a hole */
+
+	ret = pwrite(fd, buf, page_size, 0);
+	TEST_ASSERT(ret == page_size, "page-aligned write on a punched guest_mem fd should succeed");
+
+	ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, page_size);
+	TEST_ASSERT(!ret, "fallocate(PUNCH_HOLE) should succeed");
+
+	/* Check population of already allocated memory is allowed */
+
+	ret = fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, page_size);
+	TEST_ASSERT(!ret, "fallocate with aligned offset and size should succeed");
+
+	ret = pwrite(fd, buf, page_size, 0);
+	TEST_ASSERT(ret == page_size, "write on a preallocated guest_mem fd should succeed");
+
+	ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, page_size);
+	TEST_ASSERT(!ret, "fallocate(PUNCH_HOLE) should succeed");
+
+	/* Check population works until an already populated page is encountered */
+
+	ret = pwrite(fd, buf, total_size, 0);
+	TEST_ASSERT(ret == total_size, "page-aligned write on a guest_mem fd should succeed");
+
+	ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, page_size);
+	TEST_ASSERT(!ret, "fallocate(PUNCH_HOLE) should succeed");
+
+	ret = pwrite(fd, buf, total_size, 0);
+	TEST_ASSERT(ret == page_size, "write on a guest_mem fd should not overwrite data");
+
+	ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, total_size);
+	TEST_ASSERT(!ret, "fallocate(PUNCH_HOLE) should succeed");
+
+
+	free(buf);
 }
 
 static void test_mmap(int fd, size_t page_size)
@@ -189,7 +261,8 @@ int main(int argc, char *argv[])
 
 	fd = vm_create_guest_memfd(vm, total_size, 0);
 
-	test_file_read_write(fd);
+	test_file_read(fd);
+	test_file_write(fd, total_size);
 	test_mmap(fd, page_size);
 	test_file_size(fd, page_size, total_size);
 	test_fallocate(fd, page_size, total_size);
-- 
2.40.1



^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [RFC PATCH v2 1/2] KVM: guest_memfd: add generic population via write
  2024-11-29 12:39 ` [RFC PATCH v2 1/2] KVM: guest_memfd: add generic population via write Nikita Kalyazin
@ 2024-12-03 14:55   ` Mike Day
  0 siblings, 0 replies; 4+ messages in thread
From: Mike Day @ 2024-12-03 14:55 UTC (permalink / raw)
  To: Nikita Kalyazin, pbonzini, shuah, kvm, linux-kselftest, linux-kernel
  Cc: linux-mm, david, quic_eberman, jthoughton, brijesh.singh,
	michael.roth, graf, jgowans, roypat, derekmn, nsaenz, xmarcalx

On 11/29/24 06:39, Nikita Kalyazin wrote:
>   
> +#if defined(CONFIG_KVM_GENERIC_PRIVATE_MEM) && !defined(CONFIG_KVM_AMD_SEV)

Another option is to use the confidential computing (coco) attributes to keep
the write operation limited to clear-text guests (diff against patch 1/2 below).
There are a couple of benefits and shortcomings that I've listed below the diff.

diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index 9aba0ba25276..b7a0c7f2f82d 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -1,5 +1,6 @@
  // SPDX-License-Identifier: GPL-2.0
  #include <linux/backing-dev.h>
+#include <linux/cc_platform.h>
  #include <linux/falloc.h>
  #include <linux/kvm_host.h>
  #include <linux/pagemap.h>
@@ -274,7 +275,14 @@ static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index)
         return filemap_grab_folio(inode->i_mapping, index);
  }

-#if defined(CONFIG_KVM_GENERIC_PRIVATE_MEM) && !defined(CONFIG_KVM_AMD_SEV)
+static bool kvm_has_cc(void)
+{
+       if (cc_platform_has(CC_ATTR_MEM_ENCRYPT))
+               return true;
+       return false;
+}
+
+#if defined(CONFIG_KVM_GENERIC_PRIVATE_MEM)
  static ssize_t kvm_kmem_gmem_write(struct file *file, const char __user *buf,
                                    size_t count, loff_t *offset)
  {
@@ -290,6 +298,9 @@ static ssize_t kvm_kmem_gmem_write(struct file *file, const char __user *buf,
         if (!buf)
                 return -EINVAL;

+       if (kvm_has_cc())
+               return -EIO;
+
         start = *offset >> PAGE_SHIFT;
          end = (*offset + count) >> PAGE_SHIFT;

@@ -564,7 +575,7 @@ static pgoff_t kvm_gmem_get_index(struct kvm_memory_slot *slot, gfn_t gfn)
  }

  static struct file_operations kvm_gmem_fops = {
-#if defined(CONFIG_KVM_GENERIC_PRIVATE_MEM) && !defined(CONFIG_KVM_AMD_SEV)
+#if defined(CONFIG_KVM_GENERIC_PRIVATE_MEM)
         .llseek         = default_llseek,
         .write          = kvm_kmem_gmem_write,
  #endif

Advantages:
  * works with multiple architectures (powerpc and x86 so far)
  * enumerates specific types of coco attributes

Disadvantages:
  * The platform can have an encryption attribute but still be running a guest in clear text
  * Some guests could be encrypted while others are clear text

To remedy these disadvantages, the write function would need to check whether guest
encryption is currently active for the specific guest.

Mike

> +static ssize_t kvm_kmem_gmem_write(struct file *file, const char __user *buf,
> +				   size_t count, loff_t *offset)
> +{
> +	pgoff_t start, end, index;
> +	ssize_t ret = 0;


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2024-12-03 14:56 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-11-29 12:39 [RFC PATCH v2 0/2] KVM: guest_memfd: use write for population Nikita Kalyazin
2024-11-29 12:39 ` [RFC PATCH v2 1/2] KVM: guest_memfd: add generic population via write Nikita Kalyazin
2024-12-03 14:55   ` Mike Day
2024-11-29 12:39 ` [RFC PATCH v2 2/2] KVM: selftests: update guest_memfd write tests Nikita Kalyazin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox