linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: James Gowans <jgowans@amazon.com>
To: <linux-kernel@vger.kernel.org>
Cc: Eric Biederman <ebiederm@xmission.com>,
	<kexec@lists.infradead.org>, "Joerg Roedel" <joro@8bytes.org>,
	Will Deacon <will@kernel.org>, <iommu@lists.linux.dev>,
	Alexander Viro <viro@zeniv.linux.org.uk>,
	"Christian Brauner" <brauner@kernel.org>,
	<linux-fsdevel@vger.kernel.org>,
	Paolo Bonzini <pbonzini@redhat.com>,
	Sean Christopherson <seanjc@google.com>, <kvm@vger.kernel.org>,
	Andrew Morton <akpm@linux-foundation.org>, <linux-mm@kvack.org>,
	Alexander Graf <graf@amazon.com>,
	David Woodhouse <dwmw@amazon.co.uk>,
	"Jan H . Schoenherr" <jschoenh@amazon.de>,
	Usama Arif <usama.arif@bytedance.com>,
	Anthony Yznaga <anthony.yznaga@oracle.com>,
	Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>,
	<madvenka@linux.microsoft.com>, <steven.sistare@oracle.com>,
	<yuleixzhang@tencent.com>
Subject: [RFC 13/18] vfio: add ioctl to define persistent pgtables on container
Date: Mon, 5 Feb 2024 12:01:58 +0000	[thread overview]
Message-ID: <20240205120203.60312-14-jgowans@amazon.com> (raw)
In-Reply-To: <20240205120203.60312-1-jgowans@amazon.com>

The previous commits added a file type in pkernfs for IOMMU persistent
page tables. Now support actually setting persistent page tables on an
IOMMU domain. This is done via a VFIO ioctl on a VFIO container.

Userspace needs to create and open a IOMMU persistent page tables file
and then supply that fd to the new VFIO_CONTAINER_SET_PERSISTENT_PGTABLES
ioctl. That ioctl sets the supplied struct file on the
struct vfio_container. Later when the IOMMU domain is allocated by VFIO,
VFIO  will check to see if the persistent pagetables have been defined
and if they have will use the iommu_domain_alloc_persistent API which
was introduced in the previous commit to pass the struct file down to
the IOMMU which will actually use it for page tables.

After kexec userspace needs to open the same IOMMU page table file and
set it again via the same ioctl so that the IOMMU continues to use the
same memory region for its page tables for that domain.
---
 drivers/vfio/container.c        | 27 +++++++++++++++++++++++++++
 drivers/vfio/vfio.h             |  2 ++
 drivers/vfio/vfio_iommu_type1.c | 27 +++++++++++++++++++++++++--
 include/uapi/linux/vfio.h       |  9 +++++++++
 4 files changed, 63 insertions(+), 2 deletions(-)

diff --git a/drivers/vfio/container.c b/drivers/vfio/container.c
index d53d08f16973..b60fcbf7bad0 100644
--- a/drivers/vfio/container.c
+++ b/drivers/vfio/container.c
@@ -11,6 +11,7 @@
 #include <linux/iommu.h>
 #include <linux/miscdevice.h>
 #include <linux/vfio.h>
+#include <linux/pkernfs.h>
 #include <uapi/linux/vfio.h>
 
 #include "vfio.h"
@@ -21,6 +22,7 @@ struct vfio_container {
 	struct rw_semaphore		group_lock;
 	struct vfio_iommu_driver	*iommu_driver;
 	void				*iommu_data;
+	struct file			*persistent_pgtables;
 	bool				noiommu;
 };
 
@@ -306,6 +308,8 @@ static long vfio_ioctl_set_iommu(struct vfio_container *container,
 			continue;
 		}
 
+		driver->ops->set_persistent_pgtables(data, container->persistent_pgtables);
+
 		ret = __vfio_container_attach_groups(container, driver, data);
 		if (ret) {
 			driver->ops->release(data);
@@ -324,6 +328,26 @@ static long vfio_ioctl_set_iommu(struct vfio_container *container,
 	return ret;
 }
 
+static int vfio_ioctl_set_persistent_pgtables(struct vfio_container *container,
+		unsigned long arg)
+{
+	struct vfio_set_persistent_pgtables set_ppts;
+	struct file *ppts;
+
+	if (copy_from_user(&set_ppts, (void __user *)arg, sizeof(set_ppts)))
+		return -EFAULT;
+
+	ppts = fget(set_ppts.persistent_pgtables_fd);
+	if (!ppts)
+		return -EBADF;
+	if (!pkernfs_is_iommu_domain_pgtables(ppts)) {
+		fput(ppts);
+		return -EBADF;
+	}
+	container->persistent_pgtables = ppts;
+	return 0;
+}
+
 static long vfio_fops_unl_ioctl(struct file *filep,
 				unsigned int cmd, unsigned long arg)
 {
@@ -345,6 +369,9 @@ static long vfio_fops_unl_ioctl(struct file *filep,
 	case VFIO_SET_IOMMU:
 		ret = vfio_ioctl_set_iommu(container, arg);
 		break;
+	case VFIO_CONTAINER_SET_PERSISTENT_PGTABLES:
+		ret = vfio_ioctl_set_persistent_pgtables(container, arg);
+		break;
 	default:
 		driver = container->iommu_driver;
 		data = container->iommu_data;
diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h
index 307e3f29b527..6fa301bf6474 100644
--- a/drivers/vfio/vfio.h
+++ b/drivers/vfio/vfio.h
@@ -226,6 +226,8 @@ struct vfio_iommu_driver_ops {
 				  void *data, size_t count, bool write);
 	struct iommu_domain *(*group_iommu_domain)(void *iommu_data,
 						   struct iommu_group *group);
+	int		(*set_persistent_pgtables)(void *iommu_data,
+						   struct file *ppts);
 };
 
 struct vfio_iommu_driver {
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index eacd6ec04de5..b36edfc5c9ef 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -75,6 +75,7 @@ struct vfio_iommu {
 	bool			nesting;
 	bool			dirty_page_tracking;
 	struct list_head	emulated_iommu_groups;
+	struct file		*persistent_pgtables;
 };
 
 struct vfio_domain {
@@ -2143,9 +2144,14 @@ static void vfio_iommu_iova_insert_copy(struct vfio_iommu *iommu,
 
 static int vfio_iommu_domain_alloc(struct device *dev, void *data)
 {
+	/* data is an in pointer to PPTs, and an out to the new domain. */
+	struct file *ppts = *(struct file **) data;
 	struct iommu_domain **domain = data;
 
-	*domain = iommu_domain_alloc(dev->bus);
+	if (ppts)
+		*domain = iommu_domain_alloc_persistent(dev->bus, ppts);
+	else
+		*domain = iommu_domain_alloc(dev->bus);
 	return 1; /* Don't iterate */
 }
 
@@ -2156,6 +2162,8 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
 	struct vfio_iommu_group *group;
 	struct vfio_domain *domain, *d;
 	bool resv_msi;
+	/* In/out ptr to iommu_domain_alloc. */
+	void *domain_alloc_data;
 	phys_addr_t resv_msi_base = 0;
 	struct iommu_domain_geometry *geo;
 	LIST_HEAD(iova_copy);
@@ -2203,8 +2211,12 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
 	 * want to iterate beyond the first device (if any).
 	 */
 	ret = -EIO;
-	iommu_group_for_each_dev(iommu_group, &domain->domain,
+	/* Smuggle the PPTs in the data field; it will be clobbered with the new domain */
+	domain_alloc_data = iommu->persistent_pgtables;
+	iommu_group_for_each_dev(iommu_group, &domain_alloc_data,
 				 vfio_iommu_domain_alloc);
+	domain->domain = domain_alloc_data;
+
 	if (!domain->domain)
 		goto out_free_domain;
 
@@ -3165,6 +3177,16 @@ vfio_iommu_type1_group_iommu_domain(void *iommu_data,
 	return domain;
 }
 
+int vfio_iommu_type1_set_persistent_pgtables(void *iommu_data,
+				    struct file *ppts)
+{
+
+	struct vfio_iommu *iommu = iommu_data;
+
+	iommu->persistent_pgtables = ppts;
+	return 0;
+}
+
 static const struct vfio_iommu_driver_ops vfio_iommu_driver_ops_type1 = {
 	.name			= "vfio-iommu-type1",
 	.owner			= THIS_MODULE,
@@ -3179,6 +3201,7 @@ static const struct vfio_iommu_driver_ops vfio_iommu_driver_ops_type1 = {
 	.unregister_device	= vfio_iommu_type1_unregister_device,
 	.dma_rw			= vfio_iommu_type1_dma_rw,
 	.group_iommu_domain	= vfio_iommu_type1_group_iommu_domain,
+	.set_persistent_pgtables = vfio_iommu_type1_set_persistent_pgtables,
 };
 
 static int __init vfio_iommu_type1_init(void)
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index afc1369216d9..fa9676bb4b26 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -1797,6 +1797,15 @@ struct vfio_iommu_spapr_tce_remove {
 };
 #define VFIO_IOMMU_SPAPR_TCE_REMOVE	_IO(VFIO_TYPE, VFIO_BASE + 20)
 
+struct vfio_set_persistent_pgtables {
+	/*
+	 * File descriptor for a pkernfs IOMMU pgtables
+	 * file to be used for persistence.
+	 */
+	__u32	persistent_pgtables_fd;
+};
+#define VFIO_CONTAINER_SET_PERSISTENT_PGTABLES	_IO(VFIO_TYPE, VFIO_BASE + 21)
+
 /* ***************************************************************** */
 
 #endif /* _UAPIVFIO_H */
-- 
2.40.1



  parent reply	other threads:[~2024-02-05 12:05 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-02-05 12:01 [RFC 00/18] Pkernfs: Support persistence for live update James Gowans
2024-02-05 12:01 ` [RFC 01/18] pkernfs: Introduce filesystem skeleton James Gowans
2024-02-05 12:01 ` [RFC 02/18] pkernfs: Add persistent inodes hooked into directies James Gowans
2024-02-05 12:01 ` [RFC 03/18] pkernfs: Define an allocator for persistent pages James Gowans
2024-02-05 12:01 ` [RFC 04/18] pkernfs: support file truncation James Gowans
2024-02-05 12:01 ` [RFC 05/18] pkernfs: add file mmap callback James Gowans
2024-02-05 23:34   ` Dave Chinner
2024-02-05 12:01 ` [RFC 06/18] init: Add liveupdate cmdline param James Gowans
2024-02-05 12:01 ` [RFC 07/18] pkernfs: Add file type for IOMMU root pgtables James Gowans
2024-02-05 12:01 ` [RFC 08/18] iommu: Add allocator for pgtables from persistent region James Gowans
2024-02-05 12:01 ` [RFC 09/18] intel-iommu: Use pkernfs for root/context pgtable pages James Gowans
2024-02-05 12:01 ` [RFC 10/18] iommu/intel: zap context table entries on kexec James Gowans
2024-02-05 12:01 ` [RFC 11/18] dma-iommu: Always enable deferred attaches for liveupdate James Gowans
2024-02-05 17:45   ` Jason Gunthorpe
2024-02-05 12:01 ` [RFC 12/18] pkernfs: Add IOMMU domain pgtables file James Gowans
2024-02-05 12:01 ` James Gowans [this message]
2024-02-05 17:08   ` [RFC 13/18] vfio: add ioctl to define persistent pgtables on container Jason Gunthorpe
2024-02-05 12:01 ` [RFC 14/18] intel-iommu: Allocate domain pgtable pages from pkernfs James Gowans
2024-02-05 17:12   ` Jason Gunthorpe
2024-02-05 12:02 ` [RFC 15/18] pkernfs: register device memory for IOMMU domain pgtables James Gowans
2024-02-05 12:02 ` [RFC 16/18] vfio: support not mapping IOMMU pgtables on live-update James Gowans
2024-02-05 12:02 ` [RFC 17/18] pci: Don't clear bus master is persistence enabled James Gowans
2024-02-05 12:02 ` [RFC 18/18] vfio-pci: Assume device working after liveupdate James Gowans
2024-02-05 17:10 ` [RFC 00/18] Pkernfs: Support persistence for live update Alex Williamson
2024-02-07 14:56   ` Gowans, James
2024-02-07 15:28     ` Jason Gunthorpe
2024-02-05 17:42 ` Jason Gunthorpe
2024-02-07 14:45   ` Gowans, James
2024-02-07 15:22     ` Jason Gunthorpe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240205120203.60312-14-jgowans@amazon.com \
    --to=jgowans@amazon.com \
    --cc=akpm@linux-foundation.org \
    --cc=anthony.yznaga@oracle.com \
    --cc=brauner@kernel.org \
    --cc=dwmw@amazon.co.uk \
    --cc=ebiederm@xmission.com \
    --cc=graf@amazon.com \
    --cc=iommu@lists.linux.dev \
    --cc=joro@8bytes.org \
    --cc=jschoenh@amazon.de \
    --cc=kexec@lists.infradead.org \
    --cc=kvm@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=madvenka@linux.microsoft.com \
    --cc=pbonzini@redhat.com \
    --cc=seanjc@google.com \
    --cc=skinsburskii@linux.microsoft.com \
    --cc=steven.sistare@oracle.com \
    --cc=usama.arif@bytedance.com \
    --cc=viro@zeniv.linux.org.uk \
    --cc=will@kernel.org \
    --cc=yuleixzhang@tencent.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox