linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: William Tu <witu@nvidia.com>
To: <linux-mm@kvack.org>
Cc: <jgg@nvidia.com>, <weizhang@nvidia.com>, William Tu <witu@nvidia.com>
Subject: [PATCH] vfio: add dma map/unmap support for noiommu
Date: Fri, 2 May 2025 17:48:21 -0700	[thread overview]
Message-ID: <20250503004821.16980-1-witu@nvidia.com> (raw)

Currently, when using noiommu, the VFIO uAPI does not support
VFIO_IOMMU_MAP_DMA, forcing userspace VFIO programs to use
something like /proc/self/pagemap to get the physical address.
This patch adds support for the DMA map and unmap operations
for noiommu.

Signed-off-by: William Tu <witu@nvidia.com>
---
 drivers/vfio/container.c  | 177 +++++++++++++++++++++++++++++++++++++-
 include/uapi/linux/vfio.h |  15 ++++
 2 files changed, 190 insertions(+), 2 deletions(-)

diff --git a/drivers/vfio/container.c b/drivers/vfio/container.c
index d53d08f16973..f05ba0566dfa 100644
--- a/drivers/vfio/container.c
+++ b/drivers/vfio/container.c
@@ -12,6 +12,8 @@
 #include <linux/miscdevice.h>
 #include <linux/vfio.h>
 #include <uapi/linux/vfio.h>
+#include <linux/mm.h>
+#include <linux/sched/mm.h>
 
 #include "vfio.h"
 
@@ -43,12 +45,183 @@ static void vfio_noiommu_release(void *iommu_data)
 {
 }
 
+/*
+ * vfio_noiommu_do_map - translate a virtual address to a physical address
+ * @iommu: opaque container data (not used by this function)
+ * @map:   request; @map->vaddr and @map->size are read, @map->iova is written
+ *
+ * For a user address, a reference is taken on every page backing
+ * [vaddr, vaddr + size) with get_user_pages_remote(FOLL_GET) and the
+ * physical address of the first byte is stored in @map->iova.  On success
+ * those page references are left held; this function does not drop them.
+ *
+ * Only the physical address derived from the first page is reported.
+ * Nothing here checks that the remaining pages are physically contiguous.
+ *
+ * Return: 0 on success; -EINVAL for a range shorter than one page, an
+ * address outside any VMA, a range that could not be fully pinned, or a
+ * missing mm; -ESRCH if the caller's task cannot be looked up; -ENOMEM if
+ * the page array cannot be allocated.
+ */
+static int vfio_noiommu_do_map(void *iommu, struct vfio_noiommu_dma_map *map)
+{
+	unsigned long nr_pages = map->size / PAGE_SIZE;
+	unsigned long target_vaddr = map->vaddr;
+	struct vm_area_struct *vma;
+	struct task_struct *task;
+	struct mm_struct *mm;
+	unsigned long paddr;
+	struct page **pages;
+	struct page *page;
+	int ret = 0;
+	int npgs;
+
+	/*
+	 * NOTE(review): an address at or above TASK_SIZE is treated as a
+	 * kernel virtual address and translated without pinning anything and
+	 * without looking at map->size.  The value comes straight from the
+	 * ioctl argument, so confirm this path is meant to be reachable from
+	 * userspace.
+	 */
+	if (target_vaddr >= TASK_SIZE) {
+		page = virt_to_page((void *)target_vaddr);
+		if (!page)
+			return -EINVAL;
+		paddr = page_to_phys(page) + (target_vaddr & (PAGE_SIZE - 1));
+		map->iova = paddr;
+		return 0;
+	}
+
+	/*
+	 * A size below PAGE_SIZE yields zero pages; pages[0] further down
+	 * would then be read from a zero-length allocation.
+	 */
+	if (!nr_pages)
+		return -EINVAL;
+
+	rcu_read_lock();
+	task = pid_task(find_vpid(current->tgid), PIDTYPE_PID);
+	if (!task) {
+		rcu_read_unlock();
+		return -ESRCH;
+	}
+
+	/* get_task_mm() takes an mm reference; it is dropped by mmput() at out. */
+	mm = get_task_mm(task);
+	rcu_read_unlock();
+	if (!mm)
+		return -EINVAL;
+
+	down_read(&mm->mmap_lock);
+
+	/* find_vma() may return a VMA that starts above the address. */
+	vma = find_vma(mm, target_vaddr);
+	if (!vma || target_vaddr < vma->vm_start) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
+	if (!pages) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	npgs = get_user_pages_remote(mm, target_vaddr, nr_pages, FOLL_GET,
+				     pages, NULL);
+	if (npgs != nr_pages) {
+		/*
+		 * Release each page that was obtained before the shortfall.
+		 * The index walks down from the last valid entry so that every
+		 * page is released exactly once.
+		 */
+		while (npgs > 0)
+			put_page(pages[--npgs]);
+		ret = -EINVAL;
+		goto out_free;
+	}
+
+	paddr = page_to_phys(pages[0]) + (target_vaddr & (PAGE_SIZE - 1));
+	map->iova = paddr;
+
+out_free:
+	/* Only the pointer array is freed; page references stay held on success. */
+	kfree(pages);
+out:
+	up_read(&mm->mmap_lock);
+	mmput(mm);
+
+	return ret;
+}
+
+/*
+ * vfio_noiommu_do_unmap - walk a user range and release page references
+ * @iommu: opaque container data (not used by this function)
+ * @unmap: request; @unmap->vaddr and @unmap->size are read
+ *
+ * The range is processed in chunks of at most 1024 pages.  For each chunk
+ * the pages are looked up with get_user_pages_remote(FOLL_GET) and then
+ * released with put_page().
+ *
+ * NOTE(review): every lookup takes a fresh reference and the loop drops
+ * exactly that reference, so the net refcount change made by this function
+ * is zero.  References taken earlier by vfio_noiommu_do_map() are therefore
+ * not released here; confirm how they are meant to be dropped.
+ *
+ * Return: 0 on success, -ENOMEM if the page array cannot be allocated,
+ * -EINVAL if the caller has no mm or a lookup returns no pages.
+ */
+static int vfio_noiommu_do_unmap(void *iommu, struct vfio_noiommu_dma_unmap *unmap)
+{
+	unsigned long nr_pages = unmap->size / PAGE_SIZE;
+	unsigned long current_vaddr = unmap->vaddr;
+	unsigned long remaining_pages = nr_pages;
+	unsigned long chunk_size = 1024;
+	struct mm_struct *mm;
+	struct page **pages;
+	int ret = 0, i;
+
+	pages = kcalloc(chunk_size, sizeof(struct page *), GFP_KERNEL);
+	if (!pages)
+		return -ENOMEM;
+
+	mm = get_task_mm(current);
+	if (!mm) {
+		ret = -EINVAL;
+		goto out_free;
+	}
+
+	down_read(&mm->mmap_lock);
+
+	while (remaining_pages > 0) {
+		unsigned long pages_to_unmap = min(remaining_pages, chunk_size);
+		long got;
+
+		got = get_user_pages_remote(mm, current_vaddr, pages_to_unmap,
+					    FOLL_GET, pages, NULL);
+		if (got <= 0) {
+			ret = -EINVAL;
+			break;
+		}
+
+		for (i = got - 1; i >= 0; i--) {
+			if (!pages[i])
+				continue;
+			put_page(pages[i]);
+		}
+
+		/*
+		 * The lookup may return fewer pages than requested.  Advance
+		 * only by what was actually handled so the remainder of the
+		 * chunk is retried instead of being skipped.
+		 */
+		remaining_pages -= got;
+		current_vaddr += got * PAGE_SIZE;
+	}
+
+	up_read(&mm->mmap_lock);
+	mmput(mm);
+
+out_free:
+	kfree(pages);
+	return ret;
+}
+
+/*
+ * VFIO_IOMMU_MAP_DMA entry point for the noiommu backend.
+ *
+ * Copies a struct vfio_noiommu_dma_map in from userspace, hands it to
+ * vfio_noiommu_do_map(), and on success copies the updated struct back.
+ *
+ * Returns 0 on success, -EFAULT if either user copy fails, otherwise the
+ * error reported by vfio_noiommu_do_map().
+ *
+ * NOTE(review): argsz and flags are copied in but never inspected here.
+ */
+static int vfio_noiommu_map_dma(void *iommu, unsigned long arg)
+{
+	unsigned long len = offsetofend(struct vfio_noiommu_dma_map, size);
+	void __user *uptr = (void __user *)arg;
+	struct vfio_noiommu_dma_map req;
+	int err;
+
+	if (copy_from_user(&req, uptr, len))
+		return -EFAULT;
+
+	err = vfio_noiommu_do_map(iommu, &req);
+	if (err)
+		return err;
+
+	/* Hand the filled-in request (including iova) back to the caller. */
+	return copy_to_user(uptr, &req, len) ? -EFAULT : 0;
+}
+
+/*
+ * VFIO_IOMMU_UNMAP_DMA entry point for the noiommu backend.
+ *
+ * Copies a struct vfio_noiommu_dma_unmap in from userspace, hands it to
+ * vfio_noiommu_do_unmap(), and on success copies the struct back.
+ *
+ * Returns 0 on success, -EFAULT if either user copy fails, otherwise the
+ * error reported by vfio_noiommu_do_unmap().
+ *
+ * NOTE(review): argsz and flags are copied in but never inspected here.
+ */
+static int vfio_noiommu_unmap_dma(void *iommu_data, unsigned long arg)
+{
+	unsigned long len = offsetofend(struct vfio_noiommu_dma_unmap, size);
+	void __user *uptr = (void __user *)arg;
+	struct vfio_noiommu_dma_unmap req;
+	int err;
+
+	if (copy_from_user(&req, uptr, len))
+		return -EFAULT;
+
+	err = vfio_noiommu_do_unmap(iommu_data, &req);
+	if (err)
+		return err;
+
+	return copy_to_user(uptr, &req, len) ? -EFAULT : 0;
+}
+
 static long vfio_noiommu_ioctl(void *iommu_data,
 			       unsigned int cmd, unsigned long arg)
 {
-	if (cmd == VFIO_CHECK_EXTENSION)
+	switch (cmd) {	/* no default: unhandled commands reach -ENOTTY below */
+	case VFIO_CHECK_EXTENSION:	/* 1 only when vfio_noiommu is set and arg is VFIO_NOIOMMU_IOMMU */
 		return vfio_noiommu && (arg == VFIO_NOIOMMU_IOMMU) ? 1 : 0;
-
+	case VFIO_IOMMU_MAP_DMA:	/* arg is read as a user pointer to struct vfio_noiommu_dma_map */
+		return vfio_noiommu_map_dma(iommu_data, arg);
+	case VFIO_IOMMU_UNMAP_DMA:	/* arg is read as a user pointer to struct vfio_noiommu_dma_unmap */
+		return vfio_noiommu_unmap_dma(iommu_data, arg);
+	}
 	return -ENOTTY;
 }
 
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 2b68e6cdf190..5f7533195adc 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -1565,6 +1565,21 @@ struct vfio_iommu_type1_dma_map {
 	__u64	size;				/* Size of mapping (bytes) */
 };
 
+struct vfio_noiommu_dma_map {
+	__u32	argsz;				/* presumably sizeof(this struct); not checked by the noiommu handler */
+	__u32	flags;				/* no flag bits are read by the noiommu handler */
+	__u64	vaddr;				/* In: process virtual address */
+	__u64	iova;				/* Out: physical address computed for vaddr, written by the kernel */
+	__u64	size;				/* In: size of mapping (bytes); handled as size / PAGE_SIZE pages */
+};
+
+struct vfio_noiommu_dma_unmap {
+	__u32	argsz;				/* presumably sizeof(this struct); not checked by the noiommu handler */
+	__u32	flags;				/* no flag bits are read by the noiommu handler */
+	__u64	vaddr;				/* Process virtual address where the range starts */
+	__u64	size;				/* Size of range (bytes); handled as size / PAGE_SIZE pages */
+};
+
 #define VFIO_IOMMU_MAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 13)
 
 struct vfio_bitmap {
-- 
2.25.1



             reply	other threads:[~2025-05-03  0:48 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-05-03  0:48 William Tu [this message]
2025-05-05 17:16 ` Jason Gunthorpe
2025-05-07 15:19   ` William Tu
2025-05-06  8:21 ` kernel test robot

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250503004821.16980-1-witu@nvidia.com \
    --to=witu@nvidia.com \
    --cc=jgg@nvidia.com \
    --cc=linux-mm@kvack.org \
    --cc=weizhang@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox