From: William Tu <witu@nvidia.com>
To: <linux-mm@kvack.org>
Cc: <jgg@nvidia.com>, <weizhang@nvidia.com>, William Tu <witu@nvidia.com>
Subject: [PATCH] vfio: add dma map/unmap support for noiommu
Date: Fri, 2 May 2025 17:48:21 -0700 [thread overview]
Message-ID: <20250503004821.16980-1-witu@nvidia.com> (raw)
Currently, when using noiommu, the VFIO uAPI does not support
VFIO_IOMMU_MAP_DMA, forcing userspace VFIO programs to resort to
something like /proc/self/pagemap to obtain the physical address.
This patch adds support for the DMA map and unmap operations
for noiommu.
Signed-off-by: William Tu <witu@nvidia.com>
---
drivers/vfio/container.c | 177 +++++++++++++++++++++++++++++++++++++-
include/uapi/linux/vfio.h | 15 ++++
2 files changed, 190 insertions(+), 2 deletions(-)
diff --git a/drivers/vfio/container.c b/drivers/vfio/container.c
index d53d08f16973..f05ba0566dfa 100644
--- a/drivers/vfio/container.c
+++ b/drivers/vfio/container.c
@@ -12,6 +12,8 @@
#include <linux/miscdevice.h>
#include <linux/vfio.h>
#include <uapi/linux/vfio.h>
+#include <linux/mm.h>
+#include <linux/sched/mm.h>
#include "vfio.h"
@@ -43,12 +45,183 @@ static void vfio_noiommu_release(void *iommu_data)
{
}
+/*
+ * Translate the virtual address in @map to a physical address and store it
+ * in @map->iova.
+ *
+ * @iommu: backend private data (unused here).
+ * @map:   request; ->vaddr and ->size are read, ->iova is written on success.
+ *
+ * Addresses at or above TASK_SIZE are treated as kernel linear-map addresses
+ * and translated directly.  Anything else is looked up in the calling
+ * process' address space with get_user_pages_remote().
+ *
+ * Returns 0 on success, -EINVAL for an address that cannot be resolved, a
+ * size smaller than one page, or a task without an mm, and -ENOMEM if the
+ * temporary page array cannot be allocated.
+ *
+ * NOTE(review): on success the page references taken by FOLL_GET are kept;
+ * nothing in this function releases them.  Only the physical address of the
+ * first page is reported and physical contiguity of the remaining pages is
+ * not checked.
+ */
+static int vfio_noiommu_do_map(void *iommu, struct vfio_noiommu_dma_map *map)
+{
+	unsigned long nr_pages = map->size / PAGE_SIZE;
+	unsigned long target_vaddr = map->vaddr;
+	struct vm_area_struct *vma;
+	struct mm_struct *mm;
+	struct page **pages;
+	struct page *page;
+	long npgs;
+	int ret = 0;
+
+	if (target_vaddr >= TASK_SIZE) {
+		/*
+		 * virt_to_page() performs no validation and never returns
+		 * NULL, so reject addresses outside the linear map up front.
+		 */
+		if (!virt_addr_valid((void *)target_vaddr))
+			return -EINVAL;
+		page = virt_to_page((void *)target_vaddr);
+		map->iova = page_to_phys(page) +
+			    (target_vaddr & (PAGE_SIZE - 1));
+		return 0;
+	}
+
+	/* pages[0] is dereferenced below, so at least one page is required. */
+	if (!nr_pages)
+		return -EINVAL;
+
+	/* Same mm lookup as vfio_noiommu_do_unmap(): the caller's own mm. */
+	mm = get_task_mm(current);
+	if (!mm)
+		return -EINVAL;
+
+	/* Allocate before taking mmap_lock to keep the locked section short. */
+	pages = kcalloc(nr_pages, sizeof(*pages), GFP_KERNEL);
+	if (!pages) {
+		ret = -ENOMEM;
+		goto out_mmput;
+	}
+
+	down_read(&mm->mmap_lock);
+
+	vma = find_vma(mm, target_vaddr);
+	if (!vma || target_vaddr < vma->vm_start) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
+	npgs = get_user_pages_remote(mm, target_vaddr, nr_pages, FOLL_GET,
+				     pages, NULL);
+	if (npgs < 0 || (unsigned long)npgs != nr_pages) {
+		/* Drop every page that was obtained before the failure. */
+		while (npgs > 0)
+			put_page(pages[--npgs]);
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
+	map->iova = page_to_phys(pages[0]) + (target_vaddr & (PAGE_SIZE - 1));
+
+out_unlock:
+	up_read(&mm->mmap_lock);
+	kfree(pages);
+out_mmput:
+	mmput(mm);
+
+	return ret;
+}
+
+/*
+ * Walk the user range described by @unmap in chunks of up to 1024 pages,
+ * looking each chunk up with get_user_pages_remote() and dropping the page
+ * references again with put_page().
+ *
+ * @iommu: backend private data (unused here).
+ * @unmap: request; ->vaddr and ->size are read.
+ *
+ * Returns 0 on success (including a size below one page, which is a no-op),
+ * -ENOMEM if the temporary page array cannot be allocated, and -EINVAL if
+ * the task has no mm or a lookup fails.
+ *
+ * NOTE(review): each put_page() here balances the reference taken by the
+ * lookup in this same loop; it does not appear to release any reference
+ * left behind by vfio_noiommu_do_map().  Confirm the intended pairing.
+ */
+static int vfio_noiommu_do_unmap(void *iommu, struct vfio_noiommu_dma_unmap *unmap)
+{
+	unsigned long remaining_pages = unmap->size / PAGE_SIZE;
+	unsigned long current_vaddr = unmap->vaddr;
+	unsigned long chunk_size = 1024;
+	struct mm_struct *mm;
+	struct page **pages;
+	int ret = 0;
+
+	pages = kcalloc(chunk_size, sizeof(*pages), GFP_KERNEL);
+	if (!pages)
+		return -ENOMEM;
+
+	mm = get_task_mm(current);
+	if (!mm) {
+		ret = -EINVAL;
+		goto out_free;
+	}
+
+	down_read(&mm->mmap_lock);
+
+	while (remaining_pages > 0) {
+		unsigned long want = min(remaining_pages, chunk_size);
+		long got, i;
+
+		got = get_user_pages_remote(mm, current_vaddr, want,
+					    FOLL_GET, pages, NULL);
+		if (got <= 0) {
+			ret = -EINVAL;
+			break;
+		}
+
+		for (i = 0; i < got; i++)
+			put_page(pages[i]);
+
+		/*
+		 * Advance by what was actually returned: a short lookup must
+		 * not cause the remaining pages of the chunk to be skipped.
+		 */
+		remaining_pages -= got;
+		current_vaddr += got * PAGE_SIZE;
+	}
+
+	up_read(&mm->mmap_lock);
+	mmput(mm);
+
+out_free:
+	kfree(pages);
+	return ret;
+}
+
+/*
+ * VFIO_IOMMU_MAP_DMA handler for the noiommu backend.
+ *
+ * @iommu: backend private data, passed through to vfio_noiommu_do_map().
+ * @arg:   user pointer to a struct vfio_noiommu_dma_map.
+ *
+ * Copies the request in, resolves it with vfio_noiommu_do_map() and copies
+ * the structure (now carrying the resulting iova) back to userspace.
+ *
+ * Returns 0 on success, -EFAULT if the user buffer cannot be accessed,
+ * -EINVAL if argsz is smaller than the structure, or the error returned by
+ * vfio_noiommu_do_map().
+ */
+static int vfio_noiommu_map_dma(void *iommu, unsigned long arg)
+{
+	void __user *uarg = (void __user *)arg;
+	struct vfio_noiommu_dma_map map;
+	unsigned long minsz;
+	int ret;
+
+	minsz = offsetofend(struct vfio_noiommu_dma_map, size);
+
+	if (copy_from_user(&map, uarg, minsz))
+		return -EFAULT;
+
+	/* The caller must declare a buffer at least as large as we access. */
+	if (map.argsz < minsz)
+		return -EINVAL;
+
+	ret = vfio_noiommu_do_map(iommu, &map);
+	if (ret)
+		return ret;
+
+	if (copy_to_user(uarg, &map, minsz))
+		return -EFAULT;
+
+	return 0;
+}
+
+/*
+ * VFIO_IOMMU_UNMAP_DMA handler for the noiommu backend.
+ *
+ * @iommu_data: backend private data, passed through to
+ *              vfio_noiommu_do_unmap().
+ * @arg:        user pointer to a struct vfio_noiommu_dma_unmap.
+ *
+ * Copies the request in, processes it with vfio_noiommu_do_unmap() and
+ * copies the structure back to userspace.
+ *
+ * Returns 0 on success, -EFAULT if the user buffer cannot be accessed,
+ * -EINVAL if argsz is smaller than the structure, or the error returned by
+ * vfio_noiommu_do_unmap().
+ */
+static int vfio_noiommu_unmap_dma(void *iommu_data, unsigned long arg)
+{
+	void __user *uarg = (void __user *)arg;
+	struct vfio_noiommu_dma_unmap unmap;
+	unsigned long minsz;
+	int ret;
+
+	minsz = offsetofend(struct vfio_noiommu_dma_unmap, size);
+
+	if (copy_from_user(&unmap, uarg, minsz))
+		return -EFAULT;
+
+	/* The caller must declare a buffer at least as large as we access. */
+	if (unmap.argsz < minsz)
+		return -EINVAL;
+
+	ret = vfio_noiommu_do_unmap(iommu_data, &unmap);
+	if (ret)
+		return ret;
+
+	if (copy_to_user(uarg, &unmap, minsz))
+		return -EFAULT;
+
+	return 0;
+}
+
+/*
+ * ioctl dispatcher for the noiommu backend.
+ *
+ * VFIO_CHECK_EXTENSION yields 1 only when vfio_noiommu is set and @arg is
+ * VFIO_NOIOMMU_IOMMU, otherwise 0.  VFIO_IOMMU_MAP_DMA and
+ * VFIO_IOMMU_UNMAP_DMA are forwarded to the map/unmap handlers with
+ * @iommu_data and @arg.  Every other command returns -ENOTTY.
+ */
static long vfio_noiommu_ioctl(void *iommu_data,
unsigned int cmd, unsigned long arg)
{
- if (cmd == VFIO_CHECK_EXTENSION)
+ switch (cmd) {
+ case VFIO_CHECK_EXTENSION:
return vfio_noiommu && (arg == VFIO_NOIOMMU_IOMMU) ? 1 : 0;
-
+ case VFIO_IOMMU_MAP_DMA:
+ return vfio_noiommu_map_dma(iommu_data, arg);
+ case VFIO_IOMMU_UNMAP_DMA:
+ return vfio_noiommu_unmap_dma(iommu_data, arg);
+ }
+ /* No case matched: the command is not handled by this backend. */
return -ENOTTY;
}
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 2b68e6cdf190..5f7533195adc 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -1565,6 +1565,21 @@ struct vfio_iommu_type1_dma_map {
__u64 size; /* Size of mapping (bytes) */
};
+/*
+ * Argument of VFIO_IOMMU_MAP_DMA as handled by the noiommu backend in
+ * drivers/vfio/container.c.  The caller supplies vaddr and size; on success
+ * the kernel writes the resolved physical address into iova and copies the
+ * structure back.
+ * NOTE(review): no flag bits are defined for this structure in this patch.
+ */
+struct vfio_noiommu_dma_map {
+ __u32 argsz;
+ __u32 flags;
+ __u64 vaddr; /* In: virtual address to translate */
+ __u64 iova; /* Out: physical address backing vaddr */
+ __u64 size; /* Size of mapping (bytes) */
+};
+
+/*
+ * Argument of VFIO_IOMMU_UNMAP_DMA as handled by the noiommu backend in
+ * drivers/vfio/container.c.  The caller supplies vaddr and size.
+ * NOTE(review): no flag bits are defined for this structure in this patch.
+ */
+struct vfio_noiommu_dma_unmap {
+ __u32 argsz;
+ __u32 flags;
+ __u64 vaddr; /* Process virtual address */
+ __u64 size; /* Size of the range (bytes) */
+};
+
#define VFIO_IOMMU_MAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 13)
struct vfio_bitmap {
--
2.25.1
next reply other threads:[~2025-05-03 0:48 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-05-03 0:48 William Tu [this message]
2025-05-05 17:16 ` Jason Gunthorpe
2025-05-07 15:19 ` William Tu
2025-05-06 8:21 ` kernel test robot
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250503004821.16980-1-witu@nvidia.com \
--to=witu@nvidia.com \
--cc=jgg@nvidia.com \
--cc=linux-mm@kvack.org \
--cc=weizhang@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox