From: Leon Romanovsky <leon@kernel.org>
To: Jens Axboe <axboe@kernel.dk>, Jason Gunthorpe <jgg@ziepe.ca>,
Robin Murphy <robin.murphy@arm.com>,
Joerg Roedel <joro@8bytes.org>, Will Deacon <will@kernel.org>,
Keith Busch <kbusch@kernel.org>, Christoph Hellwig <hch@lst.de>,
"Zeng, Oak" <oak.zeng@intel.com>,
Chaitanya Kulkarni <kch@nvidia.com>
Cc: "Leon Romanovsky" <leonro@nvidia.com>,
"Sagi Grimberg" <sagi@grimberg.me>,
"Bjorn Helgaas" <bhelgaas@google.com>,
"Logan Gunthorpe" <logang@deltatee.com>,
"Yishai Hadas" <yishaih@nvidia.com>,
"Shameer Kolothum" <shameerali.kolothum.thodi@huawei.com>,
"Kevin Tian" <kevin.tian@intel.com>,
"Alex Williamson" <alex.williamson@redhat.com>,
"Marek Szyprowski" <m.szyprowski@samsung.com>,
"Jérôme Glisse" <jglisse@redhat.com>,
"Andrew Morton" <akpm@linux-foundation.org>,
linux-block@vger.kernel.org, linux-kernel@vger.kernel.org,
linux-rdma@vger.kernel.org, iommu@lists.linux.dev,
linux-nvme@lists.infradead.org, linux-pci@vger.kernel.org,
kvm@vger.kernel.org, linux-mm@kvack.org
Subject: [RFC v2 02/21] iommu/dma: Implement link/unlink ranges callbacks
Date: Thu, 12 Sep 2024 14:15:37 +0300 [thread overview]
Message-ID: <e3a8350baeaad544010c65dc62db53cf92ff2be1.1726138681.git.leon@kernel.org> (raw)
In-Reply-To: <cover.1726138681.git.leon@kernel.org>
From: Leon Romanovsky <leonro@nvidia.com>
Add an implementation of link/unlink interface to perform in map/unmap
pages in fast patch for pre-allocated IOVA.
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
drivers/iommu/dma-iommu.c | 86 +++++++++++++++++++++++++++++++++++++++
include/linux/iommu-dma.h | 25 ++++++++++++
2 files changed, 111 insertions(+)
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 09deea2fc86b..72763f76b712 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -1743,6 +1743,92 @@ void iommu_dma_free_iova(struct dma_iova_state *state)
&iotlb_gather);
}
+int iommu_dma_start_range(struct device *dev)
+{
+ struct iommu_domain *domain = iommu_get_dma_domain(dev);
+
+ if (static_branch_unlikely(&iommu_deferred_attach_enabled))
+ return iommu_deferred_attach(dev, domain);
+
+ return 0;
+}
+
+void iommu_dma_end_range(struct device *dev)
+{
+ /* TODO: Factor out ops->iotlb_sync_map(..) call from iommu_map()
+ * and put it here to provide batched iotlb sync for the range.
+ */
+}
+
+dma_addr_t iommu_dma_link_range(struct dma_iova_state *state, phys_addr_t phys,
+ size_t size, unsigned long attrs)
+{
+ struct iommu_domain *domain = iommu_get_dma_domain(state->dev);
+ struct iommu_dma_cookie *cookie = domain->iova_cookie;
+ struct iova_domain *iovad = &cookie->iovad;
+ size_t iova_off = iova_offset(iovad, phys);
+ bool coherent = dev_is_dma_coherent(state->dev);
+ int prot = dma_info_to_prot(state->dir, coherent, attrs);
+ dma_addr_t addr = state->addr + state->range_size;
+ int ret;
+
+ WARN_ON_ONCE(iova_off && state->range_size > 0);
+
+ if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+ arch_sync_dma_for_device(phys, size, state->dir);
+
+ size = iova_align(iovad, size + iova_off);
+ ret = iommu_map(domain, addr, phys - iova_off, size, prot, GFP_ATOMIC);
+ if (ret)
+ return ret;
+
+ state->range_size += size;
+ return addr + iova_off;
+}
+
+static void iommu_sync_dma_for_cpu(struct iommu_domain *domain,
+ dma_addr_t start, size_t size,
+ enum dma_data_direction dir)
+{
+ size_t sync_size, unmapped = 0;
+ phys_addr_t phys;
+
+ do {
+ phys = iommu_iova_to_phys(domain, start + unmapped);
+ if (WARN_ON(!phys))
+ continue;
+
+ sync_size = (unmapped + PAGE_SIZE > size) ? size % PAGE_SIZE :
+ PAGE_SIZE;
+ arch_sync_dma_for_cpu(phys, sync_size, dir);
+ unmapped += sync_size;
+ } while (unmapped < size);
+}
+
+void iommu_dma_unlink_range(struct device *dev, dma_addr_t start, size_t size,
+ enum dma_data_direction dir, unsigned long attrs)
+{
+ struct iommu_domain *domain = iommu_get_dma_domain(dev);
+ struct iommu_dma_cookie *cookie = domain->iova_cookie;
+ struct iova_domain *iovad = &cookie->iovad;
+ struct iommu_iotlb_gather iotlb_gather;
+ bool coherent = dev_is_dma_coherent(dev);
+ size_t unmapped;
+
+ iommu_iotlb_gather_init(&iotlb_gather);
+ iotlb_gather.queued = READ_ONCE(cookie->fq_domain);
+
+ if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && !coherent)
+ iommu_sync_dma_for_cpu(domain, start, size, dir);
+
+ size = iova_align(iovad, size);
+ unmapped = iommu_unmap_fast(domain, start, size, &iotlb_gather);
+ WARN_ON(unmapped != size);
+
+ if (!iotlb_gather.queued)
+ iommu_iotlb_sync(domain, &iotlb_gather);
+}
+
void iommu_setup_dma_ops(struct device *dev)
{
struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
diff --git a/include/linux/iommu-dma.h b/include/linux/iommu-dma.h
index 698df67b152a..21b0341f52b8 100644
--- a/include/linux/iommu-dma.h
+++ b/include/linux/iommu-dma.h
@@ -60,6 +60,12 @@ void iommu_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sgl,
int iommu_dma_alloc_iova(struct dma_iova_state *state, phys_addr_t phys,
size_t size);
void iommu_dma_free_iova(struct dma_iova_state *state);
+int iommu_dma_start_range(struct device *dev);
+void iommu_dma_end_range(struct device *dev);
+dma_addr_t iommu_dma_link_range(struct dma_iova_state *state, phys_addr_t phys,
+ size_t size, unsigned long attrs);
+void iommu_dma_unlink_range(struct device *dev, dma_addr_t start, size_t size,
+ enum dma_data_direction dir, unsigned long attrs);
#else
static inline bool use_dma_iommu(struct device *dev)
{
@@ -184,5 +190,24 @@ static inline int iommu_dma_alloc_iova(struct dma_iova_state *state,
static inline void iommu_dma_free_iova(struct dma_iova_state *state)
{
}
+static inline int iommu_dma_start_range(struct device *dev)
+{
+ return -EOPNOTSUPP;
+}
+static inline void iommu_dma_end_range(struct device *dev)
+{
+}
+static inline dma_addr_t iommu_dma_link_range(struct dma_iova_state *state,
+ phys_addr_t phys, size_t size,
+ unsigned long attrs)
+{
+ return DMA_MAPPING_ERROR;
+}
+static inline void iommu_dma_unlink_range(struct device *dev, dma_addr_t start,
+ size_t size,
+ enum dma_data_direction dir,
+ unsigned long attrs)
+{
+}
#endif /* CONFIG_IOMMU_DMA */
#endif /* _LINUX_IOMMU_DMA_H */
--
2.46.0
next prev parent reply other threads:[~2024-09-12 11:16 UTC|newest]
Thread overview: 22+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-09-12 11:15 [RFC v2 00/21] Provide a new two step DMA API mapping API Leon Romanovsky
2024-09-12 11:15 ` [RFC v2 01/21] iommu/dma: Provide an interface to allow preallocate IOVA Leon Romanovsky
2024-09-12 11:15 ` Leon Romanovsky [this message]
2024-09-12 11:15 ` [RFC v2 03/21] iommu/dma: Add check if IOVA can be used Leon Romanovsky
2024-09-12 11:15 ` [RFC v2 04/21] dma-mapping: initialize IOVA state struct Leon Romanovsky
2024-09-12 11:15 ` [RFC v2 05/21] dma-mapping: provide an interface to allocate IOVA Leon Romanovsky
2024-09-12 11:15 ` [RFC v2 06/21] dma-mapping: set and query DMA IOVA state Leon Romanovsky
2024-09-12 11:15 ` [RFC v2 07/21] dma-mapping: implement link range API Leon Romanovsky
2024-09-12 11:15 ` [RFC v2 08/21] mm/hmm: let users to tag specific PFN with DMA mapped bit Leon Romanovsky
2024-09-12 11:15 ` [RFC v2 09/21] dma-mapping: provide callbacks to link/unlink HMM PFNs to specific IOVA Leon Romanovsky
2024-09-12 11:15 ` [RFC v2 10/21] RDMA/umem: Preallocate and cache IOVA for UMEM ODP Leon Romanovsky
2024-09-12 11:15 ` [RFC v2 11/21] RDMA/umem: Store ODP access mask information in PFN Leon Romanovsky
2024-09-12 11:15 ` [RFC v2 12/21] RDMA/core: Separate DMA mapping to caching IOVA and page linkage Leon Romanovsky
2024-09-12 11:15 ` [RFC v2 13/21] RDMA/umem: Prevent UMEM ODP creation with SWIOTLB Leon Romanovsky
2024-09-12 11:15 ` [RFC v2 14/21] vfio/mlx5: Explicitly use number of pages instead of allocated length Leon Romanovsky
2024-09-12 11:15 ` [RFC v2 15/21] vfio/mlx5: Rewrite create mkey flow to allow better code reuse Leon Romanovsky
2024-09-12 11:15 ` [RFC v2 16/21] vfio/mlx5: Explicitly store page list Leon Romanovsky
2024-09-12 11:15 ` [RFC v2 17/21] vfio/mlx5: Convert vfio to use DMA link API Leon Romanovsky
2024-09-12 11:15 ` [RFC v2 18/21] nvme-pci: remove optimizations for single DMA entry Leon Romanovsky
2024-09-12 11:15 ` [RFC v2 19/21] nvme-pci: precalculate number of DMA entries for each command Leon Romanovsky
2024-09-12 11:15 ` [RFC v2 20/21] nvme-pci: use new dma API Leon Romanovsky
2024-09-12 11:15 ` [RFC v2 21/21] nvme-pci: don't allow mapping of bvecs with offset Leon Romanovsky
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=e3a8350baeaad544010c65dc62db53cf92ff2be1.1726138681.git.leon@kernel.org \
--to=leon@kernel.org \
--cc=akpm@linux-foundation.org \
--cc=alex.williamson@redhat.com \
--cc=axboe@kernel.dk \
--cc=bhelgaas@google.com \
--cc=hch@lst.de \
--cc=iommu@lists.linux.dev \
--cc=jgg@ziepe.ca \
--cc=jglisse@redhat.com \
--cc=joro@8bytes.org \
--cc=kbusch@kernel.org \
--cc=kch@nvidia.com \
--cc=kevin.tian@intel.com \
--cc=kvm@vger.kernel.org \
--cc=leonro@nvidia.com \
--cc=linux-block@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=linux-nvme@lists.infradead.org \
--cc=linux-pci@vger.kernel.org \
--cc=linux-rdma@vger.kernel.org \
--cc=logang@deltatee.com \
--cc=m.szyprowski@samsung.com \
--cc=oak.zeng@intel.com \
--cc=robin.murphy@arm.com \
--cc=sagi@grimberg.me \
--cc=shameerali.kolothum.thodi@huawei.com \
--cc=will@kernel.org \
--cc=yishaih@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox