linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Alex Williamson <alex@shazbot.org>
To: Leon Romanovsky <leon@kernel.org>
Cc: "Alex Williamson" <alex.williamson@redhat.com>,
	"Leon Romanovsky" <leonro@nvidia.com>,
	"Jason Gunthorpe" <jgg@nvidia.com>,
	"Andrew Morton" <akpm@linux-foundation.org>,
	"Bjorn Helgaas" <bhelgaas@google.com>,
	"Christian König" <christian.koenig@amd.com>,
	dri-devel@lists.freedesktop.org, iommu@lists.linux.dev,
	"Jens Axboe" <axboe@kernel.dk>, "Joerg Roedel" <joro@8bytes.org>,
	kvm@vger.kernel.org, linaro-mm-sig@lists.linaro.org,
	linux-block@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-media@vger.kernel.org, linux-mm@kvack.org,
	linux-pci@vger.kernel.org,
	"Logan Gunthorpe" <logang@deltatee.com>,
	"Marek Szyprowski" <m.szyprowski@samsung.com>,
	"Robin Murphy" <robin.murphy@arm.com>,
	"Sumit Semwal" <sumit.semwal@linaro.org>,
	"Vivek Kasireddy" <vivek.kasireddy@intel.com>,
	"Will Deacon" <will@kernel.org>
Subject: Re: [PATCH v5 9/9] vfio/pci: Add dma-buf export support for MMIO regions
Date: Thu, 30 Oct 2025 14:38:36 -0600	[thread overview]
Message-ID: <20251030143836.66cdf116@shazbot.org> (raw)
In-Reply-To: <72ecaa13864ca346797e342d23a7929562788148.1760368250.git.leon@kernel.org>

On Mon, 13 Oct 2025 18:26:11 +0300
Leon Romanovsky <leon@kernel.org> wrote:
> diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
> index fe247d0e2831..56b1320238a9 100644
> --- a/drivers/vfio/pci/vfio_pci_core.c
> +++ b/drivers/vfio/pci/vfio_pci_core.c
> @@ -1511,6 +1520,19 @@ int vfio_pci_core_ioctl_feature(struct vfio_device *device, u32 flags,
>  		return vfio_pci_core_pm_exit(vdev, flags, arg, argsz);
>  	case VFIO_DEVICE_FEATURE_PCI_VF_TOKEN:
>  		return vfio_pci_core_feature_token(vdev, flags, arg, argsz);
> +	case VFIO_DEVICE_FEATURE_DMA_BUF:
> +		if (device->ops->ioctl != vfio_pci_core_ioctl)
> +			/*
> +			 * Devices that overwrite general .ioctl() callback
> +			 * usually do it to implement their own
> +			 * VFIO_DEVICE_GET_REGION_INFO handlerm and they present

Typo, "handlerm"

> +			 * different BAR information from the real PCI.
> +			 *
> +			 * DMABUF relies on real PCI information.
> +			 */
> +			return -EOPNOTSUPP;
> +
> +		return vfio_pci_core_feature_dma_buf(vdev, flags, arg, argsz);
>  	default:
>  		return -ENOTTY;
>  	}
...
> @@ -2459,6 +2482,7 @@ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
>  			break;
>  		}
>  
> +		vfio_pci_dma_buf_move(vdev, true);
>  		vfio_pci_zap_bars(vdev);
>  	}
>  
> @@ -2482,6 +2506,10 @@ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
>  
>  	ret = pci_reset_bus(pdev);
>  
> +	list_for_each_entry(vdev, &dev_set->device_list, vdev.dev_set_list)
> +		if (__vfio_pci_memory_enabled(vdev))
> +			vfio_pci_dma_buf_move(vdev, false);
> +
>  	vdev = list_last_entry(&dev_set->device_list,
>  			       struct vfio_pci_core_device, vdev.dev_set_list);
>  

This needs to be placed in the existing undo loop with the up_write(),
otherwise it can be missed in the error case.

> diff --git a/drivers/vfio/pci/vfio_pci_dmabuf.c b/drivers/vfio/pci/vfio_pci_dmabuf.c
> new file mode 100644
> index 000000000000..eaba010777f3
> --- /dev/null
> +++ b/drivers/vfio/pci/vfio_pci_dmabuf.c
> +static unsigned int calc_sg_nents(struct vfio_pci_dma_buf *priv,
> +				  struct dma_iova_state *state)
> +{
> +	struct phys_vec *phys_vec = priv->phys_vec;
> +	unsigned int nents = 0;
> +	u32 i;
> +
> +	if (!state || !dma_use_iova(state))
> +		for (i = 0; i < priv->nr_ranges; i++)
> +			nents += DIV_ROUND_UP(phys_vec[i].len, UINT_MAX);
> +	else
> +		/*
> +		 * In IOVA case, there is only one SG entry which spans
> +		 * for whole IOVA address space, but we need to make sure
> +		 * that it fits sg->length, maybe we need more.
> +		 */
> +		nents = DIV_ROUND_UP(priv->size, UINT_MAX);

I think we're arguably running afoul of the coding style standard here,
in that this is not a single simple statement and should use braces.

> +
> +	return nents;
> +}
> +
> +static struct sg_table *
> +vfio_pci_dma_buf_map(struct dma_buf_attachment *attachment,
> +		     enum dma_data_direction dir)
> +{
> +	struct vfio_pci_dma_buf *priv = attachment->dmabuf->priv;
> +	struct dma_iova_state *state = attachment->priv;
> +	struct phys_vec *phys_vec = priv->phys_vec;
> +	unsigned long attrs = DMA_ATTR_MMIO;
> +	unsigned int nents, mapped_len = 0;
> +	struct scatterlist *sgl;
> +	struct sg_table *sgt;
> +	dma_addr_t addr;
> +	int ret;
> +	u32 i;
> +
> +	dma_resv_assert_held(priv->dmabuf->resv);
> +
> +	if (priv->revoked)
> +		return ERR_PTR(-ENODEV);
> +
> +	sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
> +	if (!sgt)
> +		return ERR_PTR(-ENOMEM);
> +
> +	nents = calc_sg_nents(priv, state);
> +	ret = sg_alloc_table(sgt, nents, GFP_KERNEL | __GFP_ZERO);
> +	if (ret)
> +		goto err_kfree_sgt;
> +
> +	sgl = sgt->sgl;
> +
> +	for (i = 0; i < priv->nr_ranges; i++) {
> +		if (!state) {
> +			addr = pci_p2pdma_bus_addr_map(priv->provider,
> +						       phys_vec[i].paddr);
> +		} else if (dma_use_iova(state)) {
> +			ret = dma_iova_link(attachment->dev, state,
> +					    phys_vec[i].paddr, 0,
> +					    phys_vec[i].len, dir, attrs);
> +			if (ret)
> +				goto err_unmap_dma;
> +
> +			mapped_len += phys_vec[i].len;
> +		} else {
> +			addr = dma_map_phys(attachment->dev, phys_vec[i].paddr,
> +					    phys_vec[i].len, dir, attrs);
> +			ret = dma_mapping_error(attachment->dev, addr);
> +			if (ret)
> +				goto err_unmap_dma;
> +		}
> +
> +		if (!state || !dma_use_iova(state))
> +			sgl = fill_sg_entry(sgl, phys_vec[i].len, addr);
> +	}
> +
> +	if (state && dma_use_iova(state)) {
> +		WARN_ON_ONCE(mapped_len != priv->size);
> +		ret = dma_iova_sync(attachment->dev, state, 0, mapped_len);
> +		if (ret)
> +			goto err_unmap_dma;
> +		sgl = fill_sg_entry(sgl, mapped_len, state->addr);
> +	}
> +
> +	/*
> +	 * SGL must be NULL to indicate that SGL is the last one
> +	 * and we allocated correct number of entries in sg_alloc_table()
> +	 */
> +	WARN_ON_ONCE(sgl);
> +	return sgt;
> +
> +err_unmap_dma:
> +	if (!i || !state)
> +		; /* Do nothing */
> +	else if (dma_use_iova(state))
> +		dma_iova_destroy(attachment->dev, state, mapped_len, dir,
> +				 attrs);
> +	else
> +		for_each_sgtable_dma_sg(sgt, sgl, i)
> +			dma_unmap_phys(attachment->dev, sg_dma_address(sgl),
> +					sg_dma_len(sgl), dir, attrs);

Same here, for braces.

> +	sg_free_table(sgt);
> +err_kfree_sgt:
> +	kfree(sgt);
> +	return ERR_PTR(ret);
> +}
> +
> +static void vfio_pci_dma_buf_unmap(struct dma_buf_attachment *attachment,
> +				   struct sg_table *sgt,
> +				   enum dma_data_direction dir)
> +{
> +	struct vfio_pci_dma_buf *priv = attachment->dmabuf->priv;
> +	struct dma_iova_state *state = attachment->priv;
> +	unsigned long attrs = DMA_ATTR_MMIO;
> +	struct scatterlist *sgl;
> +	int i;
> +
> +	if (!state)
> +		; /* Do nothing */
> +	else if (dma_use_iova(state))
> +		dma_iova_destroy(attachment->dev, state, priv->size, dir,
> +				 attrs);
> +	else
> +		for_each_sgtable_dma_sg(sgt, sgl, i)
> +			dma_unmap_phys(attachment->dev, sg_dma_address(sgl),
> +				       sg_dma_len(sgl), dir, attrs);
> +

Here too.

> +	sg_free_table(sgt);
> +	kfree(sgt);
> +}
...
> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> index 75100bf009ba..63214467c875 100644
> --- a/include/uapi/linux/vfio.h
> +++ b/include/uapi/linux/vfio.h
> @@ -1478,6 +1478,31 @@ struct vfio_device_feature_bus_master {
>  };
>  #define VFIO_DEVICE_FEATURE_BUS_MASTER 10
>  
> +/**
> + * Upon VFIO_DEVICE_FEATURE_GET create a dma_buf fd for the
> + * regions selected.
> + *
> + * open_flags are the typical flags passed to open(2), eg O_RDWR, O_CLOEXEC,
> + * etc. offset/length specify a slice of the region to create the dmabuf from.
> + * nr_ranges is the total number of (P2P DMA) ranges that comprise the dmabuf.
> + *

Probably worth noting that .flags should be zero; I see we enforce
that.  Thanks,

Alex

> + * Return: The fd number on success, -1 and errno is set on failure.
> + */
> +#define VFIO_DEVICE_FEATURE_DMA_BUF 11
> +
> +struct vfio_region_dma_range {
> +	__u64 offset;
> +	__u64 length;
> +};
> +
> +struct vfio_device_feature_dma_buf {
> +	__u32	region_index;
> +	__u32	open_flags;
> +	__u32   flags;
> +	__u32   nr_ranges;
> +	struct vfio_region_dma_range dma_ranges[];
> +};
> +
>  /* -------- API for Type1 VFIO IOMMU -------- */
>  
>  /**



  parent reply	other threads:[~2025-10-30 20:38 UTC|newest]

Thread overview: 57+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-10-13 15:26 [PATCH v5 0/9] vfio/pci: Allow MMIO regions to be exported through dma-buf Leon Romanovsky
2025-10-13 15:26 ` [PATCH v5 1/9] PCI/P2PDMA: Separate the mmap() support from the core logic Leon Romanovsky
2025-10-17  6:30   ` Christoph Hellwig
2025-10-17 11:53     ` Jason Gunthorpe
2025-10-20 12:27       ` Christoph Hellwig
2025-10-20 12:58         ` Jason Gunthorpe
2025-10-20 15:04           ` Leon Romanovsky
2025-10-22  7:10           ` Christoph Hellwig
2025-10-22 11:43             ` Jason Gunthorpe
2025-10-13 15:26 ` [PATCH v5 2/9] PCI/P2PDMA: Simplify bus address mapping API Leon Romanovsky
2025-10-13 15:26 ` [PATCH v5 3/9] PCI/P2PDMA: Refactor to separate core P2P functionality from memory allocation Leon Romanovsky
2025-10-13 15:26 ` [PATCH v5 4/9] PCI/P2PDMA: Export pci_p2pdma_map_type() function Leon Romanovsky
2025-10-17  6:31   ` Christoph Hellwig
2025-10-17 12:14     ` Jason Gunthorpe
2025-10-20 12:29       ` Christoph Hellwig
2025-10-20 13:14         ` Jason Gunthorpe
2025-10-13 15:26 ` [PATCH v5 5/9] types: move phys_vec definition to common header Leon Romanovsky
2025-10-13 15:26 ` [PATCH v5 6/9] vfio: Export vfio device get and put registration helpers Leon Romanovsky
2025-10-13 15:26 ` [PATCH v5 7/9] vfio/pci: Share the core device pointer while invoking feature functions Leon Romanovsky
2025-10-13 15:26 ` [PATCH v5 8/9] vfio/pci: Enable peer-to-peer DMA transactions by default Leon Romanovsky
2025-10-16  4:09   ` Nicolin Chen
2025-10-16  6:10     ` Leon Romanovsky
2025-10-17  6:32   ` Christoph Hellwig
2025-10-17 11:55     ` Jason Gunthorpe
2025-10-20 12:28       ` Christoph Hellwig
2025-10-20 13:08         ` Jason Gunthorpe
2025-10-22  7:08           ` Christoph Hellwig
2025-10-22 11:38             ` Jason Gunthorpe
2025-10-22 11:54   ` Jason Gunthorpe
2025-10-13 15:26 ` [PATCH v5 9/9] vfio/pci: Add dma-buf export support for MMIO regions Leon Romanovsky
2025-10-16 23:53   ` Jason Gunthorpe
2025-10-17  5:40     ` Leon Romanovsky
2025-10-17 15:58       ` Jason Gunthorpe
2025-10-17 16:01         ` Jason Gunthorpe
2025-10-17  0:01   ` Jason Gunthorpe
2025-10-17  6:33   ` Christoph Hellwig
2025-10-17 12:16     ` Jason Gunthorpe
2025-10-17 13:02   ` Jason Gunthorpe
2025-10-17 16:13     ` Leon Romanovsky
2025-10-20 16:15       ` Jason Gunthorpe
2025-10-20 16:44         ` Leon Romanovsky
2025-10-20 16:51           ` Jason Gunthorpe
2025-10-17 23:40   ` Jason Gunthorpe
2025-10-22 12:50   ` Jason Gunthorpe
2025-10-26  7:55     ` Shuai Xue
2025-10-27 12:09       ` Jason Gunthorpe
2025-10-28 13:46         ` Shuai Xue
2025-10-27 23:13   ` David Matlack
2025-10-28 12:02     ` Leon Romanovsky
2025-10-30 22:28       ` Alex Mastro
2025-10-29 16:50   ` Alex Mastro
2025-10-29 18:21     ` Leon Romanovsky
2025-10-30  0:25   ` Samiullah Khawaja
2025-10-30  6:48     ` Leon Romanovsky
2025-10-30 12:57       ` Jason Gunthorpe
2025-10-30 20:38   ` Alex Williamson [this message]
2025-10-31  6:48     ` Leon Romanovsky

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20251030143836.66cdf116@shazbot.org \
    --to=alex@shazbot.org \
    --cc=akpm@linux-foundation.org \
    --cc=alex.williamson@redhat.com \
    --cc=axboe@kernel.dk \
    --cc=bhelgaas@google.com \
    --cc=christian.koenig@amd.com \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=iommu@lists.linux.dev \
    --cc=jgg@nvidia.com \
    --cc=joro@8bytes.org \
    --cc=kvm@vger.kernel.org \
    --cc=leon@kernel.org \
    --cc=leonro@nvidia.com \
    --cc=linaro-mm-sig@lists.linaro.org \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-media@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-pci@vger.kernel.org \
    --cc=logang@deltatee.com \
    --cc=m.szyprowski@samsung.com \
    --cc=robin.murphy@arm.com \
    --cc=sumit.semwal@linaro.org \
    --cc=vivek.kasireddy@intel.com \
    --cc=will@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox