From: Yonatan Maman <ymaman@nvidia.com>
To: Alistair Popple <apopple@nvidia.com>
Cc: nouveau@lists.freedesktop.org, linux-kernel@vger.kernel.org,
linux-rdma@vger.kernel.org, linux-mm@kvack.org,
herbst@redhat.com, lyude@redhat.com, dakr@redhat.com,
airlied@gmail.com, simona@ffwll.ch, jgg@ziepe.ca,
leon@kernel.org, jglisse@redhat.com, akpm@linux-foundation.org,
dri-devel@lists.freedesktop.org, bskeggs@nvidia.com,
Gal Shalom <GalShalom@Nvidia.com>
Subject: Re: [PATCH v1 2/4] nouveau/dmem: HMM P2P DMA for private dev pages
Date: Wed, 16 Oct 2024 18:18:58 +0300
Message-ID: <7f09fb43-8e0e-4746-b93d-2d09efe52d88@nvidia.com>
In-Reply-To: <87bjzk8viu.fsf@nvdebian.thelocal>
On 16/10/2024 8:12, Alistair Popple wrote:
>
> Yonatan Maman <ymaman@nvidia.com> writes:
>
>> From: Yonatan Maman <Ymaman@Nvidia.com>
>>
>> Enabling Peer-to-Peer DMA (P2P DMA) access in GPU-centric applications
>> is crucial for minimizing data transfer overhead (e.g., for RDMA use
>> cases).
>>
>> This change aims to enable that capability for Nouveau over HMM device
>> private pages. P2P DMA for private device pages allows the GPU to
>> directly exchange data with other devices (e.g., NICs) without needing
>> to traverse system RAM.
>>
>> To fully support Peer-to-Peer for device private pages, the following
>> changes are made:
>>
>> - Introduce struct nouveau_dmem_hmm_p2p within struct nouveau_dmem
>> to manage BAR1 PCI P2P memory. p2p_start_addr holds the virtual
>> address allocated with pci_alloc_p2pmem(), and p2p_size represents
>> the allocated size of the PCI P2P memory.
>>
>> - nouveau_dmem_init - Ensure BAR1 accessibility and assign struct
>> pages (PCI_P2P_PAGE) for all BAR1 pages. Introduce
>> nouveau_alloc_bar1_pci_p2p_mem in nouveau_dmem to expose BAR1 for
>> use as P2P memory via pci_p2pdma_add_resource and implement static
>> allocation and assignment of struct pages using pci_alloc_p2pmem.
>> This function will be called from nouveau_dmem_init, and failure
>> triggers a warning message instead of driver failure.
>>
>> - nouveau_dmem_fini - Ensure BAR1 PCI P2P memory is properly
>> destroyed during driver cleanup. Introduce
>> nouveau_destroy_bar1_pci_p2p_mem to handle freeing of PCI P2P
>> memory associated with Nouveau BAR1. Modify nouveau_dmem_fini to
>> call nouveau_destroy_bar1_pci_p2p_mem.
>>
>> - Implement Nouveau `p2p_page` callback function - Implement BAR1
>> mapping for the chunk using `io_mem_reserve` if no mapping exists.
>> Retrieve the pre-allocated P2P virtual address and size from
>> `hmm_p2p`. Calculate the page offset within BAR1 and return the
>> corresponding P2P page.
>>
>> Signed-off-by: Yonatan Maman <Ymaman@Nvidia.com>
>> Reviewed-by: Gal Shalom <GalShalom@Nvidia.com>
>> ---
>> drivers/gpu/drm/nouveau/nouveau_dmem.c | 117 ++++++++++++++++++++++++-
>> 1 file changed, 115 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c
>> index 1a072568cef6..13fb8671f212 100644
>> --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c
>> +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c
>> @@ -40,6 +40,9 @@
>> #include <linux/hmm.h>
>> #include <linux/memremap.h>
>> #include <linux/migrate.h>
>> +#include <linux/pci-p2pdma.h>
>> +#include <nvkm/core/pci.h>
>> +
>>
>> /*
>> * FIXME: this is ugly right now we are using TTM to allocate vram and we pin
>> @@ -77,9 +80,15 @@ struct nouveau_dmem_migrate {
>> struct nouveau_channel *chan;
>> };
>>
>> +struct nouveau_dmem_hmm_p2p {
>> + size_t p2p_size;
>> + void *p2p_start_addr;
>> +};
>> +
>> struct nouveau_dmem {
>> struct nouveau_drm *drm;
>> struct nouveau_dmem_migrate migrate;
>> + struct nouveau_dmem_hmm_p2p hmm_p2p;
>> struct list_head chunks;
>> struct mutex mutex;
>> struct page *free_pages;
>> @@ -158,6 +167,61 @@ static int nouveau_dmem_copy_one(struct nouveau_drm *drm, struct page *spage,
>> return 0;
>> }
>>
>> +static int nouveau_dmem_bar1_mapping(struct nouveau_bo *nvbo,
>> + unsigned long long *bus_addr)
>> +{
>> + int ret;
>> + struct ttm_resource *mem = nvbo->bo.resource;
>> +
>> + if (mem->bus.offset) {
>> + *bus_addr = mem->bus.offset;
>> + return 0;
>> + }
>> +
>> + if (PFN_UP(nvbo->bo.base.size) > PFN_UP(nvbo->bo.resource->size))
>> + return -EINVAL;
>> +
>> + ret = ttm_bo_reserve(&nvbo->bo, false, false, NULL);
>> + if (ret)
>> + return ret;
>> +
>> + ret = nvbo->bo.bdev->funcs->io_mem_reserve(nvbo->bo.bdev, mem);
>> + *bus_addr = mem->bus.offset;
>> +
>> + ttm_bo_unreserve(&nvbo->bo);
>> + return ret;
>> +}
>> +
>> +static struct page *nouveau_dmem_get_dma_page(struct page *private_page)
>> +{
>> + int ret;
>> + unsigned long long offset_in_chunk, offset_in_bar1;
>> + unsigned long long chunk_bus_addr, page_bus_addr;
>> + unsigned long long bar1_base_addr;
>> + struct nouveau_drm *drm = page_to_drm(private_page);
>> + struct nouveau_bo *nvbo = nouveau_page_to_chunk(private_page)->bo;
>> + struct nvkm_device *nv_device = nvxx_device(drm);
>> + void *p2p_start_addr = drm->dmem->hmm_p2p.p2p_start_addr;
>> + size_t p2p_size = drm->dmem->hmm_p2p.p2p_size;
>> +
>> + bar1_base_addr = nv_device->func->resource_addr(nv_device, 1);
>> + offset_in_chunk =
>> + (page_to_pfn(private_page) << PAGE_SHIFT) -
>> + nouveau_page_to_chunk(private_page)->pagemap.range.start;
>> +
>> + ret = nouveau_dmem_bar1_mapping(nvbo, &chunk_bus_addr);
>> + if (ret)
>> + return ERR_PTR(ret);
>> +
>> + page_bus_addr = chunk_bus_addr + offset_in_chunk;
>> + if (!p2p_size || page_bus_addr > bar1_base_addr + p2p_size ||
>> + page_bus_addr < bar1_base_addr)
>> + return ERR_PTR(-ENOMEM);
>> +
>> + offset_in_bar1 = page_bus_addr - bar1_base_addr;
>> + return virt_to_page(p2p_start_addr + offset_in_bar1);
>
> This conversion looks a bit complicated. Once you have page_bus_addr I
> think you can just return pfn_to_page(page_bus_addr >> PAGE_SHIFT)
>
Agreed, I will fix that in v2.
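
Roughly what I have in mind for v2 (untested sketch, assuming the BAR1
struct pages created by pci_p2pdma_add_resource() cover the bus address
directly, so the p2p_start_addr lookup is no longer needed):

	ret = nouveau_dmem_bar1_mapping(nvbo, &chunk_bus_addr);
	if (ret)
		return ERR_PTR(ret);

	page_bus_addr = chunk_bus_addr + offset_in_chunk;
	if (!p2p_size || page_bus_addr > bar1_base_addr + p2p_size ||
	    page_bus_addr < bar1_base_addr)
		return ERR_PTR(-ENOMEM);

	/* BAR1 already has struct pages from pci_p2pdma_add_resource() */
	return pfn_to_page(page_bus_addr >> PAGE_SHIFT);
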
>> +}
>> +
>> static vm_fault_t nouveau_dmem_migrate_to_ram(struct vm_fault *vmf)
>> {
>> struct nouveau_drm *drm = page_to_drm(vmf->page);
>> @@ -219,8 +283,9 @@ static vm_fault_t nouveau_dmem_migrate_to_ram(struct vm_fault *vmf)
>> }
>>
>> static const struct dev_pagemap_ops nouveau_dmem_pagemap_ops = {
>> - .page_free = nouveau_dmem_page_free,
>> - .migrate_to_ram = nouveau_dmem_migrate_to_ram,
>> + .page_free = nouveau_dmem_page_free,
>> + .migrate_to_ram = nouveau_dmem_migrate_to_ram,
>> + .get_dma_page_for_device = nouveau_dmem_get_dma_page,
>> };
>>
>> static int
>> @@ -413,14 +478,31 @@ nouveau_dmem_evict_chunk(struct nouveau_dmem_chunk *chunk)
>> kvfree(dma_addrs);
>> }
>>
>> +static void nouveau_destroy_bar1_pci_p2p_mem(struct nouveau_drm *drm,
>> + struct pci_dev *pdev,
>> + void *p2p_start_addr,
>> + size_t p2p_size)
>> +{
>> + if (p2p_size)
>> + pci_free_p2pmem(pdev, p2p_start_addr, p2p_size);
>> +
>> + NV_INFO(drm, "PCI P2P memory freed(%p)\n", p2p_start_addr);
>> +}
>> +
>> void
>> nouveau_dmem_fini(struct nouveau_drm *drm)
>> {
>> struct nouveau_dmem_chunk *chunk, *tmp;
>> + struct nvkm_device *nv_device = nvxx_device(drm);
>>
>> if (drm->dmem == NULL)
>> return;
>>
>> + nouveau_destroy_bar1_pci_p2p_mem(drm,
>> + nv_device->func->pci(nv_device)->pdev,
>> + drm->dmem->hmm_p2p.p2p_start_addr,
>> + drm->dmem->hmm_p2p.p2p_size);
>> +
>> mutex_lock(&drm->dmem->mutex);
>>
>> list_for_each_entry_safe(chunk, tmp, &drm->dmem->chunks, list) {
>> @@ -586,10 +668,30 @@ nouveau_dmem_migrate_init(struct nouveau_drm *drm)
>> return -ENODEV;
>> }
>>
>> +static int nouveau_alloc_bar1_pci_p2p_mem(struct nouveau_drm *drm,
>> + struct pci_dev *pdev, size_t size,
>> + void **pp2p_start_addr,
>> + size_t *pp2p_size)
>> +{
>> + int ret;
>> +
>> + ret = pci_p2pdma_add_resource(pdev, 1, size, 0);
>> + if (ret)
>> + return ret;
>> +
>> + *pp2p_start_addr = pci_alloc_p2pmem(pdev, size);
>> + *pp2p_size = (*pp2p_start_addr) ? size : 0;
>
> Why return the size here? Personally I think it would be clearer to have
> the caller directly initialise/clear whatever struct values it needs.
>
Agreed, I will fix that in v2.
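
Roughly what I'm planning for v2 (untested sketch) - the helper only
returns the mapping and nouveau_dmem_init() owns the hmm_p2p fields:

	static void *nouveau_alloc_bar1_pci_p2p_mem(struct nouveau_drm *drm,
						    struct pci_dev *pdev,
						    size_t size)
	{
		void *addr;
		int ret;

		ret = pci_p2pdma_add_resource(pdev, 1, size, 0);
		if (ret)
			return ERR_PTR(ret);

		addr = pci_alloc_p2pmem(pdev, size);
		if (!addr)
			return ERR_PTR(-ENOMEM);

		NV_INFO(drm, "PCI P2P memory allocated(%p)\n", addr);
		return addr;
	}

and in nouveau_dmem_init():

	p2p_addr = nouveau_alloc_bar1_pci_p2p_mem(drm,
			nv_device->func->pci(nv_device)->pdev, bar1_size);
	if (IS_ERR(p2p_addr)) {
		NV_WARN(drm,
			"PCI P2P memory allocation failed, HMM P2P won't be supported\n");
		p2p_addr = NULL;
		bar1_size = 0;
	}
	drm->dmem->hmm_p2p.p2p_start_addr = p2p_addr;
	drm->dmem->hmm_p2p.p2p_size = bar1_size;
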
>> +
>> + NV_INFO(drm, "PCI P2P memory allocated(%p)\n", *pp2p_start_addr);
>> + return 0;
>> +}
>> +
>> void
>> nouveau_dmem_init(struct nouveau_drm *drm)
>> {
>> int ret;
>> + struct nvkm_device *nv_device = nvxx_device(drm);
>> + size_t bar1_size;
>>
>> /* This only make sense on PASCAL or newer */
>> if (drm->client.device.info.family < NV_DEVICE_INFO_V0_PASCAL)
>> @@ -610,6 +712,17 @@ nouveau_dmem_init(struct nouveau_drm *drm)
>> kfree(drm->dmem);
>> drm->dmem = NULL;
>> }
>> +
>> + /* Expose BAR1 for HMM P2P Memory */
>> + bar1_size = nv_device->func->resource_size(nv_device, 1);
>> + ret = nouveau_alloc_bar1_pci_p2p_mem(drm,
>> + nv_device->func->pci(nv_device)->pdev,
>> + bar1_size,
>> + &drm->dmem->hmm_p2p.p2p_start_addr,
>> + &drm->dmem->hmm_p2p.p2p_size);
>> + if (ret)
>> + NV_WARN(drm,
>> + "PCI P2P memory allocation failed, HMM P2P won't be supported\n");
>> }
>>
>> static unsigned long nouveau_dmem_migrate_copy_one(struct nouveau_drm *drm,
>