From: Matthew Brost <matthew.brost@intel.com>
To: "Thomas Hellström" <thomas.hellstrom@linux.intel.com>
Cc: intel-xe@lists.freedesktop.org,
"Christian König" <christian.koenig@amd.com>,
dri-devel@lists.freedesktop.org, "Jason Gunthorpe" <jgg@ziepe.ca>,
"Andrew Morton" <akpm@linux-foundation.org>,
"Simona Vetter" <simona.vetter@ffwll.ch>,
"Dave Airlie" <airlied@gmail.com>,
"Alistair Popple" <apopple@nvidia.com>,
linux-mm@kvack.org, linux-kernel@vger.kernel.org
Subject: Re: [PATCH v2 2/4] drm/xe/userptr: Convert invalidation to two-pass MMU notifier
Date: Mon, 2 Mar 2026 10:57:54 -0800 [thread overview]
Message-ID: <aaXdsiRlux/cZ6WC@lstrano-desk.jf.intel.com> (raw)
In-Reply-To: <20260302163248.105454-3-thomas.hellstrom@linux.intel.com>
On Mon, Mar 02, 2026 at 05:32:46PM +0100, Thomas Hellström wrote:
> In multi-GPU scenarios, asynchronous GPU job latency is a bottleneck if
> each notifier waits for its own GPU before returning. The two-pass
> mmu_interval_notifier infrastructure allows deferring the wait to a
> second pass, so all GPUs can be signalled in the first pass before
> any of them are waited on.
>
> Convert the userptr invalidation to use the two-pass model:
>
> Use invalidate_start as the first pass to mark the VMA for repin and
> enable software signalling on the VM reservation fences to start any
> gpu work needed for signaling. Fall back to completing the work
> synchronously if all fences are already signalled, or if a concurrent
> invalidation is already using the embedded finish structure.
>
> Use invalidate_finish as the second pass to wait for the reservation
> fences to complete, invalidate the GPU TLB in fault mode, and unmap
> the gpusvm pages.
>
> Embed a struct mmu_interval_notifier_finish in struct xe_userptr to
> avoid dynamic allocation in the notifier callback. Use a finish_inuse
> flag to prevent two concurrent invalidations from using it
> simultaneously; fall back to the synchronous path for the second caller.
>
A couple nits below. Also for clarity, I'd probably rework this series...
- Move patch #3 to be the 2nd patch in the series
- Squash patch #2, #4 into a single patch, make this the last patch
Let me know what you think about the reordering. I'm looking at the
series with all patches applied, and aside from the nits below everything
in xe_userptr.c looks good to me.
> Assisted-by: GitHub Copilot:claude-sonnet-4.6
> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> ---
> drivers/gpu/drm/xe/xe_userptr.c | 96 +++++++++++++++++++++++++--------
> drivers/gpu/drm/xe/xe_userptr.h | 14 +++++
> 2 files changed, 88 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_userptr.c b/drivers/gpu/drm/xe/xe_userptr.c
> index e120323c43bc..440b0a79d16f 100644
> --- a/drivers/gpu/drm/xe/xe_userptr.c
> +++ b/drivers/gpu/drm/xe/xe_userptr.c
> @@ -73,18 +73,42 @@ int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma)
> &ctx);
> }
>
> -static void __vma_userptr_invalidate(struct xe_vm *vm, struct xe_userptr_vma *uvma)
> +static void xe_vma_userptr_do_inval(struct xe_vm *vm, struct xe_userptr_vma *uvma,
> + bool is_deferred)
> {
> struct xe_userptr *userptr = &uvma->userptr;
> struct xe_vma *vma = &uvma->vma;
> - struct dma_resv_iter cursor;
> - struct dma_fence *fence;
> struct drm_gpusvm_ctx ctx = {
> .in_notifier = true,
> .read_only = xe_vma_read_only(vma),
> };
> long err;
>
xe_svm_assert_in_notifier(vm);
> + err = dma_resv_wait_timeout(xe_vm_resv(vm),
> + DMA_RESV_USAGE_BOOKKEEP,
> + false, MAX_SCHEDULE_TIMEOUT);
> + XE_WARN_ON(err <= 0);
> +
> + if (xe_vm_in_fault_mode(vm) && userptr->initial_bind) {
> + err = xe_vm_invalidate_vma(vma);
> + XE_WARN_ON(err);
> + }
> +
> + if (is_deferred)
> + userptr->finish_inuse = false;
> + drm_gpusvm_unmap_pages(&vm->svm.gpusvm, &uvma->userptr.pages,
> + xe_vma_size(vma) >> PAGE_SHIFT, &ctx);
> +}
> +
> +static struct mmu_interval_notifier_finish *
> +xe_vma_userptr_invalidate_pass1(struct xe_vm *vm, struct xe_userptr_vma *uvma)
> +{
> + struct xe_userptr *userptr = &uvma->userptr;
> + struct xe_vma *vma = &uvma->vma;
> + struct dma_resv_iter cursor;
> + struct dma_fence *fence;
> + bool signaled = true;
> +
xe_svm_assert_in_notifier(vm);
> /*
> * Tell exec and rebind worker they need to repin and rebind this
> * userptr.
> @@ -105,27 +129,32 @@ static void __vma_userptr_invalidate(struct xe_vm *vm, struct xe_userptr_vma *uv
> */
> dma_resv_iter_begin(&cursor, xe_vm_resv(vm),
> DMA_RESV_USAGE_BOOKKEEP);
> - dma_resv_for_each_fence_unlocked(&cursor, fence)
> + dma_resv_for_each_fence_unlocked(&cursor, fence) {
> dma_fence_enable_sw_signaling(fence);
> + if (signaled && !dma_fence_is_signaled(fence))
> + signaled = false;
> + }
> dma_resv_iter_end(&cursor);
>
> - err = dma_resv_wait_timeout(xe_vm_resv(vm),
> - DMA_RESV_USAGE_BOOKKEEP,
> - false, MAX_SCHEDULE_TIMEOUT);
> - XE_WARN_ON(err <= 0);
> -
> - if (xe_vm_in_fault_mode(vm) && userptr->initial_bind) {
> - err = xe_vm_invalidate_vma(vma);
> - XE_WARN_ON(err);
> + /*
> + * Only one caller at a time can use the multi-pass state.
> + * If it's already in use, or all fences are already signaled,
> + * proceed directly to invalidation without deferring.
> + */
> + if (signaled || userptr->finish_inuse) {
> + xe_vma_userptr_do_inval(vm, uvma, false);
> + return NULL;
> }
>
> - drm_gpusvm_unmap_pages(&vm->svm.gpusvm, &uvma->userptr.pages,
> - xe_vma_size(vma) >> PAGE_SHIFT, &ctx);
> + userptr->finish_inuse = true;
> +
> + return &userptr->finish;
> }
>
> -static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
> - const struct mmu_notifier_range *range,
> - unsigned long cur_seq)
> +static bool xe_vma_userptr_invalidate_start(struct mmu_interval_notifier *mni,
> + const struct mmu_notifier_range *range,
> + unsigned long cur_seq,
> + struct mmu_interval_notifier_finish **p_finish)
> {
> struct xe_userptr_vma *uvma = container_of(mni, typeof(*uvma), userptr.notifier);
> struct xe_vma *vma = &uvma->vma;
> @@ -138,21 +167,40 @@ static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
> return false;
>
> vm_dbg(&xe_vma_vm(vma)->xe->drm,
> - "NOTIFIER: addr=0x%016llx, range=0x%016llx",
> + "NOTIFIER PASS1: addr=0x%016llx, range=0x%016llx",
> xe_vma_start(vma), xe_vma_size(vma));
>
> down_write(&vm->svm.gpusvm.notifier_lock);
> mmu_interval_set_seq(mni, cur_seq);
>
> - __vma_userptr_invalidate(vm, uvma);
> + *p_finish = xe_vma_userptr_invalidate_pass1(vm, uvma);
> +
> up_write(&vm->svm.gpusvm.notifier_lock);
> - trace_xe_vma_userptr_invalidate_complete(vma);
> + if (!*p_finish)
> + trace_xe_vma_userptr_invalidate_complete(vma);
>
> return true;
> }
>
> +static void xe_vma_userptr_invalidate_finish(struct mmu_interval_notifier_finish *finish)
> +{
> + struct xe_userptr_vma *uvma = container_of(finish, typeof(*uvma), userptr.finish);
> + struct xe_vma *vma = &uvma->vma;
> + struct xe_vm *vm = xe_vma_vm(vma);
> +
> + vm_dbg(&xe_vma_vm(vma)->xe->drm,
> + "NOTIFIER PASS2: addr=0x%016llx, range=0x%016llx",
> + xe_vma_start(vma), xe_vma_size(vma));
> +
> + down_write(&vm->svm.gpusvm.notifier_lock);
> + xe_vma_userptr_do_inval(vm, uvma, true);
> + up_write(&vm->svm.gpusvm.notifier_lock);
> + trace_xe_vma_userptr_invalidate_complete(vma);
> +}
> +
> static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = {
> - .invalidate = vma_userptr_invalidate,
> + .invalidate_start = xe_vma_userptr_invalidate_start,
> + .invalidate_finish = xe_vma_userptr_invalidate_finish,
> };
>
> #if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT)
> @@ -164,6 +212,7 @@ static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = {
> */
> void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma)
> {
> + static struct mmu_interval_notifier_finish *finish;
> struct xe_vm *vm = xe_vma_vm(&uvma->vma);
>
> /* Protect against concurrent userptr pinning */
> @@ -179,7 +228,10 @@ void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma)
> if (!mmu_interval_read_retry(&uvma->userptr.notifier,
> uvma->userptr.pages.notifier_seq))
> uvma->userptr.pages.notifier_seq -= 2;
> - __vma_userptr_invalidate(vm, uvma);
> +
> + finish = xe_vma_userptr_invalidate_pass1(vm, uvma);
> + if (finish)
> + xe_vma_userptr_do_inval(vm, uvma, true);
> }
> #endif
>
> diff --git a/drivers/gpu/drm/xe/xe_userptr.h b/drivers/gpu/drm/xe/xe_userptr.h
> index ef801234991e..4f42db61fd62 100644
> --- a/drivers/gpu/drm/xe/xe_userptr.h
> +++ b/drivers/gpu/drm/xe/xe_userptr.h
> @@ -57,12 +57,26 @@ struct xe_userptr {
> */
> struct mmu_interval_notifier notifier;
>
> + /**
> + * @finish: MMU notifier finish structure for two-pass invalidation.
> + * Embedded here to avoid allocation in the notifier callback.
> + * Protected by @vm::svm.gpusvm.notifier_lock.
> + */
> + struct mmu_interval_notifier_finish finish;
> + /**
> + * @finish_inuse: Whether @finish is currently in use by an in-progress
> + * two-pass invalidation.
> + * Protected by @vm::svm.gpusvm.notifier_lock.
> + */
> + bool finish_inuse;
> +
> /**
> * @initial_bind: user pointer has been bound at least once.
> * write: vm->svm.gpusvm.notifier_lock in read mode and vm->resv held.
> * read: vm->svm.gpusvm.notifier_lock in write mode or vm->resv held.
> */
> bool initial_bind;
> +
Unrelated.
Matt
> #if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT)
> u32 divisor;
> #endif
> --
> 2.53.0
>
next prev parent reply other threads:[~2026-03-02 18:58 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-03-02 16:32 [PATCH v2 0/4] Two-pass MMU interval notifiers Thomas Hellström
2026-03-02 16:32 ` [PATCH v2 1/4] mm/mmu_notifier: Allow two-pass struct mmu_interval_notifiers Thomas Hellström
2026-03-02 19:48 ` Matthew Brost
2026-03-02 21:12 ` Thomas Hellström
2026-03-02 16:32 ` [PATCH v2 2/4] drm/xe/userptr: Convert invalidation to two-pass MMU notifier Thomas Hellström
2026-03-02 18:57 ` Matthew Brost [this message]
2026-03-02 21:22 ` Thomas Hellström
2026-03-02 21:30 ` Matthew Brost
2026-03-02 16:32 ` [PATCH v2 3/4] drm/xe: Split TLB invalidation into submit and wait steps Thomas Hellström
2026-03-02 19:06 ` Matthew Brost
2026-03-02 21:29 ` Thomas Hellström
2026-03-02 21:31 ` Matthew Brost
2026-03-02 16:32 ` [PATCH v2 4/4] drm/xe/userptr: Defer Waiting for TLB invalidation to the second pass if possible Thomas Hellström
2026-03-02 19:14 ` Matthew Brost
2026-03-02 21:33 ` Thomas Hellström
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=aaXdsiRlux/cZ6WC@lstrano-desk.jf.intel.com \
--to=matthew.brost@intel.com \
--cc=airlied@gmail.com \
--cc=akpm@linux-foundation.org \
--cc=apopple@nvidia.com \
--cc=christian.koenig@amd.com \
--cc=dri-devel@lists.freedesktop.org \
--cc=intel-xe@lists.freedesktop.org \
--cc=jgg@ziepe.ca \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=simona.vetter@ffwll.ch \
--cc=thomas.hellstrom@linux.intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox