From: Matthew Brost <matthew.brost@intel.com>
To: "Thomas Hellström" <thomas.hellstrom@linux.intel.com>
Cc: intel-xe@lists.freedesktop.org,
"Christian König" <christian.koenig@amd.com>,
dri-devel@lists.freedesktop.org, "Jason Gunthorpe" <jgg@ziepe.ca>,
"Andrew Morton" <akpm@linux-foundation.org>,
"Simona Vetter" <simona.vetter@ffwll.ch>,
"Dave Airlie" <airlied@gmail.com>,
"Alistair Popple" <apopple@nvidia.com>,
linux-mm@kvack.org, linux-kernel@vger.kernel.org
Subject: Re: [PATCH v2 2/4] drm/xe/userptr: Convert invalidation to two-pass MMU notifier
Date: Mon, 2 Mar 2026 10:57:54 -0800 [thread overview]
Message-ID: <aaXdsiRlux/cZ6WC@lstrano-desk.jf.intel.com> (raw)
In-Reply-To: <20260302163248.105454-3-thomas.hellstrom@linux.intel.com>
On Mon, Mar 02, 2026 at 05:32:46PM +0100, Thomas Hellström wrote:
> In multi-GPU scenarios, asynchronous GPU job latency is a bottleneck if
> each notifier waits for its own GPU before returning. The two-pass
> mmu_interval_notifier infrastructure allows deferring the wait to a
> second pass, so all GPUs can be signalled in the first pass before
> any of them are waited on.
>
> Convert the userptr invalidation to use the two-pass model:
>
> Use invalidate_start as the first pass to mark the VMA for repin and
> enable software signalling on the VM reservation fences to start any
> gpu work needed for signaling. Fall back to completing the work
> synchronously if all fences are already signalled, or if a concurrent
> invalidation is already using the embedded finish structure.
>
> Use invalidate_finish as the second pass to wait for the reservation
> fences to complete, invalidate the GPU TLB in fault mode, and unmap
> the gpusvm pages.
>
> Embed a struct mmu_interval_notifier_finish in struct xe_userptr to
> avoid dynamic allocation in the notifier callback. Use a finish_inuse
> flag to prevent two concurrent invalidations from using it
> simultaneously; fall back to the synchronous path for the second caller.
>
A couple nits below. Also for clarity, I'd probably rework this series...
- Move patch #3 to be the 2nd patch in the series
- Squash patch #2, #4 into a single patch, make this the last patch
Let me know what you think about the reordering. I'm looking at the
series with all patches applied, and aside from the nits below everything
in xe_userptr.c looks good to me.
> Assisted-by: GitHub Copilot:claude-sonnet-4.6
> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> ---
> drivers/gpu/drm/xe/xe_userptr.c | 96 +++++++++++++++++++++++++--------
> drivers/gpu/drm/xe/xe_userptr.h | 14 +++++
> 2 files changed, 88 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_userptr.c b/drivers/gpu/drm/xe/xe_userptr.c
> index e120323c43bc..440b0a79d16f 100644
> --- a/drivers/gpu/drm/xe/xe_userptr.c
> +++ b/drivers/gpu/drm/xe/xe_userptr.c
> @@ -73,18 +73,42 @@ int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma)
> &ctx);
> }
>
> -static void __vma_userptr_invalidate(struct xe_vm *vm, struct xe_userptr_vma *uvma)
> +static void xe_vma_userptr_do_inval(struct xe_vm *vm, struct xe_userptr_vma *uvma,
> + bool is_deferred)
> {
> struct xe_userptr *userptr = &uvma->userptr;
> struct xe_vma *vma = &uvma->vma;
> - struct dma_resv_iter cursor;
> - struct dma_fence *fence;
> struct drm_gpusvm_ctx ctx = {
> .in_notifier = true,
> .read_only = xe_vma_read_only(vma),
> };
> long err;
>
xe_svm_assert_in_notifier(vm);
> + err = dma_resv_wait_timeout(xe_vm_resv(vm),
> + DMA_RESV_USAGE_BOOKKEEP,
> + false, MAX_SCHEDULE_TIMEOUT);
> + XE_WARN_ON(err <= 0);
> +
> + if (xe_vm_in_fault_mode(vm) && userptr->initial_bind) {
> + err = xe_vm_invalidate_vma(vma);
> + XE_WARN_ON(err);
> + }
> +
> + if (is_deferred)
> + userptr->finish_inuse = false;
> + drm_gpusvm_unmap_pages(&vm->svm.gpusvm, &uvma->userptr.pages,
> + xe_vma_size(vma) >> PAGE_SHIFT, &ctx);
> +}
> +
> +static struct mmu_interval_notifier_finish *
> +xe_vma_userptr_invalidate_pass1(struct xe_vm *vm, struct xe_userptr_vma *uvma)
> +{
> + struct xe_userptr *userptr = &uvma->userptr;
> + struct xe_vma *vma = &uvma->vma;
> + struct dma_resv_iter cursor;
> + struct dma_fence *fence;
> + bool signaled = true;
> +
xe_svm_assert_in_notifier(vm);
> /*
> * Tell exec and rebind worker they need to repin and rebind this
> * userptr.
> @@ -105,27 +129,32 @@ static void __vma_userptr_invalidate(struct xe_vm *vm, struct xe_userptr_vma *uv
> */
> dma_resv_iter_begin(&cursor, xe_vm_resv(vm),
> DMA_RESV_USAGE_BOOKKEEP);
> - dma_resv_for_each_fence_unlocked(&cursor, fence)
> + dma_resv_for_each_fence_unlocked(&cursor, fence) {
> dma_fence_enable_sw_signaling(fence);
> + if (signaled && !dma_fence_is_signaled(fence))
> + signaled = false;
> + }
> dma_resv_iter_end(&cursor);
>
> - err = dma_resv_wait_timeout(xe_vm_resv(vm),
> - DMA_RESV_USAGE_BOOKKEEP,
> - false, MAX_SCHEDULE_TIMEOUT);
> - XE_WARN_ON(err <= 0);
> -
> - if (xe_vm_in_fault_mode(vm) && userptr->initial_bind) {
> - err = xe_vm_invalidate_vma(vma);
> - XE_WARN_ON(err);
> + /*
> + * Only one caller at a time can use the multi-pass state.
> + * If it's already in use, or all fences are already signaled,
> + * proceed directly to invalidation without deferring.
> + */
> + if (signaled || userptr->finish_inuse) {
> + xe_vma_userptr_do_inval(vm, uvma, false);
> + return NULL;
> }
>
> - drm_gpusvm_unmap_pages(&vm->svm.gpusvm, &uvma->userptr.pages,
> - xe_vma_size(vma) >> PAGE_SHIFT, &ctx);
> + userptr->finish_inuse = true;
> +
> + return &userptr->finish;
> }
>
> -static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
> - const struct mmu_notifier_range *range,
> - unsigned long cur_seq)
> +static bool xe_vma_userptr_invalidate_start(struct mmu_interval_notifier *mni,
> + const struct mmu_notifier_range *range,
> + unsigned long cur_seq,
> + struct mmu_interval_notifier_finish **p_finish)
> {
> struct xe_userptr_vma *uvma = container_of(mni, typeof(*uvma), userptr.notifier);
> struct xe_vma *vma = &uvma->vma;
> @@ -138,21 +167,40 @@ static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
> return false;
>
> vm_dbg(&xe_vma_vm(vma)->xe->drm,
> - "NOTIFIER: addr=0x%016llx, range=0x%016llx",
> + "NOTIFIER PASS1: addr=0x%016llx, range=0x%016llx",
> xe_vma_start(vma), xe_vma_size(vma));
>
> down_write(&vm->svm.gpusvm.notifier_lock);
> mmu_interval_set_seq(mni, cur_seq);
>
> - __vma_userptr_invalidate(vm, uvma);
> + *p_finish = xe_vma_userptr_invalidate_pass1(vm, uvma);
> +
> up_write(&vm->svm.gpusvm.notifier_lock);
> - trace_xe_vma_userptr_invalidate_complete(vma);
> + if (!*p_finish)
> + trace_xe_vma_userptr_invalidate_complete(vma);
>
> return true;
> }
>
> +static void xe_vma_userptr_invalidate_finish(struct mmu_interval_notifier_finish *finish)
> +{
> + struct xe_userptr_vma *uvma = container_of(finish, typeof(*uvma), userptr.finish);
> + struct xe_vma *vma = &uvma->vma;
> + struct xe_vm *vm = xe_vma_vm(vma);
> +
> + vm_dbg(&xe_vma_vm(vma)->xe->drm,
> + "NOTIFIER PASS2: addr=0x%016llx, range=0x%016llx",
> + xe_vma_start(vma), xe_vma_size(vma));
> +
> + down_write(&vm->svm.gpusvm.notifier_lock);
> + xe_vma_userptr_do_inval(vm, uvma, true);
> + up_write(&vm->svm.gpusvm.notifier_lock);
> + trace_xe_vma_userptr_invalidate_complete(vma);
> +}
> +
> static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = {
> - .invalidate = vma_userptr_invalidate,
> + .invalidate_start = xe_vma_userptr_invalidate_start,
> + .invalidate_finish = xe_vma_userptr_invalidate_finish,
> };
>
> #if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT)
> @@ -164,6 +212,7 @@ static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = {
> */
> void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma)
> {
> + static struct mmu_interval_notifier_finish *finish;
> struct xe_vm *vm = xe_vma_vm(&uvma->vma);
>
> /* Protect against concurrent userptr pinning */
> @@ -179,7 +228,10 @@ void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma)
> if (!mmu_interval_read_retry(&uvma->userptr.notifier,
> uvma->userptr.pages.notifier_seq))
> uvma->userptr.pages.notifier_seq -= 2;
> - __vma_userptr_invalidate(vm, uvma);
> +
> + finish = xe_vma_userptr_invalidate_pass1(vm, uvma);
> + if (finish)
> + xe_vma_userptr_do_inval(vm, uvma, true);
> }
> #endif
>
> diff --git a/drivers/gpu/drm/xe/xe_userptr.h b/drivers/gpu/drm/xe/xe_userptr.h
> index ef801234991e..4f42db61fd62 100644
> --- a/drivers/gpu/drm/xe/xe_userptr.h
> +++ b/drivers/gpu/drm/xe/xe_userptr.h
> @@ -57,12 +57,26 @@ struct xe_userptr {
> */
> struct mmu_interval_notifier notifier;
>
> + /**
> + * @finish: MMU notifier finish structure for two-pass invalidation.
> + * Embedded here to avoid allocation in the notifier callback.
> + * Protected by @vm::svm.gpusvm.notifier_lock.
> + */
> + struct mmu_interval_notifier_finish finish;
> + /**
> + * @finish_inuse: Whether @finish is currently in use by an in-progress
> + * two-pass invalidation.
> + * Protected by @vm::svm.gpusvm.notifier_lock.
> + */
> + bool finish_inuse;
> +
> /**
> * @initial_bind: user pointer has been bound at least once.
> * write: vm->svm.gpusvm.notifier_lock in read mode and vm->resv held.
> * read: vm->svm.gpusvm.notifier_lock in write mode or vm->resv held.
> */
> bool initial_bind;
> +
Unrelated.
Matt
> #if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT)
> u32 divisor;
> #endif
> --
> 2.53.0
>
next prev parent reply other threads:[~2026-03-02 18:58 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-03-02 16:32 [PATCH v2 0/4] Two-pass MMU interval notifiers Thomas Hellström
2026-03-02 16:32 ` [PATCH v2 1/4] mm/mmu_notifier: Allow two-pass struct mmu_interval_notifiers Thomas Hellström
2026-03-02 19:48 ` Matthew Brost
2026-03-02 21:12 ` Thomas Hellström
2026-03-02 16:32 ` [PATCH v2 2/4] drm/xe/userptr: Convert invalidation to two-pass MMU notifier Thomas Hellström
2026-03-02 18:57 ` Matthew Brost [this message]
2026-03-02 21:22 ` Thomas Hellström
2026-03-02 21:30 ` Matthew Brost
2026-03-02 16:32 ` [PATCH v2 3/4] drm/xe: Split TLB invalidation into submit and wait steps Thomas Hellström
2026-03-02 19:06 ` Matthew Brost
2026-03-02 21:29 ` Thomas Hellström
2026-03-02 21:31 ` Matthew Brost
2026-03-02 16:32 ` [PATCH v2 4/4] drm/xe/userptr: Defer Waiting for TLB invalidation to the second pass if possible Thomas Hellström
2026-03-02 19:14 ` Matthew Brost
2026-03-02 21:33 ` Thomas Hellström
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=aaXdsiRlux/cZ6WC@lstrano-desk.jf.intel.com \
--to=matthew.brost@intel.com \
--cc=airlied@gmail.com \
--cc=akpm@linux-foundation.org \
--cc=apopple@nvidia.com \
--cc=christian.koenig@amd.com \
--cc=dri-devel@lists.freedesktop.org \
--cc=intel-xe@lists.freedesktop.org \
--cc=jgg@ziepe.ca \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=simona.vetter@ffwll.ch \
--cc=thomas.hellstrom@linux.intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox