From: Boris Ostrovsky <boris.ostrovsky@oracle.com>
To: Jason Gunthorpe <jgg@ziepe.ca>,
linux-mm@kvack.org, Jerome Glisse <jglisse@redhat.com>,
Ralph Campbell <rcampbell@nvidia.com>,
John Hubbard <jhubbard@nvidia.com>,
Felix.Kuehling@amd.com
Cc: linux-rdma@vger.kernel.org, dri-devel@lists.freedesktop.org,
amd-gfx@lists.freedesktop.org,
"Alex Deucher" <alexander.deucher@amd.com>,
"Ben Skeggs" <bskeggs@redhat.com>,
"Christian König" <christian.koenig@amd.com>,
"David Zhou" <David1.Zhou@amd.com>,
"Dennis Dalessandro" <dennis.dalessandro@intel.com>,
"Juergen Gross" <jgross@suse.com>,
"Mike Marciniszyn" <mike.marciniszyn@intel.com>,
"Oleksandr Andrushchenko" <oleksandr_andrushchenko@epam.com>,
"Petr Cvek" <petrcvekcz@gmail.com>,
"Stefano Stabellini" <sstabellini@kernel.org>,
nouveau@lists.freedesktop.org, xen-devel@lists.xenproject.org,
"Christoph Hellwig" <hch@infradead.org>,
"Jason Gunthorpe" <jgg@mellanox.com>
Subject: Re: [PATCH v2 09/15] xen/gntdev: use mmu_range_notifier_insert
Date: Wed, 30 Oct 2019 12:55:37 -0400
Message-ID: <0355257f-6a3a-cdcd-d206-aec3df97dded@oracle.com>
In-Reply-To: <20191028201032.6352-10-jgg@ziepe.ca>
On 10/28/19 4:10 PM, Jason Gunthorpe wrote:
> From: Jason Gunthorpe <jgg@mellanox.com>
>
> gntdev simply wants to monitor a specific VMA for any notifier events;
> this can be done straightforwardly using mmu_range_notifier_insert() over
> the VMA's VA range.
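For anyone following the series, the pattern being adopted is roughly the
following. This is a minimal sketch against the mmu_range_notifier API
introduced earlier in the series, not the actual gntdev code;
sketch_invalidate/sketch_ops are illustrative names only:

	static bool sketch_invalidate(struct mmu_range_notifier *mrn,
				      const struct mmu_notifier_range *range,
				      unsigned long cur_seq)
	{
		/* Tear down anything overlapping [range->start, range->end) */
		return true;
	}

	static const struct mmu_range_notifier_ops sketch_ops = {
		.invalidate = sketch_invalidate,
	};

	/* In mmap(), with map->notifier.ops already set to &sketch_ops: */
	err = mmu_range_notifier_insert(&map->notifier, vma->vm_start,
					vma->vm_end - vma->vm_start,
					vma->vm_mm);

	/* In the vm_ops->close() handler, when the VMA goes away: */
	mmu_range_notifier_remove(&map->notifier);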
>
> The notifier should be attached until the original VMA is destroyed.
>
> It is unclear if any of this is even sane, but at least a lot of duplicate
> code is removed.
I didn't have a chance to look at the patch itself yet, but as a heads-up:
it crashes dom0.
-boris
>
> Cc: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
> Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
> Cc: xen-devel@lists.xenproject.org
> Cc: Juergen Gross <jgross@suse.com>
> Cc: Stefano Stabellini <sstabellini@kernel.org>
> Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
> ---
> drivers/xen/gntdev-common.h | 8 +-
> drivers/xen/gntdev.c | 180 ++++++++++--------------------------
> 2 files changed, 49 insertions(+), 139 deletions(-)
>
> diff --git a/drivers/xen/gntdev-common.h b/drivers/xen/gntdev-common.h
> index 2f8b949c3eeb14..b201fdd20b667b 100644
> --- a/drivers/xen/gntdev-common.h
> +++ b/drivers/xen/gntdev-common.h
> @@ -21,15 +21,8 @@ struct gntdev_dmabuf_priv;
> struct gntdev_priv {
> /* Maps with visible offsets in the file descriptor. */
> struct list_head maps;
> - /*
> - * Maps that are not visible; will be freed on munmap.
> - * Only populated if populate_freeable_maps == 1
> - */
> - struct list_head freeable_maps;
> /* lock protects maps and freeable_maps. */
> struct mutex lock;
> - struct mm_struct *mm;
> - struct mmu_notifier mn;
>
> #ifdef CONFIG_XEN_GRANT_DMA_ALLOC
> /* Device for which DMA memory is allocated. */
> @@ -49,6 +42,7 @@ struct gntdev_unmap_notify {
> };
>
> struct gntdev_grant_map {
> + struct mmu_range_notifier notifier;
> struct list_head next;
> struct vm_area_struct *vma;
> int index;
> diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
> index a446a7221e13e9..12d626670bebbc 100644
> --- a/drivers/xen/gntdev.c
> +++ b/drivers/xen/gntdev.c
> @@ -65,7 +65,6 @@ MODULE_PARM_DESC(limit, "Maximum number of grants that may be mapped by "
> static atomic_t pages_mapped = ATOMIC_INIT(0);
>
> static int use_ptemod;
> -#define populate_freeable_maps use_ptemod
>
> static int unmap_grant_pages(struct gntdev_grant_map *map,
> int offset, int pages);
> @@ -251,12 +250,6 @@ void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map)
> evtchn_put(map->notify.event);
> }
>
> - if (populate_freeable_maps && priv) {
> - mutex_lock(&priv->lock);
> - list_del(&map->next);
> - mutex_unlock(&priv->lock);
> - }
> -
> if (map->pages && !use_ptemod)
> unmap_grant_pages(map, 0, map->count);
> gntdev_free_map(map);
> @@ -445,17 +438,9 @@ static void gntdev_vma_close(struct vm_area_struct *vma)
> struct gntdev_priv *priv = file->private_data;
>
> pr_debug("gntdev_vma_close %p\n", vma);
> - if (use_ptemod) {
> - /* It is possible that an mmu notifier could be running
> - * concurrently, so take priv->lock to ensure that the vma won't
> - * vanishing during the unmap_grant_pages call, since we will
> - * spin here until that completes. Such a concurrent call will
> - * not do any unmapping, since that has been done prior to
> - * closing the vma, but it may still iterate the unmap_ops list.
> - */
> - mutex_lock(&priv->lock);
> + if (use_ptemod && map->vma == vma) {
> + mmu_range_notifier_remove(&map->notifier);
> map->vma = NULL;
> - mutex_unlock(&priv->lock);
> }
> vma->vm_private_data = NULL;
> gntdev_put_map(priv, map);
> @@ -477,109 +462,44 @@ static const struct vm_operations_struct gntdev_vmops = {
>
> /* ------------------------------------------------------------------ */
>
> -static bool in_range(struct gntdev_grant_map *map,
> - unsigned long start, unsigned long end)
> -{
> - if (!map->vma)
> - return false;
> - if (map->vma->vm_start >= end)
> - return false;
> - if (map->vma->vm_end <= start)
> - return false;
> -
> - return true;
> -}
> -
> -static int unmap_if_in_range(struct gntdev_grant_map *map,
> - unsigned long start, unsigned long end,
> - bool blockable)
> +static bool gntdev_invalidate(struct mmu_range_notifier *mn,
> + const struct mmu_notifier_range *range,
> + unsigned long cur_seq)
> {
> + struct gntdev_grant_map *map =
> + container_of(mn, struct gntdev_grant_map, notifier);
> unsigned long mstart, mend;
> int err;
>
> - if (!in_range(map, start, end))
> - return 0;
> + if (!mmu_notifier_range_blockable(range))
> + return false;
>
> - if (!blockable)
> - return -EAGAIN;
> + /*
> + * If the VMA is split or otherwise changed the notifier is not
> + * updated, but we don't want to process VA's outside the modified
> + * VMA. FIXME: It would be much more understandable to just prevent
> + * modifying the VMA in the first place.
> + */
> + if (map->vma->vm_start >= range->end ||
> + map->vma->vm_end <= range->start)
> + return true;
>
> - mstart = max(start, map->vma->vm_start);
> - mend = min(end, map->vma->vm_end);
> + mstart = max(range->start, map->vma->vm_start);
> + mend = min(range->end, map->vma->vm_end);
> pr_debug("map %d+%d (%lx %lx), range %lx %lx, mrange %lx %lx\n",
> map->index, map->count,
> map->vma->vm_start, map->vma->vm_end,
> - start, end, mstart, mend);
> + range->start, range->end, mstart, mend);
> err = unmap_grant_pages(map,
> (mstart - map->vma->vm_start) >> PAGE_SHIFT,
> (mend - mstart) >> PAGE_SHIFT);
> WARN_ON(err);
>
> - return 0;
> -}
> -
> -static int mn_invl_range_start(struct mmu_notifier *mn,
> - const struct mmu_notifier_range *range)
> -{
> - struct gntdev_priv *priv = container_of(mn, struct gntdev_priv, mn);
> - struct gntdev_grant_map *map;
> - int ret = 0;
> -
> - if (mmu_notifier_range_blockable(range))
> - mutex_lock(&priv->lock);
> - else if (!mutex_trylock(&priv->lock))
> - return -EAGAIN;
> -
> - list_for_each_entry(map, &priv->maps, next) {
> - ret = unmap_if_in_range(map, range->start, range->end,
> - mmu_notifier_range_blockable(range));
> - if (ret)
> - goto out_unlock;
> - }
> - list_for_each_entry(map, &priv->freeable_maps, next) {
> - ret = unmap_if_in_range(map, range->start, range->end,
> - mmu_notifier_range_blockable(range));
> - if (ret)
> - goto out_unlock;
> - }
> -
> -out_unlock:
> - mutex_unlock(&priv->lock);
> -
> - return ret;
> -}
> -
> -static void mn_release(struct mmu_notifier *mn,
> - struct mm_struct *mm)
> -{
> - struct gntdev_priv *priv = container_of(mn, struct gntdev_priv, mn);
> - struct gntdev_grant_map *map;
> - int err;
> -
> - mutex_lock(&priv->lock);
> - list_for_each_entry(map, &priv->maps, next) {
> - if (!map->vma)
> - continue;
> - pr_debug("map %d+%d (%lx %lx)\n",
> - map->index, map->count,
> - map->vma->vm_start, map->vma->vm_end);
> - err = unmap_grant_pages(map, /* offset */ 0, map->count);
> - WARN_ON(err);
> - }
> - list_for_each_entry(map, &priv->freeable_maps, next) {
> - if (!map->vma)
> - continue;
> - pr_debug("map %d+%d (%lx %lx)\n",
> - map->index, map->count,
> - map->vma->vm_start, map->vma->vm_end);
> - err = unmap_grant_pages(map, /* offset */ 0, map->count);
> - WARN_ON(err);
> - }
> - mutex_unlock(&priv->lock);
> + return true;
> }
>
> -static const struct mmu_notifier_ops gntdev_mmu_ops = {
> - .release = mn_release,
> - .invalidate_range_start = mn_invl_range_start,
> +static const struct mmu_range_notifier_ops gntdev_mmu_ops = {
> + .invalidate = gntdev_invalidate,
> };
>
> /* ------------------------------------------------------------------ */
> @@ -594,7 +514,6 @@ static int gntdev_open(struct inode *inode, struct file *flip)
> return -ENOMEM;
>
> INIT_LIST_HEAD(&priv->maps);
> - INIT_LIST_HEAD(&priv->freeable_maps);
> mutex_init(&priv->lock);
>
> #ifdef CONFIG_XEN_GNTDEV_DMABUF
> @@ -606,17 +525,6 @@ static int gntdev_open(struct inode *inode, struct file *flip)
> }
> #endif
>
> - if (use_ptemod) {
> - priv->mm = get_task_mm(current);
> - if (!priv->mm) {
> - kfree(priv);
> - return -ENOMEM;
> - }
> - priv->mn.ops = &gntdev_mmu_ops;
> - ret = mmu_notifier_register(&priv->mn, priv->mm);
> - mmput(priv->mm);
> - }
> -
> if (ret) {
> kfree(priv);
> return ret;
> @@ -653,16 +561,12 @@ static int gntdev_release(struct inode *inode, struct file *flip)
> list_del(&map->next);
> gntdev_put_map(NULL /* already removed */, map);
> }
> - WARN_ON(!list_empty(&priv->freeable_maps));
> mutex_unlock(&priv->lock);
>
> #ifdef CONFIG_XEN_GNTDEV_DMABUF
> gntdev_dmabuf_fini(priv->dmabuf_priv);
> #endif
>
> - if (use_ptemod)
> - mmu_notifier_unregister(&priv->mn, priv->mm);
> -
> kfree(priv);
> return 0;
> }
> @@ -723,8 +627,6 @@ static long gntdev_ioctl_unmap_grant_ref(struct gntdev_priv *priv,
> map = gntdev_find_map_index(priv, op.index >> PAGE_SHIFT, op.count);
> if (map) {
> list_del(&map->next);
> - if (populate_freeable_maps)
> - list_add_tail(&map->next, &priv->freeable_maps);
> err = 0;
> }
> mutex_unlock(&priv->lock);
> @@ -1096,11 +998,6 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
> goto unlock_out;
> if (use_ptemod && map->vma)
> goto unlock_out;
> - if (use_ptemod && priv->mm != vma->vm_mm) {
> - pr_warn("Huh? Other mm?\n");
> - goto unlock_out;
> - }
> -
> refcount_inc(&map->users);
>
> vma->vm_ops = &gntdev_vmops;
> @@ -1111,10 +1008,6 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
> vma->vm_flags |= VM_DONTCOPY;
>
> vma->vm_private_data = map;
> -
> - if (use_ptemod)
> - map->vma = vma;
> -
> if (map->flags) {
> if ((vma->vm_flags & VM_WRITE) &&
> (map->flags & GNTMAP_readonly))
> @@ -1125,8 +1018,28 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
> map->flags |= GNTMAP_readonly;
> }
>
> + if (use_ptemod) {
> + map->vma = vma;
> + err = mmu_range_notifier_insert_locked(
> + &map->notifier, vma->vm_start,
> + vma->vm_end - vma->vm_start, vma->vm_mm);
> + if (err)
> + goto out_unlock_put;
> + }
> mutex_unlock(&priv->lock);
>
> + /*
> + * gntdev takes the address of the PTE in find_grant_ptes() and passes
> + * it to the hypervisor in gntdev_map_grant_pages(). The purpose of
> + * the notifier is to prevent the hypervisor pointer to the PTE from
> + * going stale.
> + *
> + * Since this vma's mappings can't be touched without the mmap_sem,
> + * and we are holding it now, there is no need for the notifier_range
> + * locking pattern.
> + */
> + mmu_range_read_begin(&map->notifier);
> +
> if (use_ptemod) {
> map->pages_vm_start = vma->vm_start;
> err = apply_to_page_range(vma->vm_mm, vma->vm_start,
> @@ -1175,8 +1088,11 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
> mutex_unlock(&priv->lock);
> out_put_map:
> if (use_ptemod) {
> - map->vma = NULL;
> unmap_grant_pages(map, 0, map->count);
> + if (map->vma) {
> + mmu_range_notifier_remove(&map->notifier);
> + map->vma = NULL;
> + }
> }
> gntdev_put_map(priv, map);
> return err;
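For reference, the "notifier_range locking pattern" that the comment in
gntdev_mmap() says can be skipped is, roughly, the usual collision-retry
loop. A hedged sketch, assuming the mmu_range_read_begin()/
mmu_range_read_retry() pair from this series; driver_lock and the setup
step are hypothetical:

	unsigned long seq;
again:
	seq = mmu_range_read_begin(&map->notifier);
	/* (Re)build the device/hypervisor view of the pages. */
	mutex_lock(&driver_lock);
	if (mmu_range_read_retry(&map->notifier, seq)) {
		/* An invalidation raced with us; start over. */
		mutex_unlock(&driver_lock);
		goto again;
	}
	/* Commit: the range is known stable while driver_lock is held. */
	mutex_unlock(&driver_lock);

Per the comment quoted above, gntdev can skip the retry loop because
mmap_sem is held across the setup, so no invalidation can run concurrently
with it.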