linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: "Liam R. Howlett" <Liam.Howlett@Oracle.com>
To: Suren Baghdasaryan <surenb@google.com>
Cc: akpm@linux-foundation.org, torvalds@linux-foundation.org,
	jannh@google.com, willy@infradead.org, david@redhat.com,
	peterx@redhat.com, ldufour@linux.ibm.com, vbabka@suse.cz,
	michel@lespinasse.org, jglisse@google.com, mhocko@suse.com,
	hannes@cmpxchg.org, dave@stgolabs.net, hughd@google.com,
	linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	stable@vger.kernel.org,
	Linus Torvalds <torvalds@linuxfoundation.org>
Subject: Re: [PATCH v2 4/6] mm: lock vma explicitly before doing vm_flags_reset and vm_flags_reset_once
Date: Wed, 2 Aug 2023 13:13:17 -0400	[thread overview]
Message-ID: <20230802171317.wccm4gfruhz4mbxh@revolver> (raw)
In-Reply-To: <20230801220733.1987762-5-surenb@google.com>

* Suren Baghdasaryan <surenb@google.com> [230801 18:07]:
> Implicit vma locking inside vm_flags_reset() and vm_flags_reset_once() is
> not obvious and makes it hard to understand where vma locking is happening.
> Also in some cases (like in dup_userfaultfd()) vma should be locked earlier
> than vma_flags modification. To make locking more visible, change these
> functions to assert that the vma write lock is taken and explicitly lock
> the vma beforehand. Fix userfaultfd functions which should lock the vma
> earlier.
> 
> Suggested-by: Linus Torvalds <torvalds@linuxfoundation.org>
> Signed-off-by: Suren Baghdasaryan <surenb@google.com>

Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>

> ---
>  arch/powerpc/kvm/book3s_hv_uvmem.c    |  1 +
>  drivers/infiniband/hw/hfi1/file_ops.c |  1 +
>  fs/userfaultfd.c                      |  6 ++++++
>  include/linux/mm.h                    | 10 +++++++---
>  mm/madvise.c                          |  5 ++---
>  mm/mlock.c                            |  3 ++-
>  mm/mprotect.c                         |  1 +
>  7 files changed, 20 insertions(+), 7 deletions(-)
> 
> diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c
> index 709ebd578394..e2d6f9327f77 100644
> --- a/arch/powerpc/kvm/book3s_hv_uvmem.c
> +++ b/arch/powerpc/kvm/book3s_hv_uvmem.c
> @@ -410,6 +410,7 @@ static int kvmppc_memslot_page_merge(struct kvm *kvm,
>  			ret = H_STATE;
>  			break;
>  		}
> +		vma_start_write(vma);
>  		/* Copy vm_flags to avoid partial modifications in ksm_madvise */
>  		vm_flags = vma->vm_flags;
>  		ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
> diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
> index a5ab22cedd41..5920bfc1e1c5 100644
> --- a/drivers/infiniband/hw/hfi1/file_ops.c
> +++ b/drivers/infiniband/hw/hfi1/file_ops.c
> @@ -344,6 +344,7 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
>  		goto done;
>  	}
>  
> +	vma_start_write(vma);
>  	/*
>  	 * vm_pgoff is used as a buffer selector cookie.  Always mmap from
>  	 * the beginning.
> diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
> index 7cecd49e078b..6cde95533dcd 100644
> --- a/fs/userfaultfd.c
> +++ b/fs/userfaultfd.c
> @@ -667,6 +667,7 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
>  		mmap_write_lock(mm);
>  		for_each_vma(vmi, vma) {
>  			if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx) {
> +				vma_start_write(vma);
>  				vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
>  				userfaultfd_set_vm_flags(vma,
>  							 vma->vm_flags & ~__VM_UFFD_FLAGS);
> @@ -702,6 +703,7 @@ int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *fcs)
>  
>  	octx = vma->vm_userfaultfd_ctx.ctx;
>  	if (!octx || !(octx->features & UFFD_FEATURE_EVENT_FORK)) {
> +		vma_start_write(vma);
>  		vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
>  		userfaultfd_set_vm_flags(vma, vma->vm_flags & ~__VM_UFFD_FLAGS);
>  		return 0;
> @@ -783,6 +785,7 @@ void mremap_userfaultfd_prep(struct vm_area_struct *vma,
>  		atomic_inc(&ctx->mmap_changing);
>  	} else {
>  		/* Drop uffd context if remap feature not enabled */
> +		vma_start_write(vma);
>  		vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
>  		userfaultfd_set_vm_flags(vma, vma->vm_flags & ~__VM_UFFD_FLAGS);
>  	}
> @@ -940,6 +943,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
>  			prev = vma;
>  		}
>  
> +		vma_start_write(vma);
>  		userfaultfd_set_vm_flags(vma, new_flags);
>  		vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
>  	}
> @@ -1502,6 +1506,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
>  		 * the next vma was merged into the current one and
>  		 * the current one has not been updated yet.
>  		 */
> +		vma_start_write(vma);
>  		userfaultfd_set_vm_flags(vma, new_flags);
>  		vma->vm_userfaultfd_ctx.ctx = ctx;
>  
> @@ -1685,6 +1690,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
>  		 * the next vma was merged into the current one and
>  		 * the current one has not been updated yet.
>  		 */
> +		vma_start_write(vma);
>  		userfaultfd_set_vm_flags(vma, new_flags);
>  		vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
>  
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 262b5f44101d..2c720c9bb1ae 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -780,18 +780,22 @@ static inline void vm_flags_init(struct vm_area_struct *vma,
>  	ACCESS_PRIVATE(vma, __vm_flags) = flags;
>  }
>  
> -/* Use when VMA is part of the VMA tree and modifications need coordination */
> +/*
> + * Use when VMA is part of the VMA tree and modifications need coordination
> + * Note: vm_flags_reset and vm_flags_reset_once do not lock the vma and
> + * it should be locked explicitly beforehand.
> + */
>  static inline void vm_flags_reset(struct vm_area_struct *vma,
>  				  vm_flags_t flags)
>  {
> -	vma_start_write(vma);
> +	vma_assert_write_locked(vma);
>  	vm_flags_init(vma, flags);
>  }
>  
>  static inline void vm_flags_reset_once(struct vm_area_struct *vma,
>  				       vm_flags_t flags)
>  {
> -	vma_start_write(vma);
> +	vma_assert_write_locked(vma);
>  	WRITE_ONCE(ACCESS_PRIVATE(vma, __vm_flags), flags);
>  }
>  
> diff --git a/mm/madvise.c b/mm/madvise.c
> index bfe0e06427bd..507b1d299fec 100644
> --- a/mm/madvise.c
> +++ b/mm/madvise.c
> @@ -173,9 +173,8 @@ static int madvise_update_vma(struct vm_area_struct *vma,
>  	}
>  
>  success:
> -	/*
> -	 * vm_flags is protected by the mmap_lock held in write mode.
> -	 */
> +	/* vm_flags is protected by the mmap_lock held in write mode. */
> +	vma_start_write(vma);
>  	vm_flags_reset(vma, new_flags);
>  	if (!vma->vm_file || vma_is_anon_shmem(vma)) {
>  		error = replace_anon_vma_name(vma, anon_name);
> diff --git a/mm/mlock.c b/mm/mlock.c
> index 479e09d0994c..06bdfab83b58 100644
> --- a/mm/mlock.c
> +++ b/mm/mlock.c
> @@ -387,6 +387,7 @@ static void mlock_vma_pages_range(struct vm_area_struct *vma,
>  	 */
>  	if (newflags & VM_LOCKED)
>  		newflags |= VM_IO;
> +	vma_start_write(vma);
>  	vm_flags_reset_once(vma, newflags);
>  
>  	lru_add_drain();
> @@ -461,9 +462,9 @@ static int mlock_fixup(struct vma_iterator *vmi, struct vm_area_struct *vma,
>  	 * It's okay if try_to_unmap_one unmaps a page just after we
>  	 * set VM_LOCKED, populate_vma_page_range will bring it back.
>  	 */
> -
>  	if ((newflags & VM_LOCKED) && (oldflags & VM_LOCKED)) {
>  		/* No work to do, and mlocking twice would be wrong */
> +		vma_start_write(vma);
>  		vm_flags_reset(vma, newflags);
>  	} else {
>  		mlock_vma_pages_range(vma, start, end, newflags);
> diff --git a/mm/mprotect.c b/mm/mprotect.c
> index 3aef1340533a..362e190a8f81 100644
> --- a/mm/mprotect.c
> +++ b/mm/mprotect.c
> @@ -657,6 +657,7 @@ mprotect_fixup(struct vma_iterator *vmi, struct mmu_gather *tlb,
>  	 * vm_flags and vm_page_prot are protected by the mmap_lock
>  	 * held in write mode.
>  	 */
> +	vma_start_write(vma);
>  	vm_flags_reset(vma, newflags);
>  	if (vma_wants_manual_pte_write_upgrade(vma))
>  		mm_cp_flags |= MM_CP_TRY_CHANGE_WRITABLE;
> -- 
> 2.41.0.585.gd2178a4bd4-goog
> 


  reply	other threads:[~2023-08-02 17:13 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-08-01 22:07 [PATCH v2 0/6] make vma locking more obvious Suren Baghdasaryan
2023-08-01 22:07 ` [PATCH v2 1/6] mm: enable page walking API to lock vmas during the walk Suren Baghdasaryan
2023-08-01 22:07 ` [PATCH v2 2/6] mm: for !CONFIG_PER_VMA_LOCK equate write lock assertion for vma and mmap Suren Baghdasaryan
2023-08-02 17:07   ` Liam R. Howlett
2023-08-01 22:07 ` [PATCH v2 3/6] mm: replace mmap with vma write lock assertions when operating on a vma Suren Baghdasaryan
2023-08-02 17:13   ` Liam R. Howlett
2023-08-01 22:07 ` [PATCH v2 4/6] mm: lock vma explicitly before doing vm_flags_reset and vm_flags_reset_once Suren Baghdasaryan
2023-08-02 17:13   ` Liam R. Howlett [this message]
2023-08-02 17:49   ` Linus Torvalds
2023-08-02 18:09     ` Suren Baghdasaryan
2023-08-02 18:53       ` Linus Torvalds
2023-08-02 20:21         ` Suren Baghdasaryan
2023-08-01 22:07 ` [PATCH v2 5/6] mm: always lock new vma before inserting into vma tree Suren Baghdasaryan
2023-08-02 17:02   ` Liam R. Howlett
2023-08-01 22:07 ` [PATCH v2 6/6] mm: move vma locking out of vma_prepare Suren Baghdasaryan
2023-08-02 16:59   ` Liam R. Howlett
2023-08-02 17:24     ` Suren Baghdasaryan
2023-08-03 17:28 ` [PATCH v2 0/6] make vma locking more obvious Suren Baghdasaryan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230802171317.wccm4gfruhz4mbxh@revolver \
    --to=liam.howlett@oracle.com \
    --cc=akpm@linux-foundation.org \
    --cc=dave@stgolabs.net \
    --cc=david@redhat.com \
    --cc=hannes@cmpxchg.org \
    --cc=hughd@google.com \
    --cc=jannh@google.com \
    --cc=jglisse@google.com \
    --cc=ldufour@linux.ibm.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mhocko@suse.com \
    --cc=michel@lespinasse.org \
    --cc=peterx@redhat.com \
    --cc=stable@vger.kernel.org \
    --cc=surenb@google.com \
    --cc=torvalds@linux-foundation.org \
    --cc=torvalds@linuxfoundation.org \
    --cc=vbabka@suse.cz \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox