From: David Rientjes <rientjes@google.com>
To: Laurent Dufour <ldufour@linux.vnet.ibm.com>
Cc: paulmck@linux.vnet.ibm.com, peterz@infradead.org,
akpm@linux-foundation.org, kirill@shutemov.name,
ak@linux.intel.com, mhocko@kernel.org, dave@stgolabs.net,
jack@suse.cz, Matthew Wilcox <willy@infradead.org>,
benh@kernel.crashing.org, mpe@ellerman.id.au, paulus@samba.org,
Thomas Gleixner <tglx@linutronix.de>,
Ingo Molnar <mingo@redhat.com>,
hpa@zytor.com, Will Deacon <will.deacon@arm.com>,
Sergey Senozhatsky <sergey.senozhatsky@gmail.com>,
Andrea Arcangeli <aarcange@redhat.com>,
Alexei Starovoitov <alexei.starovoitov@gmail.com>,
kemi.wang@intel.com, sergey.senozhatsky.work@gmail.com,
Daniel Jordan <daniel.m.jordan@oracle.com>,
linux-kernel@vger.kernel.org, linux-mm@kvack.org,
haren@linux.vnet.ibm.com, khandual@linux.vnet.ibm.com,
npiggin@gmail.com, bsingharora@gmail.com,
Tim Chen <tim.c.chen@linux.intel.com>,
linuxppc-dev@lists.ozlabs.org, x86@kernel.org
Subject: Re: [PATCH v9 09/24] mm: protect mremap() against SPF hanlder
Date: Tue, 27 Mar 2018 15:12:39 -0700 (PDT) [thread overview]
Message-ID: <alpine.DEB.2.20.1803271500540.43106@chino.kir.corp.google.com> (raw)
In-Reply-To: <1520963994-28477-10-git-send-email-ldufour@linux.vnet.ibm.com>
On Tue, 13 Mar 2018, Laurent Dufour wrote:
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 88042d843668..ef6ef0627090 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -2189,16 +2189,24 @@ void anon_vma_interval_tree_verify(struct anon_vma_chain *node);
> extern int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin);
> extern int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
> unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert,
> - struct vm_area_struct *expand);
> + struct vm_area_struct *expand, bool keep_locked);
> static inline int vma_adjust(struct vm_area_struct *vma, unsigned long start,
> unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert)
> {
> - return __vma_adjust(vma, start, end, pgoff, insert, NULL);
> + return __vma_adjust(vma, start, end, pgoff, insert, NULL, false);
> }
> -extern struct vm_area_struct *vma_merge(struct mm_struct *,
> +extern struct vm_area_struct *__vma_merge(struct mm_struct *,
> struct vm_area_struct *prev, unsigned long addr, unsigned long end,
> unsigned long vm_flags, struct anon_vma *, struct file *, pgoff_t,
> - struct mempolicy *, struct vm_userfaultfd_ctx);
> + struct mempolicy *, struct vm_userfaultfd_ctx, bool keep_locked);
> +static inline struct vm_area_struct *vma_merge(struct mm_struct *vma,
> + struct vm_area_struct *prev, unsigned long addr, unsigned long end,
> + unsigned long vm_flags, struct anon_vma *anon, struct file *file,
> + pgoff_t off, struct mempolicy *pol, struct vm_userfaultfd_ctx uff)
> +{
> + return __vma_merge(vma, prev, addr, end, vm_flags, anon, file, off,
> + pol, uff, false);
> +}
The first formal to vma_merge() is an mm, not a vma.
This area could use an uncluttering.
> extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *);
> extern int __split_vma(struct mm_struct *, struct vm_area_struct *,
> unsigned long addr, int new_below);
> diff --git a/mm/mmap.c b/mm/mmap.c
> index d6533cb85213..ac32b577a0c9 100644
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -684,7 +684,7 @@ static inline void __vma_unlink_prev(struct mm_struct *mm,
> */
> int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
> unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert,
> - struct vm_area_struct *expand)
> + struct vm_area_struct *expand, bool keep_locked)
> {
> struct mm_struct *mm = vma->vm_mm;
> struct vm_area_struct *next = vma->vm_next, *orig_vma = vma;
> @@ -996,7 +996,8 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
>
> if (next && next != vma)
> vm_raw_write_end(next);
> - vm_raw_write_end(vma);
> + if (!keep_locked)
> + vm_raw_write_end(vma);
>
> validate_mm(mm);
>
This will require a fixup for the following patch where a retval from
anon_vma_close() can also return without vma locked even though
keep_locked == true.
How does vma_merge() handle that error wrt vm_raw_write_begin(vma)?
> @@ -1132,12 +1133,13 @@ can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
> * parameter) may establish ptes with the wrong permissions of NNNN
> * instead of the right permissions of XXXX.
> */
> -struct vm_area_struct *vma_merge(struct mm_struct *mm,
> +struct vm_area_struct *__vma_merge(struct mm_struct *mm,
> struct vm_area_struct *prev, unsigned long addr,
> unsigned long end, unsigned long vm_flags,
> struct anon_vma *anon_vma, struct file *file,
> pgoff_t pgoff, struct mempolicy *policy,
> - struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
> + struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
> + bool keep_locked)
> {
> pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
> struct vm_area_struct *area, *next;
> @@ -1185,10 +1187,11 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
> /* cases 1, 6 */
> err = __vma_adjust(prev, prev->vm_start,
> next->vm_end, prev->vm_pgoff, NULL,
> - prev);
> + prev, keep_locked);
> } else /* cases 2, 5, 7 */
> err = __vma_adjust(prev, prev->vm_start,
> - end, prev->vm_pgoff, NULL, prev);
> + end, prev->vm_pgoff, NULL, prev,
> + keep_locked);
> if (err)
> return NULL;
> khugepaged_enter_vma_merge(prev, vm_flags);
> @@ -1205,10 +1208,12 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
> vm_userfaultfd_ctx)) {
> if (prev && addr < prev->vm_end) /* case 4 */
> err = __vma_adjust(prev, prev->vm_start,
> - addr, prev->vm_pgoff, NULL, next);
> + addr, prev->vm_pgoff, NULL, next,
> + keep_locked);
> else { /* cases 3, 8 */
> err = __vma_adjust(area, addr, next->vm_end,
> - next->vm_pgoff - pglen, NULL, next);
> + next->vm_pgoff - pglen, NULL, next,
> + keep_locked);
> /*
> * In case 3 area is already equal to next and
> * this is a noop, but in case 8 "area" has
> @@ -3163,9 +3168,20 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
>
> if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent))
> return NULL; /* should never get here */
> - new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
> - vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
> - vma->vm_userfaultfd_ctx);
> +
> + /* There is 3 cases to manage here in
> + * AAAA AAAA AAAA AAAA
> + * PPPP.... PPPP......NNNN PPPP....NNNN PP........NN
> + * PPPPPPPP(A) PPPP..NNNNNNNN(B) PPPPPPPPPPPP(1) NULL
> + * PPPPPPPPNNNN(2)
> + * PPPPNNNNNNNN(3)
> + *
> + * new_vma == prev in case A,1,2
> + * new_vma == next in case B,3
> + */
Interleaved tabs and whitespace.
> + new_vma = __vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
> + vma->anon_vma, vma->vm_file, pgoff,
> + vma_policy(vma), vma->vm_userfaultfd_ctx, true);
> if (new_vma) {
> /*
> * Source vma may have been merged into new_vma
> @@ -3205,6 +3221,15 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
> get_file(new_vma->vm_file);
> if (new_vma->vm_ops && new_vma->vm_ops->open)
> new_vma->vm_ops->open(new_vma);
> + /*
> + * As the VMA is linked right now, it may be hit by the
> + * speculative page fault handler. But we don't want it to
> + * to start mapping page in this area until the caller has
> + * potentially move the pte from the moved VMA. To prevent
> + * that we protect it right now, and let the caller unprotect
> + * it once the move is done.
> + */
> + vm_raw_write_begin(new_vma);
> vma_link(mm, new_vma, prev, rb_link, rb_parent);
> *need_rmap_locks = false;
> }
> diff --git a/mm/mremap.c b/mm/mremap.c
> index 049470aa1e3e..8ed1a1d6eaed 100644
> --- a/mm/mremap.c
> +++ b/mm/mremap.c
> @@ -302,6 +302,14 @@ static unsigned long move_vma(struct vm_area_struct *vma,
> if (!new_vma)
> return -ENOMEM;
>
> + /* new_vma is returned protected by copy_vma, to prevent speculative
> + * page fault to be done in the destination area before we move the pte.
> + * Now, we must also protect the source VMA since we don't want pages
> + * to be mapped in our back while we are copying the PTEs.
> + */
> + if (vma != new_vma)
> + vm_raw_write_begin(vma);
> +
> moved_len = move_page_tables(vma, old_addr, new_vma, new_addr, old_len,
> need_rmap_locks);
> if (moved_len < old_len) {
> @@ -318,6 +326,8 @@ static unsigned long move_vma(struct vm_area_struct *vma,
> */
> move_page_tables(new_vma, new_addr, vma, old_addr, moved_len,
> true);
> + if (vma != new_vma)
> + vm_raw_write_end(vma);
> vma = new_vma;
> old_len = new_len;
> old_addr = new_addr;
> @@ -326,7 +336,10 @@ static unsigned long move_vma(struct vm_area_struct *vma,
> mremap_userfaultfd_prep(new_vma, uf);
> arch_remap(mm, old_addr, old_addr + old_len,
> new_addr, new_addr + new_len);
> + if (vma != new_vma)
> + vm_raw_write_end(vma);
> }
> + vm_raw_write_end(new_vma);
Just do
vm_raw_write_end(vma);
vm_raw_write_end(new_vma);
here.
next prev parent reply other threads:[~2018-03-27 22:12 UTC|newest]
Thread overview: 100+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-03-13 17:59 [PATCH v9 00/24] Speculative page faults Laurent Dufour
2018-03-13 17:59 ` [PATCH v9 01/24] mm: Introduce CONFIG_SPECULATIVE_PAGE_FAULT Laurent Dufour
2018-03-25 21:50 ` David Rientjes
2018-03-28 7:49 ` Laurent Dufour
2018-03-28 10:16 ` David Rientjes
2018-03-28 11:15 ` Laurent Dufour
2018-03-28 21:18 ` David Rientjes
2018-03-13 17:59 ` [PATCH v9 02/24] x86/mm: Define CONFIG_SPECULATIVE_PAGE_FAULT Laurent Dufour
2018-03-13 17:59 ` [PATCH v9 03/24] powerpc/mm: " Laurent Dufour
2018-03-13 17:59 ` [PATCH v9 04/24] mm: Prepare for FAULT_FLAG_SPECULATIVE Laurent Dufour
2018-03-25 21:50 ` David Rientjes
2018-03-28 10:27 ` Laurent Dufour
2018-04-03 21:57 ` David Rientjes
2018-04-04 9:23 ` Laurent Dufour
2018-03-13 17:59 ` [PATCH v9 05/24] mm: Introduce pte_spinlock " Laurent Dufour
2018-03-25 21:50 ` David Rientjes
2018-03-28 8:15 ` Laurent Dufour
2018-03-13 17:59 ` [PATCH v9 06/24] mm: make pte_unmap_same compatible with SPF Laurent Dufour
2018-03-27 21:18 ` David Rientjes
2018-03-28 8:27 ` Laurent Dufour
2018-03-28 10:20 ` David Rientjes
2018-03-28 10:43 ` Laurent Dufour
2018-04-03 19:10 ` Jerome Glisse
2018-04-03 20:40 ` David Rientjes
2018-04-03 21:04 ` Jerome Glisse
2018-04-04 9:53 ` Laurent Dufour
2018-03-13 17:59 ` [PATCH v9 07/24] mm: VMA sequence count Laurent Dufour
2018-03-17 7:51 ` [mm] b1f0502d04: INFO:trying_to_register_non-static_key kernel test robot
2018-03-21 12:21 ` Laurent Dufour
2018-03-25 22:10 ` David Rientjes
2018-03-28 13:30 ` Laurent Dufour
2018-04-04 0:48 ` David Rientjes
2018-04-04 1:03 ` David Rientjes
2018-04-04 10:28 ` Laurent Dufour
2018-04-04 10:19 ` Laurent Dufour
2018-04-04 21:53 ` David Rientjes
2018-04-05 16:55 ` Laurent Dufour
2018-03-27 21:30 ` [PATCH v9 07/24] mm: VMA sequence count David Rientjes
2018-03-28 17:58 ` Laurent Dufour
2018-03-13 17:59 ` [PATCH v9 08/24] mm: Protect VMA modifications using " Laurent Dufour
2018-03-27 21:45 ` David Rientjes
2018-03-28 16:57 ` Laurent Dufour
2018-03-27 21:57 ` David Rientjes
2018-03-28 17:10 ` Laurent Dufour
2018-03-13 17:59 ` [PATCH v9 09/24] mm: protect mremap() against SPF hanlder Laurent Dufour
2018-03-27 22:12 ` David Rientjes [this message]
2018-03-28 18:11 ` Laurent Dufour
2018-03-28 21:21 ` David Rientjes
2018-04-04 8:24 ` Laurent Dufour
2018-03-13 17:59 ` [PATCH v9 10/24] mm: Protect SPF handler against anon_vma changes Laurent Dufour
2018-03-13 17:59 ` [PATCH v9 11/24] mm: Cache some VMA fields in the vm_fault structure Laurent Dufour
2018-04-02 22:24 ` David Rientjes
2018-04-04 15:48 ` Laurent Dufour
2018-03-13 17:59 ` [PATCH v9 12/24] mm/migrate: Pass vm_fault pointer to migrate_misplaced_page() Laurent Dufour
2018-04-02 23:00 ` David Rientjes
2018-03-13 17:59 ` [PATCH v9 13/24] mm: Introduce __lru_cache_add_active_or_unevictable Laurent Dufour
2018-04-02 23:11 ` David Rientjes
2018-03-13 17:59 ` [PATCH v9 14/24] mm: Introduce __maybe_mkwrite() Laurent Dufour
2018-04-02 23:12 ` David Rientjes
2018-04-04 15:56 ` Laurent Dufour
2018-03-13 17:59 ` [PATCH v9 15/24] mm: Introduce __vm_normal_page() Laurent Dufour
2018-04-02 23:18 ` David Rientjes
2018-04-04 16:04 ` Laurent Dufour
2018-04-03 19:39 ` Jerome Glisse
2018-04-03 20:45 ` David Rientjes
2018-04-04 16:26 ` Laurent Dufour
2018-04-04 21:59 ` Jerome Glisse
2018-04-05 12:53 ` Laurent Dufour
2018-03-13 17:59 ` [PATCH v9 16/24] mm: Introduce __page_add_new_anon_rmap() Laurent Dufour
2018-04-02 23:57 ` David Rientjes
2018-04-10 16:30 ` Laurent Dufour
2018-03-13 17:59 ` [PATCH v9 17/24] mm: Protect mm_rb tree with a rwlock Laurent Dufour
2018-03-14 8:48 ` Peter Zijlstra
2018-03-14 16:25 ` Laurent Dufour
2018-03-16 10:23 ` [mm] b33ddf50eb: INFO:trying_to_register_non-static_key kernel test robot
2018-03-16 16:38 ` Laurent Dufour
2018-04-03 0:11 ` [PATCH v9 17/24] mm: Protect mm_rb tree with a rwlock David Rientjes
2018-04-06 14:23 ` Laurent Dufour
2018-04-10 16:20 ` Laurent Dufour
2018-03-13 17:59 ` [PATCH v9 18/24] mm: Provide speculative fault infrastructure Laurent Dufour
2018-03-13 17:59 ` [PATCH v9 19/24] mm: Adding speculative page fault failure trace events Laurent Dufour
2018-03-13 17:59 ` [PATCH v9 20/24] perf: Add a speculative page fault sw event Laurent Dufour
2018-03-26 21:43 ` David Rientjes
2018-03-13 17:59 ` [PATCH v9 21/24] perf tools: Add support for the SPF perf event Laurent Dufour
2018-03-26 21:44 ` David Rientjes
2018-03-27 3:49 ` Andi Kleen
2018-04-10 6:47 ` David Rientjes
2018-04-12 13:44 ` Laurent Dufour
2018-03-13 17:59 ` [PATCH v9 22/24] mm: Speculative page fault handler return VMA Laurent Dufour
2018-03-13 17:59 ` [PATCH v9 23/24] x86/mm: Add speculative pagefault handling Laurent Dufour
2018-03-26 21:41 ` David Rientjes
2018-03-13 17:59 ` [PATCH v9 24/24] powerpc/mm: Add speculative page fault Laurent Dufour
2018-03-26 21:39 ` David Rientjes
2018-03-14 13:11 ` [PATCH v9 00/24] Speculative page faults Michal Hocko
2018-03-14 13:33 ` Laurent Dufour
2018-04-13 13:34 ` Laurent Dufour
2018-03-22 1:21 ` Ganesh Mahendran
2018-03-29 12:49 ` Laurent Dufour
2018-04-03 20:37 ` Jerome Glisse
2018-04-04 7:59 ` Laurent Dufour
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=alpine.DEB.2.20.1803271500540.43106@chino.kir.corp.google.com \
--to=rientjes@google.com \
--cc=aarcange@redhat.com \
--cc=ak@linux.intel.com \
--cc=akpm@linux-foundation.org \
--cc=alexei.starovoitov@gmail.com \
--cc=benh@kernel.crashing.org \
--cc=bsingharora@gmail.com \
--cc=daniel.m.jordan@oracle.com \
--cc=dave@stgolabs.net \
--cc=haren@linux.vnet.ibm.com \
--cc=hpa@zytor.com \
--cc=jack@suse.cz \
--cc=kemi.wang@intel.com \
--cc=khandual@linux.vnet.ibm.com \
--cc=kirill@shutemov.name \
--cc=ldufour@linux.vnet.ibm.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=linuxppc-dev@lists.ozlabs.org \
--cc=mhocko@kernel.org \
--cc=mingo@redhat.com \
--cc=mpe@ellerman.id.au \
--cc=npiggin@gmail.com \
--cc=paulmck@linux.vnet.ibm.com \
--cc=paulus@samba.org \
--cc=peterz@infradead.org \
--cc=sergey.senozhatsky.work@gmail.com \
--cc=sergey.senozhatsky@gmail.com \
--cc=tglx@linutronix.de \
--cc=tim.c.chen@linux.intel.com \
--cc=will.deacon@arm.com \
--cc=willy@infradead.org \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox