Re: [PATCH] mm/munlock: Protect the per-CPU pagevec by a local_lock_t.

linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed

From: Hugh Dickins <hughd@google.com>
To: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: linux-mm@kvack.org, Hugh Dickins <hughd@google.com>,
	 Vlastimil Babka <vbabka@suse.cz>,
	Matthew Wilcox <willy@infradead.org>,
	 Andrew Morton <akpm@linux-foundation.org>,
	 Thomas Gleixner <tglx@linutronix.de>
Subject: Re: [PATCH] mm/munlock: Protect the per-CPU pagevec by a local_lock_t.
Date: Mon, 21 Mar 2022 22:50:26 -0700 (PDT)	[thread overview]
Message-ID: <6aa913a9-6eaf-2a7d-9966-15f9cb3672aa@google.com> (raw)
In-Reply-To: <YjizWi9IY0mpvIfb@linutronix.de>

On Mon, 21 Mar 2022, Sebastian Andrzej Siewior wrote:

> The access to mlock_pvec is protected by disabling preemption via
> get_cpu_var() or implicitly by having preemption disabled by the caller
> (in the mlock_page_drain() case).
> This breaks on PREEMPT_RT since folio_lruvec_lock_irq() acquires a
> sleeping lock in this section.
> 
> Create struct mlock_pvec which consists of the local_lock_t and the
> pagevec. Acquire the local_lock() before accessing the per-CPU pagevec.
> Replace mlock_page_drain() with a _local() version which is invoked on
> the local CPU and acquires the local_lock_t and a _remote() version
> which uses the pagevec from a remote CPU which is offline.
> 
> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>

Acked-by: Hugh Dickins <hughd@google.com>

Thanks a lot for catching this: your patch looks exactly right to me.
Bad mistake on my part to have missed all the local_lock'ing in mm/swap.c
when I rebased my old mm/munlock implementation to the current tree.

> ---
>  mm/internal.h   |  6 ++++--
>  mm/migrate.c    |  2 +-
>  mm/mlock.c      | 46 ++++++++++++++++++++++++++++++++++++----------
>  mm/page_alloc.c |  1 +
>  mm/rmap.c       |  4 ++--
>  mm/swap.c       |  4 +++-
>  6 files changed, 47 insertions(+), 16 deletions(-)
> 
> diff --git a/mm/internal.h b/mm/internal.h
> index 9be0227ccc942..50c3fd71d7ddd 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -456,7 +456,8 @@ static inline void munlock_vma_page(struct page *page,
>  }
>  void mlock_new_page(struct page *page);
>  bool need_mlock_page_drain(int cpu);
> -void mlock_page_drain(int cpu);
> +void mlock_page_drain_local(void);
> +void mlock_page_drain_remote(int cpu);
>  
>  extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma);
>  
> @@ -547,7 +548,8 @@ static inline void munlock_vma_page(struct page *page,
>  			struct vm_area_struct *vma, bool compound) { }
>  static inline void mlock_new_page(struct page *page) { }
>  static inline bool need_mlock_page_drain(int cpu) { return false; }
> -static inline void mlock_page_drain(int cpu) { }
> +static inline void mlock_page_drain_local(void) { }
> +static inline void mlock_page_drain_remote(int cpu) { }
>  static inline void vunmap_range_noflush(unsigned long start, unsigned long end)
>  {
>  }
> diff --git a/mm/migrate.c b/mm/migrate.c
> index 3d60823afd2d3..de175e2fdba5d 100644
> --- a/mm/migrate.c
> +++ b/mm/migrate.c
> @@ -246,7 +246,7 @@ static bool remove_migration_pte(struct folio *folio,
>  			set_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte);
>  		}
>  		if (vma->vm_flags & VM_LOCKED)
> -			mlock_page_drain(smp_processor_id());
> +			mlock_page_drain_local();
>  
>  		trace_remove_migration_pte(pvmw.address, pte_val(pte),
>  					   compound_order(new));
> diff --git a/mm/mlock.c b/mm/mlock.c
> index efd2dd2943dec..6e503b8690f52 100644
> --- a/mm/mlock.c
> +++ b/mm/mlock.c
> @@ -28,7 +28,14 @@
>  
>  #include "internal.h"
>  
> -static DEFINE_PER_CPU(struct pagevec, mlock_pvec);
> +struct mlock_pvec {
> +	local_lock_t lock;
> +	struct pagevec vec;
> +};
> +
> +static DEFINE_PER_CPU(struct mlock_pvec, mlock_pvec) = {
> +	.lock = INIT_LOCAL_LOCK(lock),
> +};
>  
>  bool can_do_mlock(void)
>  {
> @@ -203,18 +210,30 @@ static void mlock_pagevec(struct pagevec *pvec)
>  	pagevec_reinit(pvec);
>  }
>  
> -void mlock_page_drain(int cpu)
> +void mlock_page_drain_local(void)
>  {
>  	struct pagevec *pvec;
>  
> -	pvec = &per_cpu(mlock_pvec, cpu);
> +	local_lock(&mlock_pvec.lock);
> +	pvec = this_cpu_ptr(&mlock_pvec.vec);
> +	if (pagevec_count(pvec))
> +		mlock_pagevec(pvec);
> +	local_unlock(&mlock_pvec.lock);
> +}
> +
> +void mlock_page_drain_remote(int cpu)
> +{
> +	struct pagevec *pvec;
> +
> +	WARN_ON_ONCE(cpu_online(cpu));
> +	pvec = &per_cpu(mlock_pvec.vec, cpu);
>  	if (pagevec_count(pvec))
>  		mlock_pagevec(pvec);
>  }
>  
>  bool need_mlock_page_drain(int cpu)
>  {
> -	return pagevec_count(&per_cpu(mlock_pvec, cpu));
> +	return pagevec_count(&per_cpu(mlock_pvec.vec, cpu));
>  }
>  
>  /**
> @@ -223,7 +242,10 @@ bool need_mlock_page_drain(int cpu)
>   */
>  void mlock_folio(struct folio *folio)
>  {
> -	struct pagevec *pvec = &get_cpu_var(mlock_pvec);
> +	struct pagevec *pvec;
> +
> +	local_lock(&mlock_pvec.lock);
> +	pvec = this_cpu_ptr(&mlock_pvec.vec);
>  
>  	if (!folio_test_set_mlocked(folio)) {
>  		int nr_pages = folio_nr_pages(folio);
> @@ -236,7 +258,7 @@ void mlock_folio(struct folio *folio)
>  	if (!pagevec_add(pvec, mlock_lru(&folio->page)) ||
>  	    folio_test_large(folio) || lru_cache_disabled())
>  		mlock_pagevec(pvec);
> -	put_cpu_var(mlock_pvec);
> +	local_unlock(&mlock_pvec.lock);
>  }
>  
>  /**
> @@ -245,9 +267,11 @@ void mlock_folio(struct folio *folio)
>   */
>  void mlock_new_page(struct page *page)
>  {
> -	struct pagevec *pvec = &get_cpu_var(mlock_pvec);
> +	struct pagevec *pvec;
>  	int nr_pages = thp_nr_pages(page);
>  
> +	local_lock(&mlock_pvec.lock);
> +	pvec = this_cpu_ptr(&mlock_pvec.vec);
>  	SetPageMlocked(page);
>  	mod_zone_page_state(page_zone(page), NR_MLOCK, nr_pages);
>  	__count_vm_events(UNEVICTABLE_PGMLOCKED, nr_pages);
> @@ -256,7 +280,7 @@ void mlock_new_page(struct page *page)
>  	if (!pagevec_add(pvec, mlock_new(page)) ||
>  	    PageHead(page) || lru_cache_disabled())
>  		mlock_pagevec(pvec);
> -	put_cpu_var(mlock_pvec);
> +	local_unlock(&mlock_pvec.lock);
>  }
>  
>  /**
> @@ -265,8 +289,10 @@ void mlock_new_page(struct page *page)
>   */
>  void munlock_page(struct page *page)
>  {
> -	struct pagevec *pvec = &get_cpu_var(mlock_pvec);
> +	struct pagevec *pvec;
>  
> +	local_lock(&mlock_pvec.lock);
> +	pvec = this_cpu_ptr(&mlock_pvec.vec);
>  	/*
>  	 * TestClearPageMlocked(page) must be left to __munlock_page(),
>  	 * which will check whether the page is multiply mlocked.
> @@ -276,7 +302,7 @@ void munlock_page(struct page *page)
>  	if (!pagevec_add(pvec, page) ||
>  	    PageHead(page) || lru_cache_disabled())
>  		mlock_pagevec(pvec);
> -	put_cpu_var(mlock_pvec);
> +	local_unlock(&mlock_pvec.lock);
>  }
>  
>  static int mlock_pte_range(pmd_t *pmd, unsigned long addr,
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index f648decfe39d1..94f515845d53a 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -8355,6 +8355,7 @@ static int page_alloc_cpu_dead(unsigned int cpu)
>  	struct zone *zone;
>  
>  	lru_add_drain_cpu(cpu);
> +	mlock_page_drain_remote(cpu);
>  	drain_pages(cpu);
>  
>  	/*
> diff --git a/mm/rmap.c b/mm/rmap.c
> index 3eb95fcde7000..90b4956096ef3 100644
> --- a/mm/rmap.c
> +++ b/mm/rmap.c
> @@ -1727,7 +1727,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
>  		 */
>  		page_remove_rmap(subpage, vma, folio_test_hugetlb(folio));
>  		if (vma->vm_flags & VM_LOCKED)
> -			mlock_page_drain(smp_processor_id());
> +			mlock_page_drain_local();
>  		folio_put(folio);
>  	}
>  
> @@ -2005,7 +2005,7 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
>  		 */
>  		page_remove_rmap(subpage, vma, folio_test_hugetlb(folio));
>  		if (vma->vm_flags & VM_LOCKED)
> -			mlock_page_drain(smp_processor_id());
> +			mlock_page_drain_local();
>  		folio_put(folio);
>  	}
>  
> diff --git a/mm/swap.c b/mm/swap.c
> index bceff0cb559c9..7e320ec08c6ae 100644
> --- a/mm/swap.c
> +++ b/mm/swap.c
> @@ -624,7 +624,6 @@ void lru_add_drain_cpu(int cpu)
>  		pagevec_lru_move_fn(pvec, lru_lazyfree_fn);
>  
>  	activate_page_drain(cpu);
> -	mlock_page_drain(cpu);
>  }
>  
>  /**
> @@ -706,6 +705,7 @@ void lru_add_drain(void)
>  	local_lock(&lru_pvecs.lock);
>  	lru_add_drain_cpu(smp_processor_id());
>  	local_unlock(&lru_pvecs.lock);
> +	mlock_page_drain_local();
>  }
>  
>  /*
> @@ -720,6 +720,7 @@ static void lru_add_and_bh_lrus_drain(void)
>  	lru_add_drain_cpu(smp_processor_id());
>  	local_unlock(&lru_pvecs.lock);
>  	invalidate_bh_lrus_cpu();
> +	mlock_page_drain_local();
>  }
>  
>  void lru_add_drain_cpu_zone(struct zone *zone)
> @@ -728,6 +729,7 @@ void lru_add_drain_cpu_zone(struct zone *zone)
>  	lru_add_drain_cpu(smp_processor_id());
>  	drain_local_pages(zone);
>  	local_unlock(&lru_pvecs.lock);
> +	mlock_page_drain_local();
>  }
>  
>  #ifdef CONFIG_SMP
> -- 
> 2.35.1
> 
>

next prev parent reply	other threads:[~2022-03-22  5:50 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-03-21 17:18 Sebastian Andrzej Siewior
2022-03-22  5:50 ` Hugh Dickins [this message]
2022-03-22  7:56   ` Sebastian Andrzej Siewior

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=6aa913a9-6eaf-2a7d-9966-15f9cb3672aa@google.com \
    --to=hughd@google.com \
    --cc=akpm@linux-foundation.org \
    --cc=bigeasy@linutronix.de \
    --cc=linux-mm@kvack.org \
    --cc=tglx@linutronix.de \
    --cc=vbabka@suse.cz \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox