linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Guenter Roeck <linux@roeck-us.net>
To: Dennis Zhou <dennis@kernel.org>
Cc: Tejun Heo <tj@kernel.org>, Christoph Lameter <cl@linux.com>,
	linux-mm@kvack.org, linux-kernel@vger.kernel.org
Subject: Re: [PATCH] percpu: flush tlb in pcpu_reclaim_populated()
Date: Sat, 3 Jul 2021 16:02:53 -0700	[thread overview]
Message-ID: <20210703230253.GA2242521@roeck-us.net> (raw)

On Sat, Jul 03, 2021 at 09:05:23PM +0000, Dennis Zhou wrote:
> Prior to "percpu: implement partial chunk depopulation",
> pcpu_depopulate_chunk() was called only on the destruction path. This
> meant the virtual address range was on its way back to vmalloc which
> will handle flushing the tlbs for us.
> 
> However, with pcpu_reclaim_populated(), we are now calling
> pcpu_depopulate_chunk() during the active lifecycle of a chunk.
> Therefore, we need to flush the tlb as well otherwise we can end up
> accessing the wrong page through an invalid tlb mapping as reported in
> [1].
> 
> [1] https://lore.kernel.org/lkml/20210702191140.GA3166599@roeck-us.net/
> 
> Fixes: f183324133ea ("percpu: implement partial chunk depopulation")
> Reported-by: Guenter Roeck <linux@roeck-us.net>
> Signed-off-by: Dennis Zhou <dennis@kernel.org>

Tested-by: Guenter Roeck <linux@roeck-us.net>

Guenter

> ---
> I think I'm happier with this. It does the same thing as [2] but moves
> the flush to the caller so we can batch per chunk.
> 
> [2] https://lore.kernel.org/lkml/20210703040449.3213210-1-dennis@kernel.org/
> 
>  mm/percpu-km.c |  6 ++++++
>  mm/percpu-vm.c |  5 +++--
>  mm/percpu.c    | 29 +++++++++++++++++++++++------
>  3 files changed, 32 insertions(+), 8 deletions(-)
> 
> diff --git a/mm/percpu-km.c b/mm/percpu-km.c
> index c9d529dc7651..fe31aa19db81 100644
> --- a/mm/percpu-km.c
> +++ b/mm/percpu-km.c
> @@ -32,6 +32,12 @@
>  
>  #include <linux/log2.h>
>  
> +static void pcpu_post_unmap_tlb_flush(struct pcpu_chunk *chunk,
> +				      int page_start, int page_end)
> +{
> +	/* nothing */
> +}
> +
>  static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
>  			       int page_start, int page_end, gfp_t gfp)
>  {
> diff --git a/mm/percpu-vm.c b/mm/percpu-vm.c
> index ee5d89fcd66f..2054c9213c43 100644
> --- a/mm/percpu-vm.c
> +++ b/mm/percpu-vm.c
> @@ -303,6 +303,9 @@ static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
>   * For each cpu, depopulate and unmap pages [@page_start,@page_end)
>   * from @chunk.
>   *
> + * Caller is required to call pcpu_post_unmap_tlb_flush() if not returning the
> + * region back to vmalloc() which will lazily flush the tlb.
> + *
>   * CONTEXT:
>   * pcpu_alloc_mutex.
>   */
> @@ -324,8 +327,6 @@ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
>  
>  	pcpu_unmap_pages(chunk, pages, page_start, page_end);
>  
> -	/* no need to flush tlb, vmalloc will handle it lazily */
> -
>  	pcpu_free_pages(chunk, pages, page_start, page_end);
>  }
>  
> diff --git a/mm/percpu.c b/mm/percpu.c
> index b4cebeca4c0c..8d8efd668f76 100644
> --- a/mm/percpu.c
> +++ b/mm/percpu.c
> @@ -1572,6 +1572,7 @@ static void pcpu_chunk_depopulated(struct pcpu_chunk *chunk,
>   *
>   * pcpu_populate_chunk		- populate the specified range of a chunk
>   * pcpu_depopulate_chunk	- depopulate the specified range of a chunk
> + * pcpu_post_unmap_tlb_flush	- flush tlb for the specified range of a chunk
>   * pcpu_create_chunk		- create a new chunk
>   * pcpu_destroy_chunk		- destroy a chunk, always preceded by full depop
>   * pcpu_addr_to_page		- translate address to physical address
> @@ -1581,6 +1582,8 @@ static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
>  			       int page_start, int page_end, gfp_t gfp);
>  static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
>  				  int page_start, int page_end);
> +static void pcpu_post_unmap_tlb_flush(struct pcpu_chunk *chunk,
> +				      int page_start, int page_end);
>  static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp);
>  static void pcpu_destroy_chunk(struct pcpu_chunk *chunk);
>  static struct page *pcpu_addr_to_page(void *addr);
> @@ -2137,11 +2140,12 @@ static void pcpu_reclaim_populated(void)
>  {
>  	struct pcpu_chunk *chunk;
>  	struct pcpu_block_md *block;
> +	int freed_page_start, freed_page_end;
>  	int i, end;
> +	bool reintegrate;
>  
>  	lockdep_assert_held(&pcpu_lock);
>  
> -restart:
>  	/*
>  	 * Once a chunk is isolated to the to_depopulate list, the chunk is no
>  	 * longer discoverable to allocations whom may populate pages.  The only
> @@ -2157,6 +2161,9 @@ static void pcpu_reclaim_populated(void)
>  		 * Scan chunk's pages in the reverse order to keep populated
>  		 * pages close to the beginning of the chunk.
>  		 */
> +		freed_page_start = chunk->nr_pages;
> +		freed_page_end = 0;
> +		reintegrate = false;
>  		for (i = chunk->nr_pages - 1, end = -1; i >= 0; i--) {
>  			/* no more work to do */
>  			if (chunk->nr_empty_pop_pages == 0)
> @@ -2164,8 +2171,8 @@ static void pcpu_reclaim_populated(void)
>  
>  			/* reintegrate chunk to prevent atomic alloc failures */
>  			if (pcpu_nr_empty_pop_pages < PCPU_EMPTY_POP_PAGES_HIGH) {
> -				pcpu_reintegrate_chunk(chunk);
> -				goto restart;
> +				reintegrate = true;
> +				goto end_chunk;
>  			}
>  
>  			/*
> @@ -2194,16 +2201,26 @@ static void pcpu_reclaim_populated(void)
>  			spin_lock_irq(&pcpu_lock);
>  
>  			pcpu_chunk_depopulated(chunk, i + 1, end + 1);
> +			freed_page_start = min(freed_page_start, i + 1);
> +			freed_page_end = max(freed_page_end, end + 1);
>  
>  			/* reset the range and continue */
>  			end = -1;
>  		}
>  
> -		if (chunk->free_bytes == pcpu_unit_size)
> +end_chunk:
> +		/* batch tlb flush per chunk to amortize cost */
> +		if (freed_page_start < freed_page_end) {
> +			pcpu_post_unmap_tlb_flush(chunk,
> +						  freed_page_start,
> +						  freed_page_end);
> +		}
> +
> +		if (reintegrate || chunk->free_bytes == pcpu_unit_size)
>  			pcpu_reintegrate_chunk(chunk);
>  		else
> -			list_move(&chunk->list,
> -				  &pcpu_chunk_lists[pcpu_sidelined_slot]);
> +			list_move_tail(&chunk->list,
> +				       &pcpu_chunk_lists[pcpu_sidelined_slot]);
>  	}
>  }
>  
> -- 
> 2.32.0.93.g670b81a890-goog
> 


             reply	other threads:[~2021-07-03 23:03 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-07-03 23:02 Guenter Roeck [this message]
  -- strict thread matches above, loose matches on Subject: below --
2021-07-03 21:05 Dennis Zhou

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210703230253.GA2242521@roeck-us.net \
    --to=linux@roeck-us.net \
    --cc=cl@linux.com \
    --cc=dennis@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=tj@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox