Re: [PATCH v3 5/7] mm/swap: avoid a duplicated swap cache lookup for SWP_SYNCHRONOUS_IO

linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed

From: "Huang, Ying" <ying.huang@intel.com>
To: Kairui Song <ryncsn@gmail.com>
Cc: linux-mm@kvack.org,  Kairui Song <kasong@tencent.com>,
	 Andrew Morton <akpm@linux-foundation.org>,
	 Chris Li <chrisl@kernel.org>,  Hugh Dickins <hughd@google.com>,
	 Johannes Weiner <hannes@cmpxchg.org>,
	 Matthew Wilcox <willy@infradead.org>,
	 Michal Hocko <mhocko@suse.com>,
	 Yosry Ahmed <yosryahmed@google.com>,
	 David Hildenbrand <david@redhat.com>,
	linux-kernel@vger.kernel.org
Subject: Re: [PATCH v3 5/7] mm/swap: avoid a duplicated swap cache lookup for SWP_SYNCHRONOUS_IO
Date: Tue, 30 Jan 2024 14:51:48 +0800	[thread overview]
Message-ID: <87jznrgvp7.fsf@yhuang6-desk2.ccr.corp.intel.com> (raw)
In-Reply-To: <20240129175423.1987-6-ryncsn@gmail.com> (Kairui Song's message of "Tue, 30 Jan 2024 01:54:20 +0800")

Kairui Song <ryncsn@gmail.com> writes:

> From: Kairui Song <kasong@tencent.com>
>
> When a xa_value is returned by the cache lookup, keep it to be used
> later for workingset refault check instead of doing the looking up again
> in swapin_no_readahead.
>
> Shadow look up and workingset check is skipped for swapoff to reduce
> overhead, workingset checking for anon pages upon swapoff is not
> helpful, simply consider all pages as inactive make more sense since
> swapoff doesn't mean pages is being accessed.
>
> After this commit, swappin is about 4% faster for ZRAM, micro benchmark
> result which use madvise to swap out 10G zero-filled data to ZRAM then
> read them in:
>
> Before: 11143285 us
> After:  10692644 us (+4.1%)
>
> Signed-off-by: Kairui Song <kasong@tencent.com>

LGTM, Thanks!

Reviewed-by: "Huang, Ying" <ying.huang@intel.com>

> ---
>  mm/memory.c     |  5 +++--
>  mm/shmem.c      |  2 +-
>  mm/swap.h       | 11 ++++++-----
>  mm/swap_state.c | 23 +++++++++++++----------
>  mm/swapfile.c   |  4 ++--
>  5 files changed, 25 insertions(+), 20 deletions(-)
>
> diff --git a/mm/memory.c b/mm/memory.c
> index 8711f8a07039..349946899f8d 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -3800,6 +3800,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
>  	struct swap_info_struct *si = NULL;
>  	rmap_t rmap_flags = RMAP_NONE;
>  	bool exclusive = false;
> +	void *shadow = NULL;
>  	swp_entry_t entry;
>  	pte_t pte;
>  	vm_fault_t ret = 0;
> @@ -3858,14 +3859,14 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
>  	if (unlikely(!si))
>  		goto out;
>  
> -	folio = swap_cache_get_folio(entry, vma, vmf->address);
> +	folio = swap_cache_get_folio(entry, vma, vmf->address, &shadow);
>  	if (folio)
>  		page = folio_file_page(folio, swp_offset(entry));
>  	swapcache = folio;
>  
>  	if (!folio) {
>  		folio = swapin_entry(entry, GFP_HIGHUSER_MOVABLE,
> -				     vmf, &swapcache);
> +				     vmf, &swapcache, shadow);
>  		if (!folio) {
>  			/*
>  			 * Back out if somebody else faulted in this pte
> diff --git a/mm/shmem.c b/mm/shmem.c
> index d7c84ff62186..698a31bf7baa 100644
> --- a/mm/shmem.c
> +++ b/mm/shmem.c
> @@ -1873,7 +1873,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
>  	}
>  
>  	/* Look it up and read it in.. */
> -	folio = swap_cache_get_folio(swap, NULL, 0);
> +	folio = swap_cache_get_folio(swap, NULL, 0, NULL);
>  	if (!folio) {
>  		/* Or update major stats only when swapin succeeds?? */
>  		if (fault_type) {
> diff --git a/mm/swap.h b/mm/swap.h
> index 8f8185d3865c..ca9cb472a263 100644
> --- a/mm/swap.h
> +++ b/mm/swap.h
> @@ -42,7 +42,8 @@ void delete_from_swap_cache(struct folio *folio);
>  void clear_shadow_from_swap_cache(int type, unsigned long begin,
>  				  unsigned long end);
>  struct folio *swap_cache_get_folio(swp_entry_t entry,
> -		struct vm_area_struct *vma, unsigned long addr);
> +		struct vm_area_struct *vma, unsigned long addr,
> +		void **shadowp);
>  struct folio *filemap_get_incore_folio(struct address_space *mapping,
>  		pgoff_t index);
>  
> @@ -54,8 +55,8 @@ struct folio *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_flags,
>  		bool skip_if_exists);
>  struct folio *swap_cluster_readahead(swp_entry_t entry, gfp_t flag,
>  		struct mempolicy *mpol, pgoff_t ilx);
> -struct folio *swapin_entry(swp_entry_t entry, gfp_t flag,
> -			   struct vm_fault *vmf, struct folio **swapcached);
> +struct folio *swapin_entry(swp_entry_t entry, gfp_t flag, struct vm_fault *vmf,
> +		struct folio **swapcached, void *shadow);
>  
>  static inline unsigned int folio_swap_flags(struct folio *folio)
>  {
> @@ -87,7 +88,7 @@ static inline struct folio *swap_cluster_readahead(swp_entry_t entry,
>  }
>  
>  static inline struct folio *swapin_entry(swp_entry_t swp, gfp_t gfp_mask,
> -			struct vm_fault *vmf, struct folio **swapcached)
> +			struct vm_fault *vmf, struct folio **swapcached, void *shadow)
>  {
>  	return NULL;
>  }
> @@ -98,7 +99,7 @@ static inline int swap_writepage(struct page *p, struct writeback_control *wbc)
>  }
>  
>  static inline struct folio *swap_cache_get_folio(swp_entry_t entry,
> -		struct vm_area_struct *vma, unsigned long addr)
> +		struct vm_area_struct *vma, unsigned long addr, void **shadowp)
>  {
>  	return NULL;
>  }
> diff --git a/mm/swap_state.c b/mm/swap_state.c
> index 5e06b2e140d4..e41a137a6123 100644
> --- a/mm/swap_state.c
> +++ b/mm/swap_state.c
> @@ -330,12 +330,18 @@ static inline bool swap_use_vma_readahead(void)
>   * Caller must lock the swap device or hold a reference to keep it valid.
>   */
>  struct folio *swap_cache_get_folio(swp_entry_t entry,
> -		struct vm_area_struct *vma, unsigned long addr)
> +		struct vm_area_struct *vma, unsigned long addr, void **shadowp)
>  {
>  	struct folio *folio;
>  
> -	folio = filemap_get_folio(swap_address_space(entry), swp_offset(entry));
> -	if (!IS_ERR(folio)) {
> +	folio = filemap_get_entry(swap_address_space(entry), swp_offset(entry));
> +	if (xa_is_value(folio)) {
> +		if (shadowp)
> +			*shadowp = folio;
> +		return NULL;
> +	}
> +
> +	if (folio) {
>  		bool vma_ra = swap_use_vma_readahead();
>  		bool readahead;
>  
> @@ -365,8 +371,6 @@ struct folio *swap_cache_get_folio(swp_entry_t entry,
>  			if (!vma || !vma_ra)
>  				atomic_inc(&swapin_readahead_hits);
>  		}
> -	} else {
> -		folio = NULL;
>  	}
>  
>  	return folio;
> @@ -866,16 +870,16 @@ static struct folio *swap_vma_readahead(swp_entry_t targ_entry, gfp_t gfp_mask,
>   * @entry: swap entry of this memory
>   * @gfp_mask: memory allocation flags
>   * @vmf: fault information
> + * @shadow: workingset shadow corresponding to entry
>   *
>   * Returns the struct folio for entry and addr after the swap entry is read
>   * in.
>   */
>  static struct folio *swapin_direct(swp_entry_t entry, gfp_t gfp_mask,
> -				  struct vm_fault *vmf)
> +				  struct vm_fault *vmf, void *shadow)
>  {
>  	struct vm_area_struct *vma = vmf->vma;
>  	struct folio *folio;
> -	void *shadow = NULL;
>  
>  	/* skip swapcache */
>  	folio = vma_alloc_folio(gfp_mask, 0,
> @@ -892,7 +896,6 @@ static struct folio *swapin_direct(swp_entry_t entry, gfp_t gfp_mask,
>  		}
>  		mem_cgroup_swapin_uncharge_swap(entry);
>  
> -		shadow = get_shadow_from_swap_cache(entry);
>  		if (shadow)
>  			workingset_refault(folio, shadow);
>  
> @@ -922,7 +925,7 @@ static struct folio *swapin_direct(swp_entry_t entry, gfp_t gfp_mask,
>   * or skip the readahead(ie, ramdisk based swap device).
>   */
>  struct folio *swapin_entry(swp_entry_t entry, gfp_t gfp_mask,
> -			   struct vm_fault *vmf, struct folio **swapcache)
> +			   struct vm_fault *vmf, struct folio **swapcache, void *shadow)
>  {
>  	struct mempolicy *mpol;
>  	struct folio *folio;
> @@ -930,7 +933,7 @@ struct folio *swapin_entry(swp_entry_t entry, gfp_t gfp_mask,
>  
>  	if (data_race(swp_swap_info(entry)->flags & SWP_SYNCHRONOUS_IO) &&
>  	    __swap_count(entry) == 1) {
> -		folio = swapin_direct(entry, gfp_mask, vmf);
> +		folio = swapin_direct(entry, gfp_mask, vmf, shadow);
>  	} else {
>  		mpol = get_vma_policy(vmf->vma, vmf->address, 0, &ilx);
>  		if (swap_use_vma_readahead())
> diff --git a/mm/swapfile.c b/mm/swapfile.c
> index 1cf7e72e19e3..aac26f5a6cec 100644
> --- a/mm/swapfile.c
> +++ b/mm/swapfile.c
> @@ -1865,7 +1865,7 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
>  		pte_unmap(pte);
>  		pte = NULL;
>  
> -		folio = swap_cache_get_folio(entry, vma, addr);
> +		folio = swap_cache_get_folio(entry, vma, addr, NULL);
>  		if (!folio) {
>  			struct vm_fault vmf = {
>  				.vma = vma,
> @@ -1875,7 +1875,7 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
>  			};
>  
>  			folio = swapin_entry(entry, GFP_HIGHUSER_MOVABLE,
> -					    &vmf, NULL);
> +					    &vmf, NULL, NULL);
>  		}
>  		if (!folio) {
>  			/*

next prev parent reply	other threads:[~2024-01-30  6:53 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-01-29 17:54 [PATCH v3 0/7] swapin refactor for optimization and unified readahead Kairui Song
2024-01-29 17:54 ` [PATCH v3 1/7] mm/swapfile.c: add back some comment Kairui Song
2024-01-29 17:54 ` [PATCH v3 2/7] mm/swap: move no readahead swapin code to a stand-alone helper Kairui Song
2024-01-30  5:38   ` Huang, Ying
2024-01-30  5:55     ` Kairui Song
2024-01-29 17:54 ` [PATCH v3 3/7] mm/swap: always account swapped in page into current memcg Kairui Song
2024-01-30  6:12   ` Huang, Ying
2024-01-30  7:01     ` Kairui Song
2024-01-30  7:03       ` Kairui Song
2024-01-29 17:54 ` [PATCH v3 4/7] mm/swap: introduce swapin_entry for unified readahead policy Kairui Song
2024-01-30  6:29   ` Huang, Ying
2024-01-29 17:54 ` [PATCH v3 5/7] mm/swap: avoid a duplicated swap cache lookup for SWP_SYNCHRONOUS_IO Kairui Song
2024-01-30  6:51   ` Huang, Ying [this message]
2024-01-29 17:54 ` [PATCH v3 6/7] mm/swap, shmem: use unified swapin helper for shmem Kairui Song
2024-01-31  2:51   ` Whether is the race for SWP_SYNCHRONOUS_IO possible? (was Re: [PATCH v3 6/7] mm/swap, shmem: use unified swapin helper for shmem) Huang, Ying
2024-01-31  3:58     ` Kairui Song
2024-01-31 23:45       ` Chris Li
2024-02-01  0:52         ` Huang, Ying
2024-01-31 23:38     ` Chris Li
2024-01-29 17:54 ` [PATCH v3 7/7] mm/swap: refactor swap_cache_get_folio Kairui Song

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87jznrgvp7.fsf@yhuang6-desk2.ccr.corp.intel.com \
    --to=ying.huang@intel.com \
    --cc=akpm@linux-foundation.org \
    --cc=chrisl@kernel.org \
    --cc=david@redhat.com \
    --cc=hannes@cmpxchg.org \
    --cc=hughd@google.com \
    --cc=kasong@tencent.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mhocko@suse.com \
    --cc=ryncsn@gmail.com \
    --cc=willy@infradead.org \
    --cc=yosryahmed@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox