linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Baolin Wang <baolin.wang@linux.alibaba.com>
To: Kairui Song <kasong@tencent.com>, linux-mm@kvack.org
Cc: Andrew Morton <akpm@linux-foundation.org>,
	Hugh Dickins <hughd@google.com>,
	Matthew Wilcox <willy@infradead.org>,
	Kemeng Shi <shikemeng@huaweicloud.com>,
	Chris Li <chrisl@kernel.org>, Nhat Pham <nphamcs@gmail.com>,
	Baoquan He <bhe@redhat.com>, Barry Song <baohua@kernel.org>,
	linux-kernel@vger.kernel.org
Subject: Re: [PATCH v4 6/9] mm/shmem, swap: never use swap cache and readahead for SWP_SYNCHRONOUS_IO
Date: Mon, 7 Jul 2025 16:05:39 +0800	[thread overview]
Message-ID: <2b1c5548-bf66-4d4a-a379-d9c6bf35283c@linux.alibaba.com> (raw)
In-Reply-To: <20250704181748.63181-7-ryncsn@gmail.com>



On 2025/7/5 02:17, Kairui Song wrote:
> From: Kairui Song <kasong@tencent.com>
> 
> Currently if a THP swapin failed due to reasons like partially
> conflicting swap cache or ZSWAP enabled, it will fallback to
> cached swapin.
> 
> Right now the swap cache still has a non-trivial overhead, and readahead
> is not helpful for SWP_SYNCHRONOUS_IO devices, so we should always skip
> the readahead and swap cache even if the swapin falls back to order 0.
> 
> So handle the fallback logic without falling back to the cached read.
> 
> Signed-off-by: Kairui Song <kasong@tencent.com>
> ---
>   mm/shmem.c | 55 +++++++++++++++++++++++++++++++++++-------------------
>   1 file changed, 36 insertions(+), 19 deletions(-)
> 
> diff --git a/mm/shmem.c b/mm/shmem.c
> index 2ab214e2771c..1fe9a3eb92b1 100644
> --- a/mm/shmem.c
> +++ b/mm/shmem.c
> @@ -1975,13 +1975,16 @@ static struct folio *shmem_alloc_and_add_folio(struct vm_fault *vmf,
>   	return ERR_PTR(error);
>   }
>   
> -static struct folio *shmem_swap_alloc_folio(struct inode *inode,
> +static struct folio *shmem_swapin_direct(struct inode *inode,
>   		struct vm_area_struct *vma, pgoff_t index,
> -		swp_entry_t entry, int order, gfp_t gfp)
> +		swp_entry_t swap, swp_entry_t index_entry,

IMO, having both 'swap' and 'index_entry' as parameters is confusing, and
it's easy to mix up their roles. I suggest passing only the original swap
value. If the swapin falls back to order 0, the swap value can be
recalculated, which is more readable and keeps the function independent.

> +		int order, gfp_t gfp)
>   {
>   	struct shmem_inode_info *info = SHMEM_I(inode);
> +	swp_entry_t entry = index_entry;
>   	int nr_pages = 1 << order;
>   	struct folio *new;
> +	gfp_t alloc_gfp;
>   	void *shadow;
>   
>   	/*
> @@ -1989,6 +1992,7 @@ static struct folio *shmem_swap_alloc_folio(struct inode *inode,
>   	 * limit chance of success with further cpuset and node constraints.
>   	 */
>   	gfp &= ~GFP_CONSTRAINT_MASK;
> +	alloc_gfp = gfp;
>   	if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
>   		if (WARN_ON_ONCE(order))
>   			return ERR_PTR(-EINVAL);
> @@ -2003,19 +2007,22 @@ static struct folio *shmem_swap_alloc_folio(struct inode *inode,
>   		if ((vma && unlikely(userfaultfd_armed(vma))) ||
>   		     !zswap_never_enabled() ||
>   		     non_swapcache_batch(entry, nr_pages) != nr_pages)
> -			return ERR_PTR(-EINVAL);
> +			goto fallback;
>   
> -		gfp = limit_gfp_mask(vma_thp_gfp_mask(vma), gfp);
> +		alloc_gfp = limit_gfp_mask(vma_thp_gfp_mask(vma), gfp);
> +	}
> +retry:
> +	new = shmem_alloc_folio(alloc_gfp, order, info, index);
> +	if (!new) {
> +		new = ERR_PTR(-ENOMEM);
> +		goto fallback;
>   	}
> -
> -	new = shmem_alloc_folio(gfp, order, info, index);
> -	if (!new)
> -		return ERR_PTR(-ENOMEM);
>   
>   	if (mem_cgroup_swapin_charge_folio(new, vma ? vma->vm_mm : NULL,
> -					   gfp, entry)) {
> +					   alloc_gfp, entry)) {
>   		folio_put(new);
> -		return ERR_PTR(-ENOMEM);
> +		new = ERR_PTR(-ENOMEM);
> +		goto fallback;
>   	}
>   
>   	/*
> @@ -2030,7 +2037,9 @@ static struct folio *shmem_swap_alloc_folio(struct inode *inode,
>   	 */
>   	if (swapcache_prepare(entry, nr_pages)) {
>   		folio_put(new);
> -		return ERR_PTR(-EEXIST);
> +		new = ERR_PTR(-EEXIST);
> +		/* Try smaller folio to avoid cache conflict */
> +		goto fallback;
>   	}
>   
>   	__folio_set_locked(new);
> @@ -2044,6 +2053,15 @@ static struct folio *shmem_swap_alloc_folio(struct inode *inode,
>   	folio_add_lru(new);
>   	swap_read_folio(new, NULL);
>   	return new;
> +fallback:
> +	/* Order 0 swapin failed, nothing to fallback to, abort */
> +	if (!order)
> +		return new;
> +	order = 0;
> +	nr_pages = 1;
> +	alloc_gfp = gfp;
> +	entry = swap;
> +	goto retry;
>   }
>   
>   /*
> @@ -2309,25 +2327,24 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
>   			count_vm_event(PGMAJFAULT);
>   			count_memcg_event_mm(fault_mm, PGMAJFAULT);
>   		}
> -

Nit: please don't include unnecessary changes (this blank-line removal).

>   		/* Skip swapcache for synchronous device. */
>   		if (data_race(si->flags & SWP_SYNCHRONOUS_IO)) {
> -			folio = shmem_swap_alloc_folio(inode, vma, index,
> -						       index_entry, order, gfp);
> +			folio = shmem_swapin_direct(inode, vma, index, swap,
> +						    index_entry, order, gfp);
>   			if (!IS_ERR(folio)) {
> -				swap = index_entry;
> +				if (folio_test_large(folio))
> +					swap = index_entry;
>   				skip_swapcache = true;
>   				goto alloced;
>   			}
>   
>   			/*
> -			 * Fallback to swapin order-0 folio unless the swap entry
> -			 * already exists.
> +			 * Direct swapin handled order 0 fallback already,
> +			 * if it failed, abort.
>   			 */
>   			error = PTR_ERR(folio);
>   			folio = NULL;
> -			if (error == -EEXIST)
> -				goto failed;
> +			goto failed;
>   		}
>   		/* Cached swapin with readahead, only supports order 0 */
>   		folio = shmem_swapin_cluster(swap, gfp, info, index);



  reply	other threads:[~2025-07-07  8:05 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-07-04 18:17 [PATCH v4 0/9] mm/shmem, swap: bugfix and improvement of mTHP swap-in Kairui Song
2025-07-04 18:17 ` [PATCH v4 1/9] mm/shmem, swap: improve cached mTHP handling and fix potential hung Kairui Song
2025-07-04 18:17 ` [PATCH v4 2/9] mm/shmem, swap: avoid redundant Xarray lookup during swapin Kairui Song
2025-07-04 18:17 ` [PATCH v4 3/9] mm/shmem, swap: tidy up THP swapin checks Kairui Song
2025-07-04 18:17 ` [PATCH v4 4/9] mm/shmem, swap: tidy up swap entry splitting Kairui Song
2025-07-06  3:35   ` Baolin Wang
2025-07-06 11:50     ` Kairui Song
2025-07-04 18:17 ` [PATCH v4 5/9] mm/shmem, swap: avoid false positive swap cache lookup Kairui Song
2025-07-07  7:53   ` Baolin Wang
2025-07-07  8:04     ` Kairui Song
2025-07-08  6:00       ` Baolin Wang
2025-07-04 18:17 ` [PATCH v4 6/9] mm/shmem, swap: never use swap cache and readahead for SWP_SYNCHRONOUS_IO Kairui Song
2025-07-07  8:05   ` Baolin Wang [this message]
2025-07-04 18:17 ` [PATCH v4 7/9] mm/shmem, swap: simplify swapin path and result handling Kairui Song
2025-07-07  8:14   ` Baolin Wang
2025-07-04 18:17 ` [PATCH v4 8/9] mm/shmem, swap: simplify swap entry and index calculation of large swapin Kairui Song
2025-07-04 18:17 ` [PATCH v4 9/9] mm/shmem, swap: fix major fault counting Kairui Song

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=2b1c5548-bf66-4d4a-a379-d9c6bf35283c@linux.alibaba.com \
    --to=baolin.wang@linux.alibaba.com \
    --cc=akpm@linux-foundation.org \
    --cc=baohua@kernel.org \
    --cc=bhe@redhat.com \
    --cc=chrisl@kernel.org \
    --cc=hughd@google.com \
    --cc=kasong@tencent.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=nphamcs@gmail.com \
    --cc=shikemeng@huaweicloud.com \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox