Re: [PATCH v2 5/7] mm, swap: use percpu cluster as allocation fast path

linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed

From: Baoquan He <bhe@redhat.com>
To: Kairui Song <kasong@tencent.com>
Cc: linux-mm@kvack.org, Andrew Morton <akpm@linux-foundation.org>,
	Chris Li <chrisl@kernel.org>, Barry Song <v-songbaohua@oppo.com>,
	Hugh Dickins <hughd@google.com>,
	Yosry Ahmed <yosryahmed@google.com>,
	"Huang, Ying" <ying.huang@linux.alibaba.com>,
	Nhat Pham <nphamcs@gmail.com>,
	Johannes Weiner <hannes@cmpxchg.org>,
	Baolin Wang <baolin.wang@linux.alibaba.com>,
	Kalesh Singh <kaleshsingh@google.com>,
	Matthew Wilcox <willy@infradead.org>,
	linux-kernel@vger.kernel.org
Subject: Re: [PATCH v2 5/7] mm, swap: use percpu cluster as allocation fast path
Date: Tue, 25 Feb 2025 12:01:32 +0800	[thread overview]
Message-ID: <Z71AnDX+LQuXhY1Y@MiWiFi-R3L-srv> (raw)
In-Reply-To: <20250224180212.22802-6-ryncsn@gmail.com>

On 02/25/25 at 02:02am, Kairui Song wrote:
......snip...
> @@ -1204,19 +1252,36 @@ int get_swap_pages(int n_goal, swp_entry_t swp_entries[], int entry_order)
>  	int n_ret = 0;
>  	int node;
>  
> +	/* Fast path using percpu cluster */
> +	local_lock(&percpu_swap_cluster.lock);
> +	n_ret = swap_alloc_fast(swp_entries,
> +				SWAP_HAS_CACHE,
> +				order, n_goal);

Since we always pass in the SWAP_HAS_CACHE for the 2nd parameter, we can
hard code it inside fucntion swap_alloc_fast(), unless there will be
other callsite where different value is passed in for 'usage'. And the
function 2nd parameter of swap_alloc_slow() too. Anyway, it's not a big
deal, could be my personal peference.

Other than this nit, this looks good to me.

> +	if (n_ret == n_goal)
> +		goto out;
> +
> +	n_goal = min_t(int, n_goal - n_ret, SWAP_BATCH);
> +	/* Rotate the device and switch to a new cluster */
>  	spin_lock(&swap_avail_lock);
>  start_over:
>  	node = numa_node_id();
>  	plist_for_each_entry_safe(si, next, &swap_avail_heads[node], avail_lists[node]) {
> -		/* requeue si to after same-priority siblings */
>  		plist_requeue(&si->avail_lists[node], &swap_avail_heads[node]);
>  		spin_unlock(&swap_avail_lock);
>  		if (get_swap_device_info(si)) {
> -			n_ret = scan_swap_map_slots(si, SWAP_HAS_CACHE,
> -					n_goal, swp_entries, order);
> +			/*
> +			 * For order 0 allocation, try best to fill the request
> +			 * as it's used by slot cache.
> +			 *
> +			 * For mTHP allocation, it always have n_goal == 1,
> +			 * and falling a mTHP swapin will just make the caller
> +			 * fallback to order 0 allocation, so just bail out.
> +			 */
> +			n_ret += scan_swap_map_slots(si, SWAP_HAS_CACHE, n_goal,
> +					swp_entries + n_ret, order);
>  			put_swap_device(si);
>  			if (n_ret || size > 1)
> -				goto check_out;
> +				goto out;
>  		}
>  
>  		spin_lock(&swap_avail_lock);
> @@ -1234,12 +1299,10 @@ int get_swap_pages(int n_goal, swp_entry_t swp_entries[], int entry_order)
>  		if (plist_node_empty(&next->avail_lists[node]))
>  			goto start_over;
>  	}
> -
>  	spin_unlock(&swap_avail_lock);
> -
> -check_out:
> +out:
> +	local_unlock(&percpu_swap_cluster.lock);
>  	atomic_long_sub(n_ret * size, &nr_swap_pages);
> -
>  	return n_ret;
>  }
>  
> @@ -2725,8 +2788,6 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
>  	arch_swap_invalidate_area(p->type);
>  	zswap_swapoff(p->type);
>  	mutex_unlock(&swapon_mutex);
> -	free_percpu(p->percpu_cluster);
> -	p->percpu_cluster = NULL;
>  	kfree(p->global_cluster);
>  	p->global_cluster = NULL;
>  	vfree(swap_map);
> @@ -3125,7 +3186,7 @@ static struct swap_cluster_info *setup_clusters(struct swap_info_struct *si,
>  	unsigned long nr_clusters = DIV_ROUND_UP(maxpages, SWAPFILE_CLUSTER);
>  	struct swap_cluster_info *cluster_info;
>  	unsigned long i, j, idx;
> -	int cpu, err = -ENOMEM;
> +	int err = -ENOMEM;
>  
>  	cluster_info = kvcalloc(nr_clusters, sizeof(*cluster_info), GFP_KERNEL);
>  	if (!cluster_info)
> @@ -3134,20 +3195,7 @@ static struct swap_cluster_info *setup_clusters(struct swap_info_struct *si,
>  	for (i = 0; i < nr_clusters; i++)
>  		spin_lock_init(&cluster_info[i].lock);
>  
> -	if (si->flags & SWP_SOLIDSTATE) {
> -		si->percpu_cluster = alloc_percpu(struct percpu_cluster);
> -		if (!si->percpu_cluster)
> -			goto err_free;
> -
> -		for_each_possible_cpu(cpu) {
> -			struct percpu_cluster *cluster;
> -
> -			cluster = per_cpu_ptr(si->percpu_cluster, cpu);
> -			for (i = 0; i < SWAP_NR_ORDERS; i++)
> -				cluster->next[i] = SWAP_ENTRY_INVALID;
> -			local_lock_init(&cluster->lock);
> -		}
> -	} else {
> +	if (!(si->flags & SWP_SOLIDSTATE)) {
>  		si->global_cluster = kmalloc(sizeof(*si->global_cluster),
>  				     GFP_KERNEL);
>  		if (!si->global_cluster)
> @@ -3424,8 +3472,6 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
>  bad_swap_unlock_inode:
>  	inode_unlock(inode);
>  bad_swap:
> -	free_percpu(si->percpu_cluster);
> -	si->percpu_cluster = NULL;
>  	kfree(si->global_cluster);
>  	si->global_cluster = NULL;
>  	inode = NULL;
> -- 
> 2.48.1
>

next prev parent reply	other threads:[~2025-02-25  4:01 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-02-24 18:02 [PATCH v2 0/7] mm, swap: remove swap slot cache Kairui Song
2025-02-24 18:02 ` [PATCH v2 1/7] mm, swap: avoid reclaiming irrelevant swap cache Kairui Song
2025-02-24 18:02 ` [PATCH v2 2/7] mm, swap: drop the flag TTRS_DIRECT Kairui Song
2025-02-24 18:02 ` [PATCH v2 3/7] mm, swap: avoid redundant swap device pinning Kairui Song
2025-02-24 18:02 ` [PATCH v2 4/7] mm, swap: don't update the counter up-front Kairui Song
2025-02-25  6:32   ` Baoquan He
2025-02-24 18:02 ` [PATCH v2 5/7] mm, swap: use percpu cluster as allocation fast path Kairui Song
2025-02-25  4:01   ` Baoquan He [this message]
2025-02-25  6:38   ` Baoquan He
2025-03-07  2:54   ` Kairui Song
2025-02-24 18:02 ` [PATCH v2 6/7] mm, swap: remove swap slot cache Kairui Song
2025-02-25  6:39   ` Baoquan He
2025-02-24 18:02 ` [PATCH v2 7/7] mm, swap: simplify folio swap allocation Kairui Song
2025-02-25  6:43   ` Baoquan He

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=Z71AnDX+LQuXhY1Y@MiWiFi-R3L-srv \
    --to=bhe@redhat.com \
    --cc=akpm@linux-foundation.org \
    --cc=baolin.wang@linux.alibaba.com \
    --cc=chrisl@kernel.org \
    --cc=hannes@cmpxchg.org \
    --cc=hughd@google.com \
    --cc=kaleshsingh@google.com \
    --cc=kasong@tencent.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=nphamcs@gmail.com \
    --cc=v-songbaohua@oppo.com \
    --cc=willy@infradead.org \
    --cc=ying.huang@linux.alibaba.com \
    --cc=yosryahmed@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox