From: Baoquan He <bhe@redhat.com>
To: Kairui Song <kasong@tencent.com>
Cc: linux-mm@kvack.org, Andrew Morton <akpm@linux-foundation.org>,
Chris Li <chrisl@kernel.org>, Barry Song <v-songbaohua@oppo.com>,
Hugh Dickins <hughd@google.com>,
Yosry Ahmed <yosryahmed@google.com>,
"Huang, Ying" <ying.huang@linux.alibaba.com>,
Nhat Pham <nphamcs@gmail.com>,
Johannes Weiner <hannes@cmpxchg.org>,
Baolin Wang <baolin.wang@linux.alibaba.com>,
Kalesh Singh <kaleshsingh@google.com>,
Matthew Wilcox <willy@infradead.org>,
linux-kernel@vger.kernel.org
Subject: Re: [PATCH v2 5/7] mm, swap: use percpu cluster as allocation fast path
Date: Tue, 25 Feb 2025 12:01:32 +0800 [thread overview]
Message-ID: <Z71AnDX+LQuXhY1Y@MiWiFi-R3L-srv> (raw)
In-Reply-To: <20250224180212.22802-6-ryncsn@gmail.com>
On 02/25/25 at 02:02am, Kairui Song wrote:
......snip...
> @@ -1204,19 +1252,36 @@ int get_swap_pages(int n_goal, swp_entry_t swp_entries[], int entry_order)
> int n_ret = 0;
> int node;
>
> + /* Fast path using percpu cluster */
> + local_lock(&percpu_swap_cluster.lock);
> + n_ret = swap_alloc_fast(swp_entries,
> + SWAP_HAS_CACHE,
> + order, n_goal);
Since we always pass in the SWAP_HAS_CACHE for the 2nd parameter, we can
hard code it inside fucntion swap_alloc_fast(), unless there will be
other callsite where different value is passed in for 'usage'. And the
function 2nd parameter of swap_alloc_slow() too. Anyway, it's not a big
deal, could be my personal peference.
Other than this nit, this looks good to me.
> + if (n_ret == n_goal)
> + goto out;
> +
> + n_goal = min_t(int, n_goal - n_ret, SWAP_BATCH);
> + /* Rotate the device and switch to a new cluster */
> spin_lock(&swap_avail_lock);
> start_over:
> node = numa_node_id();
> plist_for_each_entry_safe(si, next, &swap_avail_heads[node], avail_lists[node]) {
> - /* requeue si to after same-priority siblings */
> plist_requeue(&si->avail_lists[node], &swap_avail_heads[node]);
> spin_unlock(&swap_avail_lock);
> if (get_swap_device_info(si)) {
> - n_ret = scan_swap_map_slots(si, SWAP_HAS_CACHE,
> - n_goal, swp_entries, order);
> + /*
> + * For order 0 allocation, try best to fill the request
> + * as it's used by slot cache.
> + *
> + * For mTHP allocation, it always have n_goal == 1,
> + * and falling a mTHP swapin will just make the caller
> + * fallback to order 0 allocation, so just bail out.
> + */
> + n_ret += scan_swap_map_slots(si, SWAP_HAS_CACHE, n_goal,
> + swp_entries + n_ret, order);
> put_swap_device(si);
> if (n_ret || size > 1)
> - goto check_out;
> + goto out;
> }
>
> spin_lock(&swap_avail_lock);
> @@ -1234,12 +1299,10 @@ int get_swap_pages(int n_goal, swp_entry_t swp_entries[], int entry_order)
> if (plist_node_empty(&next->avail_lists[node]))
> goto start_over;
> }
> -
> spin_unlock(&swap_avail_lock);
> -
> -check_out:
> +out:
> + local_unlock(&percpu_swap_cluster.lock);
> atomic_long_sub(n_ret * size, &nr_swap_pages);
> -
> return n_ret;
> }
>
> @@ -2725,8 +2788,6 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
> arch_swap_invalidate_area(p->type);
> zswap_swapoff(p->type);
> mutex_unlock(&swapon_mutex);
> - free_percpu(p->percpu_cluster);
> - p->percpu_cluster = NULL;
> kfree(p->global_cluster);
> p->global_cluster = NULL;
> vfree(swap_map);
> @@ -3125,7 +3186,7 @@ static struct swap_cluster_info *setup_clusters(struct swap_info_struct *si,
> unsigned long nr_clusters = DIV_ROUND_UP(maxpages, SWAPFILE_CLUSTER);
> struct swap_cluster_info *cluster_info;
> unsigned long i, j, idx;
> - int cpu, err = -ENOMEM;
> + int err = -ENOMEM;
>
> cluster_info = kvcalloc(nr_clusters, sizeof(*cluster_info), GFP_KERNEL);
> if (!cluster_info)
> @@ -3134,20 +3195,7 @@ static struct swap_cluster_info *setup_clusters(struct swap_info_struct *si,
> for (i = 0; i < nr_clusters; i++)
> spin_lock_init(&cluster_info[i].lock);
>
> - if (si->flags & SWP_SOLIDSTATE) {
> - si->percpu_cluster = alloc_percpu(struct percpu_cluster);
> - if (!si->percpu_cluster)
> - goto err_free;
> -
> - for_each_possible_cpu(cpu) {
> - struct percpu_cluster *cluster;
> -
> - cluster = per_cpu_ptr(si->percpu_cluster, cpu);
> - for (i = 0; i < SWAP_NR_ORDERS; i++)
> - cluster->next[i] = SWAP_ENTRY_INVALID;
> - local_lock_init(&cluster->lock);
> - }
> - } else {
> + if (!(si->flags & SWP_SOLIDSTATE)) {
> si->global_cluster = kmalloc(sizeof(*si->global_cluster),
> GFP_KERNEL);
> if (!si->global_cluster)
> @@ -3424,8 +3472,6 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
> bad_swap_unlock_inode:
> inode_unlock(inode);
> bad_swap:
> - free_percpu(si->percpu_cluster);
> - si->percpu_cluster = NULL;
> kfree(si->global_cluster);
> si->global_cluster = NULL;
> inode = NULL;
> --
> 2.48.1
>
next prev parent reply other threads:[~2025-02-25 4:01 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-02-24 18:02 [PATCH v2 0/7] mm, swap: remove swap slot cache Kairui Song
2025-02-24 18:02 ` [PATCH v2 1/7] mm, swap: avoid reclaiming irrelevant swap cache Kairui Song
2025-02-24 18:02 ` [PATCH v2 2/7] mm, swap: drop the flag TTRS_DIRECT Kairui Song
2025-02-24 18:02 ` [PATCH v2 3/7] mm, swap: avoid redundant swap device pinning Kairui Song
2025-02-24 18:02 ` [PATCH v2 4/7] mm, swap: don't update the counter up-front Kairui Song
2025-02-25 6:32 ` Baoquan He
2025-02-24 18:02 ` [PATCH v2 5/7] mm, swap: use percpu cluster as allocation fast path Kairui Song
2025-02-25 4:01 ` Baoquan He [this message]
2025-02-25 6:38 ` Baoquan He
2025-03-07 2:54 ` Kairui Song
2025-02-24 18:02 ` [PATCH v2 6/7] mm, swap: remove swap slot cache Kairui Song
2025-02-25 6:39 ` Baoquan He
2025-02-24 18:02 ` [PATCH v2 7/7] mm, swap: simplify folio swap allocation Kairui Song
2025-02-25 6:43 ` Baoquan He
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=Z71AnDX+LQuXhY1Y@MiWiFi-R3L-srv \
--to=bhe@redhat.com \
--cc=akpm@linux-foundation.org \
--cc=baolin.wang@linux.alibaba.com \
--cc=chrisl@kernel.org \
--cc=hannes@cmpxchg.org \
--cc=hughd@google.com \
--cc=kaleshsingh@google.com \
--cc=kasong@tencent.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=nphamcs@gmail.com \
--cc=v-songbaohua@oppo.com \
--cc=willy@infradead.org \
--cc=ying.huang@linux.alibaba.com \
--cc=yosryahmed@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox