From: Nhat Pham <nphamcs@gmail.com>
To: linux-mm@kvack.org
Cc: akpm@linux-foundation.org, hannes@cmpxchg.org, hughd@google.com,
yosry.ahmed@linux.dev, mhocko@kernel.org,
roman.gushchin@linux.dev, shakeel.butt@linux.dev,
muchun.song@linux.dev, len.brown@intel.com,
chengming.zhou@linux.dev, kasong@tencent.com, chrisl@kernel.org,
huang.ying.caritas@gmail.com, ryan.roberts@arm.com,
viro@zeniv.linux.org.uk, baohua@kernel.org, osalvador@suse.de,
lorenzo.stoakes@oracle.com, christophe.leroy@csgroup.eu,
pavel@kernel.org, kernel-team@meta.com,
linux-kernel@vger.kernel.org, cgroups@vger.kernel.org,
linux-pm@vger.kernel.org, peterx@redhat.com
Subject: [RFC PATCH v2 07/18] mm: swap: zswap: swap cache and zswap support for virtualized swap
Date: Tue, 29 Apr 2025 16:38:35 -0700
Message-ID: <20250429233848.3093350-8-nphamcs@gmail.com>
In-Reply-To: <20250429233848.3093350-1-nphamcs@gmail.com>

Currently, the swap cache code assumes that the swap space is of a fixed
size. The virtual swap space is dynamically sized, so the existing
partitioning code cannot be easily reused. Dynamic partitioning of the
virtual swap space is planned as a follow-up, but for now keep the
design simple and use a single flat swap cache for vswap.
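
For illustration, here is a minimal caller-side sketch (not part of this
patch; example_swap_cache_lookup() is a hypothetical helper): callers keep
using swap_address_space() and swap_cache_index() unchanged, and only the
mapping from entry to (address_space, index) differs underneath.

	/*
	 * Illustration only: the usual swap cache lookup pattern. With
	 * CONFIG_VIRTUAL_SWAP the two helpers resolve to one flat
	 * address_space indexed by entry.val; without it, to one
	 * address_space per 64M of swap, indexed by the low offset bits.
	 */
	static struct folio *example_swap_cache_lookup(swp_entry_t entry)
	{
		struct folio *folio;

		folio = filemap_get_folio(swap_address_space(entry),
					  swap_cache_index(entry));
		return IS_ERR(folio) ? NULL : folio;
	}
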
Similarly, the zswap tree code, in particular its range partitioning
logic, can no longer be easily reused for the new virtual swap space
design. Use a single unified zswap tree in the new implementation for
now. As with the swap cache, range partitioning is planned as follow-up
work.
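
A similar sketch for zswap (illustration only; example_zswap_lookup() is
hypothetical): lookups keep going through swap_zswap_tree() plus an index
helper, and under CONFIG_VIRTUAL_SWAP the tree is a single xarray keyed
by the virtual swap entry value.

	/*
	 * Illustration only, mirroring the mm/zswap.c hunks below:
	 * one global tree vs. one tree per 64M range of each swapfile.
	 */
	static struct zswap_entry *example_zswap_lookup(swp_entry_t swp)
	{
		struct xarray *tree = swap_zswap_tree(swp);

		return xa_load(tree, zswap_tree_index(swp));
	}
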
Since the vswap implementation has begun to diverge from the old one, we
also introduce a new build config (CONFIG_VIRTUAL_SWAP). Users who do
not select this config get the old implementation, with no behavioral
change.
Signed-off-by: Nhat Pham <nphamcs@gmail.com>
---
mm/swap.h | 22 ++++++++++++++--------
mm/swap_state.c | 44 +++++++++++++++++++++++++++++++++++---------
mm/zswap.c | 38 ++++++++++++++++++++++++++++++++------
3 files changed, 81 insertions(+), 23 deletions(-)
diff --git a/mm/swap.h b/mm/swap.h
index d5f8effa8015..06e20b1d79c4 100644
--- a/mm/swap.h
+++ b/mm/swap.h
@@ -22,22 +22,27 @@ void swap_write_unplug(struct swap_iocb *sio);
int swap_writepage(struct page *page, struct writeback_control *wbc);
void __swap_writepage(struct folio *folio, struct writeback_control *wbc);
-/* linux/mm/swap_state.c */
-/* One swap address space for each 64M swap space */
+/* Return the swap device position of the swap slot. */
+static inline loff_t swap_slot_pos(swp_slot_t slot)
+{
+ return ((loff_t)swp_slot_offset(slot)) << PAGE_SHIFT;
+}
+
#define SWAP_ADDRESS_SPACE_SHIFT 14
#define SWAP_ADDRESS_SPACE_PAGES (1 << SWAP_ADDRESS_SPACE_SHIFT)
#define SWAP_ADDRESS_SPACE_MASK (SWAP_ADDRESS_SPACE_PAGES - 1)
+
+/* linux/mm/swap_state.c */
+#ifdef CONFIG_VIRTUAL_SWAP
+extern struct address_space *swap_address_space(swp_entry_t entry);
+#define swap_cache_index(entry) entry.val
+#else
+/* One swap address space for each 64M swap space */
extern struct address_space *swapper_spaces[];
#define swap_address_space(entry) \
(&swapper_spaces[swp_type(entry)][swp_offset(entry) \
>> SWAP_ADDRESS_SPACE_SHIFT])
-/* Return the swap device position of the swap slot. */
-static inline loff_t swap_slot_pos(swp_slot_t slot)
-{
- return ((loff_t)swp_slot_offset(slot)) << PAGE_SHIFT;
-}
-
/*
* Return the swap cache index of the swap entry.
*/
@@ -46,6 +51,7 @@ static inline pgoff_t swap_cache_index(swp_entry_t entry)
BUILD_BUG_ON((SWP_OFFSET_MASK | SWAP_ADDRESS_SPACE_MASK) != SWP_OFFSET_MASK);
return swp_offset(entry) & SWAP_ADDRESS_SPACE_MASK;
}
+#endif
void show_swap_cache_info(void);
bool add_to_swap(struct folio *folio);
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 1607d23a3d7b..f677ebf9c5d0 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -38,8 +38,18 @@ static const struct address_space_operations swap_aops = {
#endif
};
+#ifdef CONFIG_VIRTUAL_SWAP
+static struct address_space swapper_space __read_mostly;
+
+struct address_space *swap_address_space(swp_entry_t entry)
+{
+ return &swapper_space;
+}
+#else
struct address_space *swapper_spaces[MAX_SWAPFILES] __read_mostly;
static unsigned int nr_swapper_spaces[MAX_SWAPFILES] __read_mostly;
+#endif
+
static bool enable_vma_readahead __read_mostly = true;
#define SWAP_RA_ORDER_CEILING 5
@@ -718,23 +728,34 @@ struct folio *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask,
return folio;
}
+static void init_swapper_space(struct address_space *space)
+{
+ xa_init_flags(&space->i_pages, XA_FLAGS_LOCK_IRQ);
+ atomic_set(&space->i_mmap_writable, 0);
+ space->a_ops = &swap_aops;
+ /* swap cache doesn't use writeback related tags */
+ mapping_set_no_writeback_tags(space);
+}
+
+#ifdef CONFIG_VIRTUAL_SWAP
+int init_swap_address_space(unsigned int type, unsigned long nr_pages)
+{
+ return 0;
+}
+
+void exit_swap_address_space(unsigned int type) {}
+#else
int init_swap_address_space(unsigned int type, unsigned long nr_pages)
{
- struct address_space *spaces, *space;
+ struct address_space *spaces;
unsigned int i, nr;
nr = DIV_ROUND_UP(nr_pages, SWAP_ADDRESS_SPACE_PAGES);
spaces = kvcalloc(nr, sizeof(struct address_space), GFP_KERNEL);
if (!spaces)
return -ENOMEM;
- for (i = 0; i < nr; i++) {
- space = spaces + i;
- xa_init_flags(&space->i_pages, XA_FLAGS_LOCK_IRQ);
- atomic_set(&space->i_mmap_writable, 0);
- space->a_ops = &swap_aops;
- /* swap cache doesn't use writeback related tags */
- mapping_set_no_writeback_tags(space);
- }
+ for (i = 0; i < nr; i++)
+ init_swapper_space(spaces + i);
nr_swapper_spaces[type] = nr;
swapper_spaces[type] = spaces;
@@ -752,6 +773,7 @@ void exit_swap_address_space(unsigned int type)
nr_swapper_spaces[type] = 0;
swapper_spaces[type] = NULL;
}
+#endif
static int swap_vma_ra_win(struct vm_fault *vmf, unsigned long *start,
unsigned long *end)
@@ -930,6 +952,10 @@ static int __init swap_init_sysfs(void)
int err;
struct kobject *swap_kobj;
+#ifdef CONFIG_VIRTUAL_SWAP
+ init_swapper_space(&swapper_space);
+#endif
+
err = vswap_init();
if (err) {
pr_err("failed to initialize virtual swap space\n");
diff --git a/mm/zswap.c b/mm/zswap.c
index 23365e76a3ce..c1327569ce80 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -203,8 +203,6 @@ struct zswap_entry {
struct list_head lru;
};
-static struct xarray *zswap_trees[MAX_SWAPFILES];
-static unsigned int nr_zswap_trees[MAX_SWAPFILES];
/* RCU-protected iteration */
static LIST_HEAD(zswap_pools);
@@ -231,12 +229,28 @@ static bool zswap_has_pool;
* helpers and fwd declarations
**********************************/
+#ifdef CONFIG_VIRTUAL_SWAP
+static DEFINE_XARRAY(zswap_tree);
+
+static inline struct xarray *swap_zswap_tree(swp_entry_t swp)
+{
+ return &zswap_tree;
+}
+
+#define zswap_tree_index(entry) entry.val
+#else
+static struct xarray *zswap_trees[MAX_SWAPFILES];
+static unsigned int nr_zswap_trees[MAX_SWAPFILES];
+
static inline struct xarray *swap_zswap_tree(swp_entry_t swp)
{
return &zswap_trees[swp_type(swp)][swp_offset(swp)
>> SWAP_ADDRESS_SPACE_SHIFT];
}
+#define zswap_tree_index(entry) swp_offset(entry)
+#endif
+
#define zswap_pool_debug(msg, p) \
pr_debug("%s pool %s/%s\n", msg, (p)->tfm_name, \
zpool_get_type((p)->zpool))
@@ -1047,7 +1061,7 @@ static int zswap_writeback_entry(struct zswap_entry *entry,
swp_entry_t swpentry)
{
struct xarray *tree;
- pgoff_t offset = swp_offset(swpentry);
+ pgoff_t offset = zswap_tree_index(swpentry);
struct folio *folio;
struct mempolicy *mpol;
bool folio_was_allocated;
@@ -1463,7 +1477,7 @@ static bool zswap_store_page(struct page *page,
goto compress_failed;
old = xa_store(swap_zswap_tree(page_swpentry),
- swp_offset(page_swpentry),
+ zswap_tree_index(page_swpentry),
entry, GFP_KERNEL);
if (xa_is_err(old)) {
int err = xa_err(old);
@@ -1612,7 +1626,7 @@ bool zswap_store(struct folio *folio)
bool zswap_load(struct folio *folio)
{
swp_entry_t swp = folio->swap;
- pgoff_t offset = swp_offset(swp);
+ pgoff_t offset = zswap_tree_index(swp);
bool swapcache = folio_test_swapcache(folio);
struct xarray *tree = swap_zswap_tree(swp);
struct zswap_entry *entry;
@@ -1670,7 +1684,7 @@ bool zswap_load(struct folio *folio)
void zswap_invalidate(swp_entry_t swp)
{
- pgoff_t offset = swp_offset(swp);
+ pgoff_t offset = zswap_tree_index(swp);
struct xarray *tree = swap_zswap_tree(swp);
struct zswap_entry *entry;
@@ -1682,6 +1696,16 @@ void zswap_invalidate(swp_entry_t swp)
zswap_entry_free(entry);
}
+#ifdef CONFIG_VIRTUAL_SWAP
+int zswap_swapon(int type, unsigned long nr_pages)
+{
+ return 0;
+}
+
+void zswap_swapoff(int type)
+{
+}
+#else
int zswap_swapon(int type, unsigned long nr_pages)
{
struct xarray *trees, *tree;
@@ -1718,6 +1742,8 @@ void zswap_swapoff(int type)
nr_zswap_trees[type] = 0;
zswap_trees[type] = NULL;
}
+#endif /* CONFIG_VIRTUAL_SWAP */
+
/*********************************
* debugfs functions
--
2.47.1