linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/3] mm/swap: use swap_ops to register swap device's methods
@ 2026-03-02 10:40 Baoquan He
  2026-03-02 10:40 ` [PATCH 1/3] mm/swap: rename mm/page_io.c to mm/swap_io.c Baoquan He
                   ` (3 more replies)
  0 siblings, 4 replies; 13+ messages in thread
From: Baoquan He @ 2026-03-02 10:40 UTC (permalink / raw)
  To: linux-mm
  Cc: akpm, chrisl, kasong, shikemeng, nphamcs, baohua, youngjun.park,
	Baoquan He

Using swap_ops to register a swap device's methods simplifies the code
logic and benefits any new type of swap device added later.

And also do renaming in this patchset:
-------
   file renaming:
   ---
   mm/page_io.c to mm/swap_io.c

   function renaming:
   ---
   swap_writepage_* to swap_write_folio_* in file mm/swap_io.c 


Baoquan He (3):
  mm/swap: rename mm/page_io.c to mm/swap_io.c
  mm/swap: use swap_ops to register swap device's methods
  mm/swap_io.c: rename swap_writepage_* to swap_write_folio_*

 MAINTAINERS                 |   2 +-
 include/linux/swap.h        |  13 +++++
 mm/Makefile                 |   2 +-
 mm/swap.h                   |   3 +-
 mm/{page_io.c => swap_io.c} | 104 +++++++++++++++++++++---------------
 mm/swapfile.c               |   2 +
 mm/zswap.c                  |   3 +-
 7 files changed, 80 insertions(+), 49 deletions(-)
 rename mm/{page_io.c => swap_io.c} (91%)

-- 
2.52.0



^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH 1/3] mm/swap: rename mm/page_io.c to mm/swap_io.c
  2026-03-02 10:40 [PATCH 0/3] mm/swap: use swap_ops to register swap device's methods Baoquan He
@ 2026-03-02 10:40 ` Baoquan He
  2026-03-02 10:56   ` Barry Song
  2026-03-02 10:40 ` [PATCH 2/3] mm/swap: use swap_ops to register swap device's methods Baoquan He
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 13+ messages in thread
From: Baoquan He @ 2026-03-02 10:40 UTC (permalink / raw)
  To: linux-mm
  Cc: akpm, chrisl, kasong, shikemeng, nphamcs, baohua, youngjun.park,
	Baoquan He

The code in mm/page_io.c is only related to swap I/O; it has
nothing to do with other page I/O.

Rename it to avoid confusion.

Signed-off-by: Baoquan He <bhe@redhat.com>
---
 MAINTAINERS                 | 2 +-
 mm/Makefile                 | 2 +-
 mm/swap.h                   | 2 +-
 mm/{page_io.c => swap_io.c} | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)
 rename mm/{page_io.c => swap_io.c} (99%)

diff --git a/MAINTAINERS b/MAINTAINERS
index 14899f1de77e..6ff65f8bc27b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -16945,7 +16945,7 @@ F:	Documentation/mm/swap-table.rst
 F:	include/linux/swap.h
 F:	include/linux/swapfile.h
 F:	include/linux/swapops.h
-F:	mm/page_io.c
+F:	mm/swap_io.c
 F:	mm/swap.c
 F:	mm/swap.h
 F:	mm/swap_table.h
diff --git a/mm/Makefile b/mm/Makefile
index 8ad2ab08244e..a65ac900096a 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -75,7 +75,7 @@ ifdef CONFIG_MMU
 	obj-$(CONFIG_ADVISE_SYSCALLS)	+= madvise.o
 endif
 
-obj-$(CONFIG_SWAP)	+= page_io.o swap_state.o swapfile.o
+obj-$(CONFIG_SWAP)	+= swap_io.o swap_state.o swapfile.o
 obj-$(CONFIG_ZSWAP)	+= zswap.o
 obj-$(CONFIG_HAS_DMA)	+= dmapool.o
 obj-$(CONFIG_HUGETLBFS)	+= hugetlb.o hugetlb_sysfs.o hugetlb_sysctl.o
diff --git a/mm/swap.h b/mm/swap.h
index a77016f2423b..161185057993 100644
--- a/mm/swap.h
+++ b/mm/swap.h
@@ -214,7 +214,7 @@ extern void __swap_cluster_free_entries(struct swap_info_struct *si,
 					struct swap_cluster_info *ci,
 					unsigned int ci_off, unsigned int nr_pages);
 
-/* linux/mm/page_io.c */
+/* linux/mm/swap_io.c */
 int sio_pool_init(void);
 struct swap_iocb;
 void swap_read_folio(struct folio *folio, struct swap_iocb **plug);
diff --git a/mm/page_io.c b/mm/swap_io.c
similarity index 99%
rename from mm/page_io.c
rename to mm/swap_io.c
index a2c034660c80..d1cdb10ba133 100644
--- a/mm/page_io.c
+++ b/mm/swap_io.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- *  linux/mm/page_io.c
+ *  linux/mm/swap_io.c
  *
  *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
  *
-- 
2.52.0



^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH 2/3] mm/swap: use swap_ops to register swap device's methods
  2026-03-02 10:40 [PATCH 0/3] mm/swap: use swap_ops to register swap device's methods Baoquan He
  2026-03-02 10:40 ` [PATCH 1/3] mm/swap: rename mm/page_io.c to mm/swap_io.c Baoquan He
@ 2026-03-02 10:40 ` Baoquan He
  2026-03-02 11:11   ` Barry Song
                     ` (3 more replies)
  2026-03-02 10:40 ` [PATCH 3/3] mm/swap_io.c: rename swap_writepage_* to swap_write_folio_* Baoquan He
  2026-03-02 14:43 ` [PATCH 0/3] mm/swap: use swap_ops to register swap device's methods YoungJun Park
  3 siblings, 4 replies; 13+ messages in thread
From: Baoquan He @ 2026-03-02 10:40 UTC (permalink / raw)
  To: linux-mm
  Cc: akpm, chrisl, kasong, shikemeng, nphamcs, baohua, youngjun.park,
	Baoquan He

This simplifies the code and makes the logic clearer. It also makes any
new swap device type added later easier to handle.

Currently there are three types of swap devices: bdev_fs, bdev_sync
and bdev_async, and only operations read_folio and write_folio are
included. In the future, there could be more swap device types added
and more appropriate operations adapted into swap_ops.

Signed-off-by: Baoquan He <bhe@redhat.com>
---
 include/linux/swap.h |  13 ++++++
 mm/swap.h            |   1 -
 mm/swap_io.c         | 102 +++++++++++++++++++++++++------------------
 mm/swapfile.c        |   2 +
 mm/zswap.c           |   3 +-
 5 files changed, 76 insertions(+), 45 deletions(-)

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 0effe3cc50f5..448e5e66ec5c 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -19,6 +19,7 @@
 struct notifier_block;
 
 struct bio;
+struct swap_iocb;
 
 struct pagevec;
 
@@ -222,6 +223,17 @@ enum {
 #define SWAP_CLUSTER_MAX_SKIPPED (SWAP_CLUSTER_MAX << 10)
 #define COMPACT_CLUSTER_MAX SWAP_CLUSTER_MAX
 
+struct swap_ops {
+	void (*read_folio)(struct swap_info_struct *sis,
+			   struct folio *folio,
+			   struct swap_iocb **plug);
+	void (*write_folio)(struct swap_info_struct *sis,
+			    struct folio *folio,
+			    struct swap_iocb **plug);
+};
+
+int probe_swap_fs(struct swap_info_struct *sis);
+
 /*
  * The first page in the swap file is the swap header, which is always marked
  * bad to prevent it from being allocated as an entry. This also prevents the
@@ -284,6 +296,7 @@ struct swap_info_struct {
 	struct work_struct reclaim_work; /* reclaim worker */
 	struct list_head discard_clusters; /* discard clusters list */
 	struct plist_node avail_list;   /* entry in swap_avail_head */
+	struct swap_ops *ops;
 };
 
 static inline swp_entry_t page_swap_entry(struct page *page)
diff --git a/mm/swap.h b/mm/swap.h
index 161185057993..c390df3f5889 100644
--- a/mm/swap.h
+++ b/mm/swap.h
@@ -226,7 +226,6 @@ static inline void swap_read_unplug(struct swap_iocb *plug)
 }
 void swap_write_unplug(struct swap_iocb *sio);
 int swap_writeout(struct folio *folio, struct swap_iocb **swap_plug);
-void __swap_writepage(struct folio *folio, struct swap_iocb **swap_plug);
 
 /* linux/mm/swap_state.c */
 extern struct address_space swap_space __read_mostly;
diff --git a/mm/swap_io.c b/mm/swap_io.c
index d1cdb10ba133..47077b345ae3 100644
--- a/mm/swap_io.c
+++ b/mm/swap_io.c
@@ -240,6 +240,7 @@ static void swap_zeromap_folio_clear(struct folio *folio)
 int swap_writeout(struct folio *folio, struct swap_iocb **swap_plug)
 {
 	int ret = 0;
+	struct swap_info_struct *sis = __swap_entry_to_info(folio->swap);
 
 	if (folio_free_swap(folio))
 		goto out_unlock;
@@ -281,7 +282,8 @@ int swap_writeout(struct folio *folio, struct swap_iocb **swap_plug)
 		return AOP_WRITEPAGE_ACTIVATE;
 	}
 
-	__swap_writepage(folio, swap_plug);
+	if (sis->ops && sis->ops->write_folio)
+		sis->ops->write_folio(sis, folio, swap_plug);
 	return 0;
 out_unlock:
 	folio_unlock(folio);
@@ -371,10 +373,11 @@ static void sio_write_complete(struct kiocb *iocb, long ret)
 	mempool_free(sio, sio_pool);
 }
 
-static void swap_writepage_fs(struct folio *folio, struct swap_iocb **swap_plug)
+static void swap_writepage_fs(struct swap_info_struct *sis,
+			      struct folio *folio,
+			      struct swap_iocb **swap_plug)
 {
 	struct swap_iocb *sio = swap_plug ? *swap_plug : NULL;
-	struct swap_info_struct *sis = __swap_entry_to_info(folio->swap);
 	struct file *swap_file = sis->swap_file;
 	loff_t pos = swap_dev_pos(folio->swap);
 
@@ -407,8 +410,9 @@ static void swap_writepage_fs(struct folio *folio, struct swap_iocb **swap_plug)
 		*swap_plug = sio;
 }
 
-static void swap_writepage_bdev_sync(struct folio *folio,
-		struct swap_info_struct *sis)
+static void swap_writepage_bdev_sync(struct swap_info_struct *sis,
+				     struct folio *folio,
+				     struct swap_iocb **plug)
 {
 	struct bio_vec bv;
 	struct bio bio;
@@ -427,8 +431,9 @@ static void swap_writepage_bdev_sync(struct folio *folio,
 	__end_swap_bio_write(&bio);
 }
 
-static void swap_writepage_bdev_async(struct folio *folio,
-		struct swap_info_struct *sis)
+static void swap_writepage_bdev_async(struct swap_info_struct *sis,
+				      struct folio *folio,
+				      struct swap_iocb **plug)
 {
 	struct bio *bio;
 
@@ -444,29 +449,6 @@ static void swap_writepage_bdev_async(struct folio *folio,
 	submit_bio(bio);
 }
 
-void __swap_writepage(struct folio *folio, struct swap_iocb **swap_plug)
-{
-	struct swap_info_struct *sis = __swap_entry_to_info(folio->swap);
-
-	VM_BUG_ON_FOLIO(!folio_test_swapcache(folio), folio);
-	/*
-	 * ->flags can be updated non-atomically (scan_swap_map_slots),
-	 * but that will never affect SWP_FS_OPS, so the data_race
-	 * is safe.
-	 */
-	if (data_race(sis->flags & SWP_FS_OPS))
-		swap_writepage_fs(folio, swap_plug);
-	/*
-	 * ->flags can be updated non-atomically (scan_swap_map_slots),
-	 * but that will never affect SWP_SYNCHRONOUS_IO, so the data_race
-	 * is safe.
-	 */
-	else if (data_race(sis->flags & SWP_SYNCHRONOUS_IO))
-		swap_writepage_bdev_sync(folio, sis);
-	else
-		swap_writepage_bdev_async(folio, sis);
-}
-
 void swap_write_unplug(struct swap_iocb *sio)
 {
 	struct iov_iter from;
@@ -535,9 +517,10 @@ static bool swap_read_folio_zeromap(struct folio *folio)
 	return true;
 }
 
-static void swap_read_folio_fs(struct folio *folio, struct swap_iocb **plug)
+static void swap_read_folio_fs(struct swap_info_struct *sis,
+			       struct folio *folio,
+			       struct swap_iocb **plug)
 {
-	struct swap_info_struct *sis = __swap_entry_to_info(folio->swap);
 	struct swap_iocb *sio = NULL;
 	loff_t pos = swap_dev_pos(folio->swap);
 
@@ -569,8 +552,9 @@ static void swap_read_folio_fs(struct folio *folio, struct swap_iocb **plug)
 		*plug = sio;
 }
 
-static void swap_read_folio_bdev_sync(struct folio *folio,
-		struct swap_info_struct *sis)
+static void swap_read_folio_bdev_sync(struct swap_info_struct *sis,
+				      struct folio *folio,
+				      struct swap_iocb **plug)
 {
 	struct bio_vec bv;
 	struct bio bio;
@@ -591,8 +575,9 @@ static void swap_read_folio_bdev_sync(struct folio *folio,
 	put_task_struct(current);
 }
 
-static void swap_read_folio_bdev_async(struct folio *folio,
-		struct swap_info_struct *sis)
+static void swap_read_folio_bdev_async(struct swap_info_struct *sis,
+				       struct folio *folio,
+				       struct swap_iocb **plug)
 {
 	struct bio *bio;
 
@@ -606,6 +591,42 @@ static void swap_read_folio_bdev_async(struct folio *folio,
 	submit_bio(bio);
 }
 
+static struct swap_ops bdev_fs_swap_ops = {
+	.read_folio = swap_read_folio_fs,
+	.write_folio = swap_writepage_fs,
+};
+
+static struct swap_ops bdev_sync_swap_ops = {
+	.read_folio = swap_read_folio_bdev_sync,
+	.write_folio = swap_writepage_bdev_sync,
+};
+
+static struct swap_ops bdev_async_swap_ops = {
+	.read_folio = swap_read_folio_bdev_async,
+	.write_folio = swap_writepage_bdev_async,
+};
+
+int probe_swap_fs(struct swap_info_struct *sis)
+{
+	/*
+	 * ->flags can be updated non-atomically (scan_swap_map_slots),
+	 * but that will never affect SWP_FS_OPS, so the data_race
+	 * is safe.
+	 */
+	if (data_race(sis->flags & SWP_FS_OPS))
+		sis->ops = &bdev_fs_swap_ops;
+	/*
+	 * ->flags can be updated non-atomically (scan_swap_map_slots),
+	 * but that will never affect SWP_SYNCHRONOUS_IO, so the data_race
+	 * is safe.
+	 */
+	else if (data_race(sis->flags & SWP_SYNCHRONOUS_IO))
+		sis->ops = &bdev_sync_swap_ops;
+	else
+		sis->ops = &bdev_async_swap_ops;
+	return 0;
+}
+
 void swap_read_folio(struct folio *folio, struct swap_iocb **plug)
 {
 	struct swap_info_struct *sis = __swap_entry_to_info(folio->swap);
@@ -640,13 +661,8 @@ void swap_read_folio(struct folio *folio, struct swap_iocb **plug)
 	/* We have to read from slower devices. Increase zswap protection. */
 	zswap_folio_swapin(folio);
 
-	if (data_race(sis->flags & SWP_FS_OPS)) {
-		swap_read_folio_fs(folio, plug);
-	} else if (synchronous) {
-		swap_read_folio_bdev_sync(folio, sis);
-	} else {
-		swap_read_folio_bdev_async(folio, sis);
-	}
+	if (sis->ops && sis->ops->read_folio)
+		sis->ops->read_folio(sis, folio, plug);
 
 finish:
 	if (workingset) {
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 915bc93964db..af498f9af328 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -3625,6 +3625,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 	/* Sets SWP_WRITEOK, resurrect the percpu ref, expose the swap device */
 	enable_swap_info(si);
 
+	probe_swap_fs(si);
+
 	pr_info("Adding %uk swap on %s.  Priority:%d extents:%d across:%lluk %s%s%s%s\n",
 		K(si->pages), name->name, si->prio, nr_extents,
 		K((unsigned long long)span),
diff --git a/mm/zswap.c b/mm/zswap.c
index a399f7a10830..7ce906249c7a 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -1055,7 +1055,8 @@ static int zswap_writeback_entry(struct zswap_entry *entry,
 	folio_set_reclaim(folio);
 
 	/* start writeback */
-	__swap_writepage(folio, NULL);
+	if (si->ops && si->ops->write_folio)
+		si->ops->write_folio(si, folio, NULL);
 
 out:
 	if (ret && ret != -EEXIST) {
-- 
2.52.0



^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH 3/3] mm/swap_io.c: rename swap_writepage_* to swap_write_folio_*
  2026-03-02 10:40 [PATCH 0/3] mm/swap: use swap_ops to register swap device's methods Baoquan He
  2026-03-02 10:40 ` [PATCH 1/3] mm/swap: rename mm/page_io.c to mm/swap_io.c Baoquan He
  2026-03-02 10:40 ` [PATCH 2/3] mm/swap: use swap_ops to register swap device's methods Baoquan He
@ 2026-03-02 10:40 ` Baoquan He
  2026-03-02 11:28   ` Barry Song
  2026-03-02 14:43 ` [PATCH 0/3] mm/swap: use swap_ops to register swap device's methods YoungJun Park
  3 siblings, 1 reply; 13+ messages in thread
From: Baoquan He @ 2026-03-02 10:40 UTC (permalink / raw)
  To: linux-mm
  Cc: akpm, chrisl, kasong, shikemeng, nphamcs, baohua, youngjun.park,
	Baoquan He

All these swap_writepage_* functions handle a passed-in folio, not a
page. This renaming makes them consistent with their counterpart
swap_read_folio_* functions.

Signed-off-by: Baoquan He <bhe@redhat.com>
---
 mm/swap_io.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/mm/swap_io.c b/mm/swap_io.c
index 47077b345ae3..ad315b128e36 100644
--- a/mm/swap_io.c
+++ b/mm/swap_io.c
@@ -373,7 +373,7 @@ static void sio_write_complete(struct kiocb *iocb, long ret)
 	mempool_free(sio, sio_pool);
 }
 
-static void swap_writepage_fs(struct swap_info_struct *sis,
+static void swap_write_folio_fs(struct swap_info_struct *sis,
 			      struct folio *folio,
 			      struct swap_iocb **swap_plug)
 {
@@ -410,7 +410,7 @@ static void swap_writepage_fs(struct swap_info_struct *sis,
 		*swap_plug = sio;
 }
 
-static void swap_writepage_bdev_sync(struct swap_info_struct *sis,
+static void swap_write_folio_bdev_sync(struct swap_info_struct *sis,
 				     struct folio *folio,
 				     struct swap_iocb **plug)
 {
@@ -431,7 +431,7 @@ static void swap_writepage_bdev_sync(struct swap_info_struct *sis,
 	__end_swap_bio_write(&bio);
 }
 
-static void swap_writepage_bdev_async(struct swap_info_struct *sis,
+static void swap_write_folio_bdev_async(struct swap_info_struct *sis,
 				      struct folio *folio,
 				      struct swap_iocb **plug)
 {
@@ -593,17 +593,17 @@ static void swap_read_folio_bdev_async(struct swap_info_struct *sis,
 
 static struct swap_ops bdev_fs_swap_ops = {
 	.read_folio = swap_read_folio_fs,
-	.write_folio = swap_writepage_fs,
+	.write_folio = swap_write_folio_fs,
 };
 
 static struct swap_ops bdev_sync_swap_ops = {
 	.read_folio = swap_read_folio_bdev_sync,
-	.write_folio = swap_writepage_bdev_sync,
+	.write_folio = swap_write_folio_bdev_sync,
 };
 
 static struct swap_ops bdev_async_swap_ops = {
 	.read_folio = swap_read_folio_bdev_async,
-	.write_folio = swap_writepage_bdev_async,
+	.write_folio = swap_write_folio_bdev_async,
 };
 
 int probe_swap_fs(struct swap_info_struct *sis)
-- 
2.52.0



^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 1/3] mm/swap: rename mm/page_io.c to mm/swap_io.c
  2026-03-02 10:40 ` [PATCH 1/3] mm/swap: rename mm/page_io.c to mm/swap_io.c Baoquan He
@ 2026-03-02 10:56   ` Barry Song
  2026-03-02 13:25     ` Baoquan He
  0 siblings, 1 reply; 13+ messages in thread
From: Barry Song @ 2026-03-02 10:56 UTC (permalink / raw)
  To: Baoquan He
  Cc: linux-mm, akpm, chrisl, kasong, shikemeng, nphamcs, youngjun.park

On Mon, Mar 2, 2026 at 6:40 PM Baoquan He <bhe@redhat.com> wrote:
>
> Codes in mm/page_io.c are only related to swap io, it has
> nothing to do with other page io.

That is true. Meanwhile, swap.c and swap.h contain a fair
amount of non-swap-related code, such as lru_xxx and even
file-related functions like deactivate_file_folio().

Perhaps we should consider moving these to more
appropriate locations.

>
> Rename it to avoid confusion.
>
> Signed-off-by: Baoquan He <bhe@redhat.com>

Reviewed-by: Barry Song <baohua@kernel.org>


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 2/3] mm/swap: use swap_ops to register swap device's methods
  2026-03-02 10:40 ` [PATCH 2/3] mm/swap: use swap_ops to register swap device's methods Baoquan He
@ 2026-03-02 11:11   ` Barry Song
  2026-03-02 14:47     ` Baoquan He
  2026-03-02 12:20   ` YoungJun Park
                     ` (2 subsequent siblings)
  3 siblings, 1 reply; 13+ messages in thread
From: Barry Song @ 2026-03-02 11:11 UTC (permalink / raw)
  To: Baoquan He
  Cc: linux-mm, akpm, chrisl, kasong, shikemeng, nphamcs, youngjun.park

On Mon, Mar 2, 2026 at 6:40 PM Baoquan He <bhe@redhat.com> wrote:
>
> This simplifies codes and makes logic clearer. And also makes later any
> new swap device type being added easier to handle.
>
> Currently there are three types of swap devices: bdev_fs, bdev_sync
> and bdev_async, and only operations read_folio and write_folio are
> included. In the future, there could be more swap device types added
> and more appropriate opeations adapted into swap_ops.
>
> Signed-off-by: Baoquan He <bhe@redhat.com>
> ---
>  include/linux/swap.h |  13 ++++++
>  mm/swap.h            |   1 -
>  mm/swap_io.c         | 102 +++++++++++++++++++++++++------------------
>  mm/swapfile.c        |   2 +
>  mm/zswap.c           |   3 +-
>  5 files changed, 76 insertions(+), 45 deletions(-)
>
> diff --git a/include/linux/swap.h b/include/linux/swap.h
> index 0effe3cc50f5..448e5e66ec5c 100644
> --- a/include/linux/swap.h
> +++ b/include/linux/swap.h
> @@ -19,6 +19,7 @@
>  struct notifier_block;
>
>  struct bio;
> +struct swap_iocb;
>
>  struct pagevec;
>
> @@ -222,6 +223,17 @@ enum {
>  #define SWAP_CLUSTER_MAX_SKIPPED (SWAP_CLUSTER_MAX << 10)
>  #define COMPACT_CLUSTER_MAX SWAP_CLUSTER_MAX
>
> +struct swap_ops {
> +       void (*read_folio)(struct swap_info_struct *sis,
> +                          struct folio *folio,
> +                          struct swap_iocb **plug);
> +       void (*write_folio)(struct swap_info_struct *sis,
> +                           struct folio *folio,
> +                           struct swap_iocb **plug);
> +};
> +
> +int probe_swap_fs(struct swap_info_struct *sis);

Does probe_swap_fs sound a bit odd?
What about init_swap_ops? Not sure if we have a better name.

Do we really want it, along with swap_ops, to live in
include/linux/swap.h? Could it be placed in mm/swap.h instead?

> +
>  /*
>   * The first page in the swap file is the swap header, which is always marked
>   * bad to prevent it from being allocated as an entry. This also prevents the
> @@ -284,6 +296,7 @@ struct swap_info_struct {
>         struct work_struct reclaim_work; /* reclaim worker */
>         struct list_head discard_clusters; /* discard clusters list */
>         struct plist_node avail_list;   /* entry in swap_avail_head */
> +       struct swap_ops *ops;
>  };
>
>  static inline swp_entry_t page_swap_entry(struct page *page)
> diff --git a/mm/swap.h b/mm/swap.h
> index 161185057993..c390df3f5889 100644
> --- a/mm/swap.h
> +++ b/mm/swap.h
> @@ -226,7 +226,6 @@ static inline void swap_read_unplug(struct swap_iocb *plug)
>  }
>  void swap_write_unplug(struct swap_iocb *sio);
>  int swap_writeout(struct folio *folio, struct swap_iocb **swap_plug);
> -void __swap_writepage(struct folio *folio, struct swap_iocb **swap_plug);
>
>  /* linux/mm/swap_state.c */
>  extern struct address_space swap_space __read_mostly;
> diff --git a/mm/swap_io.c b/mm/swap_io.c
> index d1cdb10ba133..47077b345ae3 100644
> --- a/mm/swap_io.c
> +++ b/mm/swap_io.c
> @@ -240,6 +240,7 @@ static void swap_zeromap_folio_clear(struct folio *folio)
>  int swap_writeout(struct folio *folio, struct swap_iocb **swap_plug)
>  {
>         int ret = 0;
> +       struct swap_info_struct *sis = __swap_entry_to_info(folio->swap);
>
>         if (folio_free_swap(folio))
>                 goto out_unlock;
> @@ -281,7 +282,8 @@ int swap_writeout(struct folio *folio, struct swap_iocb **swap_plug)
>                 return AOP_WRITEPAGE_ACTIVATE;
>         }
>
> -       __swap_writepage(folio, swap_plug);
> +       if (sis->ops && sis->ops->write_folio)
> +               sis->ops->write_folio(sis, folio, swap_plug);

Do we want a swap_write_folio() wrapper?

>         return 0;
>  out_unlock:
>         folio_unlock(folio);
> @@ -371,10 +373,11 @@ static void sio_write_complete(struct kiocb *iocb, long ret)
>         mempool_free(sio, sio_pool);
>  }
>
> -static void swap_writepage_fs(struct folio *folio, struct swap_iocb **swap_plug)
> +static void swap_writepage_fs(struct swap_info_struct *sis,
> +                             struct folio *folio,
> +                             struct swap_iocb **swap_plug)
>  {
>         struct swap_iocb *sio = swap_plug ? *swap_plug : NULL;
> -       struct swap_info_struct *sis = __swap_entry_to_info(folio->swap);
>         struct file *swap_file = sis->swap_file;
>         loff_t pos = swap_dev_pos(folio->swap);
>
> @@ -407,8 +410,9 @@ static void swap_writepage_fs(struct folio *folio, struct swap_iocb **swap_plug)
>                 *swap_plug = sio;
>  }
>
> -static void swap_writepage_bdev_sync(struct folio *folio,
> -               struct swap_info_struct *sis)
> +static void swap_writepage_bdev_sync(struct swap_info_struct *sis,
> +                                    struct folio *folio,
> +                                    struct swap_iocb **plug)
>  {
>         struct bio_vec bv;
>         struct bio bio;
> @@ -427,8 +431,9 @@ static void swap_writepage_bdev_sync(struct folio *folio,
>         __end_swap_bio_write(&bio);
>  }
>
> -static void swap_writepage_bdev_async(struct folio *folio,
> -               struct swap_info_struct *sis)
> +static void swap_writepage_bdev_async(struct swap_info_struct *sis,
> +                                     struct folio *folio,
> +                                     struct swap_iocb **plug)
>  {
>         struct bio *bio;
>
> @@ -444,29 +449,6 @@ static void swap_writepage_bdev_async(struct folio *folio,
>         submit_bio(bio);
>  }
>
> -void __swap_writepage(struct folio *folio, struct swap_iocb **swap_plug)
> -{
> -       struct swap_info_struct *sis = __swap_entry_to_info(folio->swap);
> -
> -       VM_BUG_ON_FOLIO(!folio_test_swapcache(folio), folio);
> -       /*
> -        * ->flags can be updated non-atomically (scan_swap_map_slots),
> -        * but that will never affect SWP_FS_OPS, so the data_race
> -        * is safe.
> -        */
> -       if (data_race(sis->flags & SWP_FS_OPS))
> -               swap_writepage_fs(folio, swap_plug);
> -       /*
> -        * ->flags can be updated non-atomically (scan_swap_map_slots),
> -        * but that will never affect SWP_SYNCHRONOUS_IO, so the data_race
> -        * is safe.
> -        */
> -       else if (data_race(sis->flags & SWP_SYNCHRONOUS_IO))
> -               swap_writepage_bdev_sync(folio, sis);
> -       else
> -               swap_writepage_bdev_async(folio, sis);
> -}
> -
>  void swap_write_unplug(struct swap_iocb *sio)
>  {
>         struct iov_iter from;
> @@ -535,9 +517,10 @@ static bool swap_read_folio_zeromap(struct folio *folio)
>         return true;
>  }
>
> -static void swap_read_folio_fs(struct folio *folio, struct swap_iocb **plug)
> +static void swap_read_folio_fs(struct swap_info_struct *sis,
> +                              struct folio *folio,
> +                              struct swap_iocb **plug)
>  {
> -       struct swap_info_struct *sis = __swap_entry_to_info(folio->swap);
>         struct swap_iocb *sio = NULL;
>         loff_t pos = swap_dev_pos(folio->swap);
>
> @@ -569,8 +552,9 @@ static void swap_read_folio_fs(struct folio *folio, struct swap_iocb **plug)
>                 *plug = sio;
>  }
>
> -static void swap_read_folio_bdev_sync(struct folio *folio,
> -               struct swap_info_struct *sis)
> +static void swap_read_folio_bdev_sync(struct swap_info_struct *sis,
> +                                     struct folio *folio,
> +                                     struct swap_iocb **plug)
>  {
>         struct bio_vec bv;
>         struct bio bio;
> @@ -591,8 +575,9 @@ static void swap_read_folio_bdev_sync(struct folio *folio,
>         put_task_struct(current);
>  }
>
> -static void swap_read_folio_bdev_async(struct folio *folio,
> -               struct swap_info_struct *sis)
> +static void swap_read_folio_bdev_async(struct swap_info_struct *sis,
> +                                      struct folio *folio,
> +                                      struct swap_iocb **plug)
>  {
>         struct bio *bio;
>
> @@ -606,6 +591,42 @@ static void swap_read_folio_bdev_async(struct folio *folio,
>         submit_bio(bio);
>  }
>
> +static struct swap_ops bdev_fs_swap_ops = {
> +       .read_folio = swap_read_folio_fs,
> +       .write_folio = swap_writepage_fs,
> +};

const?

> +
> +static struct swap_ops bdev_sync_swap_ops = {
> +       .read_folio = swap_read_folio_bdev_sync,
> +       .write_folio = swap_writepage_bdev_sync,
> +};

const?

> +
> +static struct swap_ops bdev_async_swap_ops = {
> +       .read_folio = swap_read_folio_bdev_async,
> +       .write_folio = swap_writepage_bdev_async,
> +};

const?

> +
> +int probe_swap_fs(struct swap_info_struct *sis)
> +{
> +       /*
> +        * ->flags can be updated non-atomically (scan_swap_map_slots),
> +        * but that will never affect SWP_FS_OPS, so the data_race
> +        * is safe.
> +        */
> +       if (data_race(sis->flags & SWP_FS_OPS))
> +               sis->ops = &bdev_fs_swap_ops;
> +       /*
> +        * ->flags can be updated non-atomically (scan_swap_map_slots),
> +        * but that will never affect SWP_SYNCHRONOUS_IO, so the data_race
> +        * is safe.
> +        */
> +       else if (data_race(sis->flags & SWP_SYNCHRONOUS_IO))
> +               sis->ops = &bdev_sync_swap_ops;
> +       else
> +               sis->ops = &bdev_async_swap_ops;
> +       return 0;
> +}
> +
>  void swap_read_folio(struct folio *folio, struct swap_iocb **plug)
>  {
>         struct swap_info_struct *sis = __swap_entry_to_info(folio->swap);
> @@ -640,13 +661,8 @@ void swap_read_folio(struct folio *folio, struct swap_iocb **plug)
>         /* We have to read from slower devices. Increase zswap protection. */
>         zswap_folio_swapin(folio);
>
> -       if (data_race(sis->flags & SWP_FS_OPS)) {
> -               swap_read_folio_fs(folio, plug);
> -       } else if (synchronous) {
> -               swap_read_folio_bdev_sync(folio, sis);
> -       } else {
> -               swap_read_folio_bdev_async(folio, sis);
> -       }
> +       if (sis->ops && sis->ops->read_folio)
> +               sis->ops->read_folio(sis, folio, plug);
>
>  finish:
>         if (workingset) {
> diff --git a/mm/swapfile.c b/mm/swapfile.c
> index 915bc93964db..af498f9af328 100644
> --- a/mm/swapfile.c
> +++ b/mm/swapfile.c
> @@ -3625,6 +3625,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
>         /* Sets SWP_WRITEOK, resurrect the percpu ref, expose the swap device */
>         enable_swap_info(si);
>
> +       probe_swap_fs(si);

Can we move this to enable_swap_info(), or perhaps even
deeper into setup_swap_info()?

> +
>         pr_info("Adding %uk swap on %s.  Priority:%d extents:%d across:%lluk %s%s%s%s\n",
>                 K(si->pages), name->name, si->prio, nr_extents,
>                 K((unsigned long long)span),
> diff --git a/mm/zswap.c b/mm/zswap.c
> index a399f7a10830..7ce906249c7a 100644
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -1055,7 +1055,8 @@ static int zswap_writeback_entry(struct zswap_entry *entry,
>         folio_set_reclaim(folio);
>
>         /* start writeback */
> -       __swap_writepage(folio, NULL);
> +       if (si->ops && si->ops->write_folio)
> +               si->ops->write_folio(si, folio, NULL);

swap_write_folio() inline wrapper?

>
>  out:
>         if (ret && ret != -EEXIST) {
> --
> 2.52.0
>

Thanks
Barry


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 3/3] mm/swap_io.c: rename swap_writepage_* to swap_write_folio_*
  2026-03-02 10:40 ` [PATCH 3/3] mm/swap_io.c: rename swap_writepage_* to swap_write_folio_* Baoquan He
@ 2026-03-02 11:28   ` Barry Song
  0 siblings, 0 replies; 13+ messages in thread
From: Barry Song @ 2026-03-02 11:28 UTC (permalink / raw)
  To: Baoquan He
  Cc: linux-mm, akpm, chrisl, kasong, shikemeng, nphamcs, youngjun.park

On Mon, Mar 2, 2026 at 6:40 PM Baoquan He <bhe@redhat.com> wrote:
>
> All these swap_writepage_* functions are hanlding passed in folio, but
> not page. And this renaming make them consistent with the their
> counterpart swap_read_folio_* functions.
>
> Signed-off-by: Baoquan He <bhe@redhat.com>

LGTM,

Reviewed-by: Barry Song <baohua@kernel.org>

> ---
>  mm/swap_io.c | 12 ++++++------
>  1 file changed, 6 insertions(+), 6 deletions(-)


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 2/3] mm/swap: use swap_ops to register swap device's methods
  2026-03-02 10:40 ` [PATCH 2/3] mm/swap: use swap_ops to register swap device's methods Baoquan He
  2026-03-02 11:11   ` Barry Song
@ 2026-03-02 12:20   ` YoungJun Park
  2026-03-02 14:09   ` YoungJun Park
  2026-03-02 14:53   ` Usama Arif
  3 siblings, 0 replies; 13+ messages in thread
From: YoungJun Park @ 2026-03-02 12:20 UTC (permalink / raw)
  To: Baoquan He; +Cc: linux-mm, akpm, chrisl, kasong, shikemeng, nphamcs, baohua

On Mon, Mar 02, 2026 at 06:40:15PM +0800, Baoquan He wrote:

Hello Baoquan!

...
> +	.read_folio = swap_read_folio_bdev_sync,
> +	.write_folio = swap_writepage_bdev_sync,
> +};
> +
> +static struct swap_ops bdev_async_swap_ops = {
> +	.read_folio = swap_read_folio_bdev_async,
> +	.write_folio = swap_writepage_bdev_async,
> +};
> +
> +int probe_swap_fs(struct swap_info_struct *sis)
> +{
> +	/*
> +	 * ->flags can be updated non-atomically (scan_swap_map_slots),
> +	 * but that will never affect SWP_FS_OPS, so the data_race
> +	 * is safe.
> +	 */
> +	if (data_race(sis->flags & SWP_FS_OPS))
> +		sis->ops = &bdev_fs_swap_ops;
> +	/*
> +	 * ->flags can be updated non-atomically (scan_swap_map_slots),
> +	 * but that will never affect SWP_SYNCHRONOUS_IO, so the data_race
> +	 * is safe.
> +	 */

The reference to scan_swap_map_slots appears to be outdated as the
function no longer exists in the current codebase. It might be
better to update this to a more generic term?

If the comments need to be updated, the similar comments in
may_enter_fs likely need the same update for consistency.

> +	else if (data_race(sis->flags & SWP_SYNCHRONOUS_IO))
> +		sis->ops = &bdev_sync_swap_ops;
> +	else
> +		sis->ops = &bdev_async_swap_ops;
> +	return 0;

It seems the return value is not needed.

Thanks 
Youngjun Park


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 1/3] mm/swap: rename mm/page_io.c to mm/swap_io.c
  2026-03-02 10:56   ` Barry Song
@ 2026-03-02 13:25     ` Baoquan He
  0 siblings, 0 replies; 13+ messages in thread
From: Baoquan He @ 2026-03-02 13:25 UTC (permalink / raw)
  To: Barry Song
  Cc: linux-mm, akpm, chrisl, kasong, shikemeng, nphamcs, youngjun.park

On 03/02/26 at 06:56pm, Barry Song wrote:
> On Mon, Mar 2, 2026 at 6:40 PM Baoquan He <bhe@redhat.com> wrote:
> >
> > Codes in mm/page_io.c are only related to swap io, it has
> > nothing to do with other page io.
> 

Thanks for the quick and careful review of this patchset, Barry.

> That is true. Meanwhile, swap.c and swap.h contain a fair
> amount of non-swap-related code, such as lru_xxx and even
> file-related functions like deactivate_file_folio().
> 
> Perhaps we should consider moving these to more
> appropriate locations.

Yeah, agreed. I will see if anyone else wants to do the tidying up;
otherwise I can do it later.

> 
> >
> > Rename it to avoid confusion.
> >
> > Signed-off-by: Baoquan He <bhe@redhat.com>
> 
> Reviewed-by: Barry Song <baohua@kernel.org>
> 



^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 2/3] mm/swap: use swap_ops to register swap device's methods
  2026-03-02 10:40 ` [PATCH 2/3] mm/swap: use swap_ops to register swap device's methods Baoquan He
  2026-03-02 11:11   ` Barry Song
  2026-03-02 12:20   ` YoungJun Park
@ 2026-03-02 14:09   ` YoungJun Park
  2026-03-02 14:53   ` Usama Arif
  3 siblings, 0 replies; 13+ messages in thread
From: YoungJun Park @ 2026-03-02 14:09 UTC (permalink / raw)
  To: Baoquan He; +Cc: linux-mm, akpm, chrisl, kasong, shikemeng, nphamcs, baohua

On Mon, Mar 02, 2026 at 06:40:15PM +0800, Baoquan He wrote:
> This simplifies codes and makes logic clearer. And also makes later any
> new swap device type being added easier to handle.
> 
> Currently there are three types of swap devices: bdev_fs, bdev_sync
> and bdev_async, and only operations read_folio and write_folio are
> included. In the future, there could be more swap device types added
> and more appropriate operations adapted into swap_ops.
> 
> Signed-off-by: Baoquan He <bhe@redhat.com>
> ---
>  include/linux/swap.h |  13 ++++++
>  mm/swap.h            |   1 -
>  mm/swap_io.c         | 102 +++++++++++++++++++++++++------------------
>  mm/swapfile.c        |   2 +
>  mm/zswap.c           |   3 +-
>  5 files changed, 76 insertions(+), 45 deletions(-)
> 
> diff --git a/include/linux/swap.h b/include/linux/swap.h
> index 0effe3cc50f5..448e5e66ec5c 100644
> --- a/include/linux/swap.h
> +++ b/include/linux/swap.h
> @@ -19,6 +19,7 @@
>  struct notifier_block;
>  
>  struct bio;
> +struct swap_iocb;
>  
>  struct pagevec;
>  
> @@ -222,6 +223,17 @@ enum {
>  #define SWAP_CLUSTER_MAX_SKIPPED (SWAP_CLUSTER_MAX << 10)
>  #define COMPACT_CLUSTER_MAX SWAP_CLUSTER_MAX
>  
> +struct swap_ops {
> +	void (*read_folio)(struct swap_info_struct *sis,
> +			   struct folio *folio,
> +			   struct swap_iocb **plug);
> +	void (*write_folio)(struct swap_info_struct *sis,
> +			    struct folio *folio,
> +			    struct swap_iocb **plug);
> +};

I think swap_iocb is only required for fs-swap
(swap_read_folio_fs/swap_writepage_fs).

If the goal is to support fs-swap through swap_ops, it might be worth
considering a more complete integration, including activate/deactivate
and swap_rw from aops, rather than only adding read/write hooks.

So.. we could keep SWP_FS_OPS as-is for now and just split
sync/async paths, and revisit a cleaner fs-swap integration later.
(I mean removing fs ops, and call swap_read/write_folio_fs on sync/async ops.)

What do you think?

Thanks
Youngjun Park


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 0/3] mm/swap: use swap_ops to register swap device's methods
  2026-03-02 10:40 [PATCH 0/3] mm/swap: use swap_ops to register swap device's methods Baoquan He
                   ` (2 preceding siblings ...)
  2026-03-02 10:40 ` [PATCH 3/3] mm/swap_io.c: rename swap_writepage_* to swap_write_folio_* Baoquan He
@ 2026-03-02 14:43 ` YoungJun Park
  3 siblings, 0 replies; 13+ messages in thread
From: YoungJun Park @ 2026-03-02 14:43 UTC (permalink / raw)
  To: Baoquan He; +Cc: linux-mm, akpm, chrisl, kasong, shikemeng, nphamcs, baohua

On Mon, Mar 02, 2026 at 06:40:13PM +0800, Baoquan He wrote:
> This can simplify the code logic and benefit any new type of swap device
> added later.
> 
> And also do renaming in this patchset:
> -------
>    file renaming:
>    ---
>    mm/page_io.c to mm/swap_io.c
> 
>    function renaming:
>    ---
>    swap_writepage_* to swap_write_folio_* in file mm/swap_io.c 
> 
> 
> Baoquan He (3):
>   mm/swap: rename mm/page_io.c to mm/swap_io.c
>   mm/swap: use swap_ops to register swap device's methods
>   mm/swap_io.c: rename swap_writepage_* to swap_write_folio_*
> 
>  MAINTAINERS                 |   2 +-
>  include/linux/swap.h        |  13 +++++
>  mm/Makefile                 |   2 +-
>  mm/swap.h                   |   3 +-
>  mm/{page_io.c => swap_io.c} | 104 +++++++++++++++++++++---------------
>  mm/swapfile.c               |   2 +
>  mm/zswap.c                  |   3 +-
>  7 files changed, 80 insertions(+), 49 deletions(-)
>  rename mm/{page_io.c => swap_io.c} (91%)
> 
> -- 
> 2.52.0

Hi Baoquan,

Thank you for the swap_ops infrastructure patch. This is very
relevant to our flash swap work, and I believe it could
significantly help with our future contributions if the framework
is extended further.

As I understand it, the current patch serves as a foundation for
future infrastructure. However, from our perspective, the swap ops
are currently configured statically, and we would need a way to
apply or register custom ops for our use case.

I have a few suggestions (mixed with questions) regarding
extensibility.

1) Could the ops be replaced at activation time, similar to how
   SWP_FS_OPS works — i.e., giving block device drivers an
   opportunity to override the ops when the swap device is
   activated?

   This would allow block devices to handle swap read/write
   through swap_ops rather than doing it in submit_bio, which
   feels more generalized and leaves room for future extension.

   This direction would be the best fit for our use case.
   However, considering ongoing swap work that may involve
   managing multiple swap devices, I wonder whether binding ops
   to a single block device would still be appropriate in that
   scenario.

2) Would it be possible to defer swap device activation via a
   lazy registration interface, so that the ops can be registered
   before the device is fully activated?

   This would open the door for entities other than block
   devices — such as kernel modules or other forms — to act as
   swap backends. (Of course, block devices could also benefit
   from this path.)

3) Alternatively, are there any plans for a static binding
   mechanism, or any other extensibility approach you have in
   mind?

As an additional thought, if this ops framework could eventually
be extended to cover cluster allocation as well, it might enable
a model where base swap functionality is provided by the core,
while each vendor can freely plug in their own additional use
cases on top — making the swap device truly extensible.

Any thoughts on these would be greatly appreciated.

Best regards,
Youngjun Park 


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 2/3] mm/swap: use swap_ops to register swap device's methods
  2026-03-02 11:11   ` Barry Song
@ 2026-03-02 14:47     ` Baoquan He
  0 siblings, 0 replies; 13+ messages in thread
From: Baoquan He @ 2026-03-02 14:47 UTC (permalink / raw)
  To: Barry Song
  Cc: linux-mm, akpm, chrisl, kasong, shikemeng, nphamcs, youngjun.park

On 03/02/26 at 07:11pm, Barry Song wrote:
> On Mon, Mar 2, 2026 at 6:40 PM Baoquan He <bhe@redhat.com> wrote:
> >
> > This simplifies codes and makes logic clearer. And also makes later any
> > new swap device type being added easier to handle.
> >
> > Currently there are three types of swap devices: bdev_fs, bdev_sync
> > and bdev_async, and only operations read_folio and write_folio are
> > included. In the future, there could be more swap device types added
> > and more appropriate operations adapted into swap_ops.
> >
> > Signed-off-by: Baoquan He <bhe@redhat.com>
> > ---
> >  include/linux/swap.h |  13 ++++++
> >  mm/swap.h            |   1 -
> >  mm/swap_io.c         | 102 +++++++++++++++++++++++++------------------
> >  mm/swapfile.c        |   2 +
> >  mm/zswap.c           |   3 +-
> >  5 files changed, 76 insertions(+), 45 deletions(-)
> >
> > diff --git a/include/linux/swap.h b/include/linux/swap.h
> > index 0effe3cc50f5..448e5e66ec5c 100644
> > --- a/include/linux/swap.h
> > +++ b/include/linux/swap.h
> > @@ -19,6 +19,7 @@
> >  struct notifier_block;
> >
> >  struct bio;
> > +struct swap_iocb;
> >
> >  struct pagevec;
> >
> > @@ -222,6 +223,17 @@ enum {
> >  #define SWAP_CLUSTER_MAX_SKIPPED (SWAP_CLUSTER_MAX << 10)
> >  #define COMPACT_CLUSTER_MAX SWAP_CLUSTER_MAX
> >
> > +struct swap_ops {
> > +       void (*read_folio)(struct swap_info_struct *sis,
> > +                          struct folio *folio,
> > +                          struct swap_iocb **plug);
> > +       void (*write_folio)(struct swap_info_struct *sis,
> > +                           struct folio *folio,
> > +                           struct swap_iocb **plug);
> > +};
> > +
> > +int probe_swap_fs(struct swap_info_struct *sis);
> 
> Does probe_swap_fs sound a bit odd?
> What about init_swap_ops? Not sure if we have a better name.

Either is fine with me. If there is no other suggestion, I can use
init_swap_ops() instead.

> 
> Do we really want it, along with swap_ops, to live in
> include/linux/swap.h? Could it be placed in mm/swap.h instead?

You are right, putting it into mm/swap.h sounds better.

> 
> > +
> >  /*
> >   * The first page in the swap file is the swap header, which is always marked
> >   * bad to prevent it from being allocated as an entry. This also prevents the
> > @@ -284,6 +296,7 @@ struct swap_info_struct {
> >         struct work_struct reclaim_work; /* reclaim worker */
> >         struct list_head discard_clusters; /* discard clusters list */
> >         struct plist_node avail_list;   /* entry in swap_avail_head */
> > +       struct swap_ops *ops;
> >  };
> >
> >  static inline swp_entry_t page_swap_entry(struct page *page)
> > diff --git a/mm/swap.h b/mm/swap.h
> > index 161185057993..c390df3f5889 100644
> > --- a/mm/swap.h
> > +++ b/mm/swap.h
> > @@ -226,7 +226,6 @@ static inline void swap_read_unplug(struct swap_iocb *plug)
> >  }
> >  void swap_write_unplug(struct swap_iocb *sio);
> >  int swap_writeout(struct folio *folio, struct swap_iocb **swap_plug);
> > -void __swap_writepage(struct folio *folio, struct swap_iocb **swap_plug);
> >
> >  /* linux/mm/swap_state.c */
> >  extern struct address_space swap_space __read_mostly;
> > diff --git a/mm/swap_io.c b/mm/swap_io.c
> > index d1cdb10ba133..47077b345ae3 100644
> > --- a/mm/swap_io.c
> > +++ b/mm/swap_io.c
> > @@ -240,6 +240,7 @@ static void swap_zeromap_folio_clear(struct folio *folio)
> >  int swap_writeout(struct folio *folio, struct swap_iocb **swap_plug)
> >  {
> >         int ret = 0;
> > +       struct swap_info_struct *sis = __swap_entry_to_info(folio->swap);
> >
> >         if (folio_free_swap(folio))
> >                 goto out_unlock;
> > @@ -281,7 +282,8 @@ int swap_writeout(struct folio *folio, struct swap_iocb **swap_plug)
> >                 return AOP_WRITEPAGE_ACTIVATE;
> >         }
> >
> > -       __swap_writepage(folio, swap_plug);
> > +       if (sis->ops && sis->ops->write_folio)
> > +               sis->ops->write_folio(sis, folio, swap_plug);
> 
> Do we want a swap_write_folio() wrapper?

Seems swap_writeout() is the counterpart of swap_read_folio(). I
personally prefer to rename swap_writeout() to swap_write_folio(). And
make a wrapper as _swap_write_folio()?

> 
> >         return 0;
> >  out_unlock:
> >         folio_unlock(folio);
> > @@ -371,10 +373,11 @@ static void sio_write_complete(struct kiocb *iocb, long ret)
> >         mempool_free(sio, sio_pool);
> >  }
> >
> > -static void swap_writepage_fs(struct folio *folio, struct swap_iocb **swap_plug)
> > +static void swap_writepage_fs(struct swap_info_struct *sis,
> > +                             struct folio *folio,
> > +                             struct swap_iocb **swap_plug)
> >  {
> >         struct swap_iocb *sio = swap_plug ? *swap_plug : NULL;
> > -       struct swap_info_struct *sis = __swap_entry_to_info(folio->swap);
> >         struct file *swap_file = sis->swap_file;
> >         loff_t pos = swap_dev_pos(folio->swap);
> >
> > @@ -407,8 +410,9 @@ static void swap_writepage_fs(struct folio *folio, struct swap_iocb **swap_plug)
> >                 *swap_plug = sio;
> >  }
> >
> > -static void swap_writepage_bdev_sync(struct folio *folio,
> > -               struct swap_info_struct *sis)
> > +static void swap_writepage_bdev_sync(struct swap_info_struct *sis,
> > +                                    struct folio *folio,
> > +                                    struct swap_iocb **plug)
> >  {
> >         struct bio_vec bv;
> >         struct bio bio;
> > @@ -427,8 +431,9 @@ static void swap_writepage_bdev_sync(struct folio *folio,
> >         __end_swap_bio_write(&bio);
> >  }
> >
> > -static void swap_writepage_bdev_async(struct folio *folio,
> > -               struct swap_info_struct *sis)
> > +static void swap_writepage_bdev_async(struct swap_info_struct *sis,
> > +                                     struct folio *folio,
> > +                                     struct swap_iocb **plug)
> >  {
> >         struct bio *bio;
> >
> > @@ -444,29 +449,6 @@ static void swap_writepage_bdev_async(struct folio *folio,
> >         submit_bio(bio);
> >  }
> >
> > -void __swap_writepage(struct folio *folio, struct swap_iocb **swap_plug)
> > -{
> > -       struct swap_info_struct *sis = __swap_entry_to_info(folio->swap);
> > -
> > -       VM_BUG_ON_FOLIO(!folio_test_swapcache(folio), folio);
> > -       /*
> > -        * ->flags can be updated non-atomically (scan_swap_map_slots),
> > -        * but that will never affect SWP_FS_OPS, so the data_race
> > -        * is safe.
> > -        */
> > -       if (data_race(sis->flags & SWP_FS_OPS))
> > -               swap_writepage_fs(folio, swap_plug);
> > -       /*
> > -        * ->flags can be updated non-atomically (scan_swap_map_slots),
> > -        * but that will never affect SWP_SYNCHRONOUS_IO, so the data_race
> > -        * is safe.
> > -        */
> > -       else if (data_race(sis->flags & SWP_SYNCHRONOUS_IO))
> > -               swap_writepage_bdev_sync(folio, sis);
> > -       else
> > -               swap_writepage_bdev_async(folio, sis);
> > -}
> > -
> >  void swap_write_unplug(struct swap_iocb *sio)
> >  {
> >         struct iov_iter from;
> > @@ -535,9 +517,10 @@ static bool swap_read_folio_zeromap(struct folio *folio)
> >         return true;
> >  }
> >
> > -static void swap_read_folio_fs(struct folio *folio, struct swap_iocb **plug)
> > +static void swap_read_folio_fs(struct swap_info_struct *sis,
> > +                              struct folio *folio,
> > +                              struct swap_iocb **plug)
> >  {
> > -       struct swap_info_struct *sis = __swap_entry_to_info(folio->swap);
> >         struct swap_iocb *sio = NULL;
> >         loff_t pos = swap_dev_pos(folio->swap);
> >
> > @@ -569,8 +552,9 @@ static void swap_read_folio_fs(struct folio *folio, struct swap_iocb **plug)
> >                 *plug = sio;
> >  }
> >
> > -static void swap_read_folio_bdev_sync(struct folio *folio,
> > -               struct swap_info_struct *sis)
> > +static void swap_read_folio_bdev_sync(struct swap_info_struct *sis,
> > +                                     struct folio *folio,
> > +                                     struct swap_iocb **plug)
> >  {
> >         struct bio_vec bv;
> >         struct bio bio;
> > @@ -591,8 +575,9 @@ static void swap_read_folio_bdev_sync(struct folio *folio,
> >         put_task_struct(current);
> >  }
> >
> > -static void swap_read_folio_bdev_async(struct folio *folio,
> > -               struct swap_info_struct *sis)
> > +static void swap_read_folio_bdev_async(struct swap_info_struct *sis,
> > +                                      struct folio *folio,
> > +                                      struct swap_iocb **plug)
> >  {
> >         struct bio *bio;
> >
> > @@ -606,6 +591,42 @@ static void swap_read_folio_bdev_async(struct folio *folio,
> >         submit_bio(bio);
> >  }
> >
> > +static struct swap_ops bdev_fs_swap_ops = {
> > +       .read_folio = swap_read_folio_fs,
> > +       .write_folio = swap_writepage_fs,
> > +};
> 
> const?

Right, will fix in v2.

> 
> > +
> > +static struct swap_ops bdev_sync_swap_ops = {
> > +       .read_folio = swap_read_folio_bdev_sync,
> > +       .write_folio = swap_writepage_bdev_sync,
> > +};
> 
> const?
> 
> > +
> > +static struct swap_ops bdev_async_swap_ops = {
> > +       .read_folio = swap_read_folio_bdev_async,
> > +       .write_folio = swap_writepage_bdev_async,
> > +};
> 
> const?
> 
> > +
> > +int probe_swap_fs(struct swap_info_struct *sis)
> > +{
> > +       /*
> > +        * ->flags can be updated non-atomically (scan_swap_map_slots),
> > +        * but that will never affect SWP_FS_OPS, so the data_race
> > +        * is safe.
> > +        */
> > +       if (data_race(sis->flags & SWP_FS_OPS))
> > +               sis->ops = &bdev_fs_swap_ops;
> > +       /*
> > +        * ->flags can be updated non-atomically (scan_swap_map_slots),
> > +        * but that will never affect SWP_SYNCHRONOUS_IO, so the data_race
> > +        * is safe.
> > +        */
> > +       else if (data_race(sis->flags & SWP_SYNCHRONOUS_IO))
> > +               sis->ops = &bdev_sync_swap_ops;
> > +       else
> > +               sis->ops = &bdev_async_swap_ops;
> > +       return 0;
> > +}
> > +
> >  void swap_read_folio(struct folio *folio, struct swap_iocb **plug)
> >  {
> >         struct swap_info_struct *sis = __swap_entry_to_info(folio->swap);
> > @@ -640,13 +661,8 @@ void swap_read_folio(struct folio *folio, struct swap_iocb **plug)
> >         /* We have to read from slower devices. Increase zswap protection. */
> >         zswap_folio_swapin(folio);
> >
> > -       if (data_race(sis->flags & SWP_FS_OPS)) {
> > -               swap_read_folio_fs(folio, plug);
> > -       } else if (synchronous) {
> > -               swap_read_folio_bdev_sync(folio, sis);
> > -       } else {
> > -               swap_read_folio_bdev_async(folio, sis);
> > -       }
> > +       if (sis->ops && sis->ops->read_folio)
> > +               sis->ops->read_folio(sis, folio, plug);
> >
> >  finish:
> >         if (workingset) {
> > diff --git a/mm/swapfile.c b/mm/swapfile.c
> > index 915bc93964db..af498f9af328 100644
> > --- a/mm/swapfile.c
> > +++ b/mm/swapfile.c
> > @@ -3625,6 +3625,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
> >         /* Sets SWP_WRITEOK, resurrect the percpu ref, expose the swap device */
> >         enable_swap_info(si);
> >
> > +       probe_swap_fs(si);
> 
> Can we move this to enable_swap_info(), or perhaps even
> deeper into setup_swap_info()?
> 
> > +
> >         pr_info("Adding %uk swap on %s.  Priority:%d extents:%d across:%lluk %s%s%s%s\n",
> >                 K(si->pages), name->name, si->prio, nr_extents,
> >                 K((unsigned long long)span),
> > diff --git a/mm/zswap.c b/mm/zswap.c
> > index a399f7a10830..7ce906249c7a 100644
> > --- a/mm/zswap.c
> > +++ b/mm/zswap.c
> > @@ -1055,7 +1055,8 @@ static int zswap_writeback_entry(struct zswap_entry *entry,
> >         folio_set_reclaim(folio);
> >
> >         /* start writeback */
> > -       __swap_writepage(folio, NULL);
> > +       if (si->ops && si->ops->write_folio)
> > +               si->ops->write_folio(si, folio, NULL);
> 
> swap_write_folio() inline wrapper?

Replied above. We will need a read_folio version too.



^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 2/3] mm/swap: use swap_ops to register swap device's methods
  2026-03-02 10:40 ` [PATCH 2/3] mm/swap: use swap_ops to register swap device's methods Baoquan He
                     ` (2 preceding siblings ...)
  2026-03-02 14:09   ` YoungJun Park
@ 2026-03-02 14:53   ` Usama Arif
  3 siblings, 0 replies; 13+ messages in thread
From: Usama Arif @ 2026-03-02 14:53 UTC (permalink / raw)
  To: Baoquan He
  Cc: Usama Arif, linux-mm, akpm, chrisl, kasong, shikemeng, nphamcs,
	baohua, youngjun.park

On Mon,  2 Mar 2026 18:40:15 +0800 Baoquan He <bhe@redhat.com> wrote:

> This simplifies codes and makes logic clearer. And also makes later any
> new swap device type being added easier to handle.
> 
> Currently there are three types of swap devices: bdev_fs, bdev_sync
> and bdev_async, and only operations read_folio and write_folio are
> included. In the future, there could be more swap device types added
> and more appropriate operations adapted into swap_ops.
> 
> Signed-off-by: Baoquan He <bhe@redhat.com>
> ---
>  include/linux/swap.h |  13 ++++++
>  mm/swap.h            |   1 -
>  mm/swap_io.c         | 102 +++++++++++++++++++++++++------------------
>  mm/swapfile.c        |   2 +
>  mm/zswap.c           |   3 +-
>  5 files changed, 76 insertions(+), 45 deletions(-)
> 
> diff --git a/include/linux/swap.h b/include/linux/swap.h
> index 0effe3cc50f5..448e5e66ec5c 100644
> --- a/include/linux/swap.h
> +++ b/include/linux/swap.h
> @@ -19,6 +19,7 @@
>  struct notifier_block;
>  
>  struct bio;
> +struct swap_iocb;
>  
>  struct pagevec;
>  
> @@ -222,6 +223,17 @@ enum {
>  #define SWAP_CLUSTER_MAX_SKIPPED (SWAP_CLUSTER_MAX << 10)
>  #define COMPACT_CLUSTER_MAX SWAP_CLUSTER_MAX
>  
> +struct swap_ops {
> +	void (*read_folio)(struct swap_info_struct *sis,
> +			   struct folio *folio,
> +			   struct swap_iocb **plug);
> +	void (*write_folio)(struct swap_info_struct *sis,
> +			    struct folio *folio,
> +			    struct swap_iocb **plug);
> +};
> +
> +int probe_swap_fs(struct swap_info_struct *sis);
> +

Would it be better to put these in mm/swap.h as they are only used in mm/?

>  /*
>   * The first page in the swap file is the swap header, which is always marked
>   * bad to prevent it from being allocated as an entry. This also prevents the
> @@ -284,6 +296,7 @@ struct swap_info_struct {
>  	struct work_struct reclaim_work; /* reclaim worker */
>  	struct list_head discard_clusters; /* discard clusters list */
>  	struct plist_node avail_list;   /* entry in swap_avail_head */
> +	struct swap_ops *ops;
>  };
>  
>  static inline swp_entry_t page_swap_entry(struct page *page)
> diff --git a/mm/swap.h b/mm/swap.h
> index 161185057993..c390df3f5889 100644
> --- a/mm/swap.h
> +++ b/mm/swap.h
> @@ -226,7 +226,6 @@ static inline void swap_read_unplug(struct swap_iocb *plug)
>  }
>  void swap_write_unplug(struct swap_iocb *sio);
>  int swap_writeout(struct folio *folio, struct swap_iocb **swap_plug);
> -void __swap_writepage(struct folio *folio, struct swap_iocb **swap_plug);
>  
>  /* linux/mm/swap_state.c */
>  extern struct address_space swap_space __read_mostly;
> diff --git a/mm/swap_io.c b/mm/swap_io.c
> index d1cdb10ba133..47077b345ae3 100644
> --- a/mm/swap_io.c
> +++ b/mm/swap_io.c
> @@ -240,6 +240,7 @@ static void swap_zeromap_folio_clear(struct folio *folio)
>  int swap_writeout(struct folio *folio, struct swap_iocb **swap_plug)
>  {
>  	int ret = 0;
> +	struct swap_info_struct *sis = __swap_entry_to_info(folio->swap);
>  
>  	if (folio_free_swap(folio))
>  		goto out_unlock;
> @@ -281,7 +282,8 @@ int swap_writeout(struct folio *folio, struct swap_iocb **swap_plug)
>  		return AOP_WRITEPAGE_ACTIVATE;
>  	}
>  
> -	__swap_writepage(folio, swap_plug);
> +	if (sis->ops && sis->ops->write_folio)
> +		sis->ops->write_folio(sis, folio, swap_plug);

The old __swap_writepage() always dispatched to one of the three write
functions unconditionally. If the guard condition is false (ops is NULL),
swap_writeout() returns 0 (success) but the folio is never unlocked --
the write functions are the ones that call folio_unlock(). Would this
leave the folio locked and lead to a deadlock? Similar issue in swap_read_folio.

>  	return 0;
>  out_unlock:
>  	folio_unlock(folio);
> @@ -371,10 +373,11 @@ static void sio_write_complete(struct kiocb *iocb, long ret)
>  	mempool_free(sio, sio_pool);
>  }
>  
> -static void swap_writepage_fs(struct folio *folio, struct swap_iocb **swap_plug)
> +static void swap_writepage_fs(struct swap_info_struct *sis,
> +			      struct folio *folio,
> +			      struct swap_iocb **swap_plug)
>  {
>  	struct swap_iocb *sio = swap_plug ? *swap_plug : NULL;
> -	struct swap_info_struct *sis = __swap_entry_to_info(folio->swap);
>  	struct file *swap_file = sis->swap_file;
>  	loff_t pos = swap_dev_pos(folio->swap);
>  
> @@ -407,8 +410,9 @@ static void swap_writepage_fs(struct folio *folio, struct swap_iocb **swap_plug)
>  		*swap_plug = sio;
>  }
>  
> -static void swap_writepage_bdev_sync(struct folio *folio,
> -		struct swap_info_struct *sis)
> +static void swap_writepage_bdev_sync(struct swap_info_struct *sis,
> +				     struct folio *folio,
> +				     struct swap_iocb **plug)
>  {
>  	struct bio_vec bv;
>  	struct bio bio;
> @@ -427,8 +431,9 @@ static void swap_writepage_bdev_sync(struct folio *folio,
>  	__end_swap_bio_write(&bio);
>  }
>  
> -static void swap_writepage_bdev_async(struct folio *folio,
> -		struct swap_info_struct *sis)
> +static void swap_writepage_bdev_async(struct swap_info_struct *sis,
> +				      struct folio *folio,
> +				      struct swap_iocb **plug)
>  {
>  	struct bio *bio;
>  
> @@ -444,29 +449,6 @@ static void swap_writepage_bdev_async(struct folio *folio,
>  	submit_bio(bio);
>  }
>  
> -void __swap_writepage(struct folio *folio, struct swap_iocb **swap_plug)
> -{
> -	struct swap_info_struct *sis = __swap_entry_to_info(folio->swap);
> -
> -	VM_BUG_ON_FOLIO(!folio_test_swapcache(folio), folio);
> -	/*
> -	 * ->flags can be updated non-atomically (scan_swap_map_slots),
> -	 * but that will never affect SWP_FS_OPS, so the data_race
> -	 * is safe.
> -	 */
> -	if (data_race(sis->flags & SWP_FS_OPS))
> -		swap_writepage_fs(folio, swap_plug);
> -	/*
> -	 * ->flags can be updated non-atomically (scan_swap_map_slots),
> -	 * but that will never affect SWP_SYNCHRONOUS_IO, so the data_race
> -	 * is safe.
> -	 */
> -	else if (data_race(sis->flags & SWP_SYNCHRONOUS_IO))
> -		swap_writepage_bdev_sync(folio, sis);
> -	else
> -		swap_writepage_bdev_async(folio, sis);
> -}
> -
>  void swap_write_unplug(struct swap_iocb *sio)
>  {
>  	struct iov_iter from;
> @@ -535,9 +517,10 @@ static bool swap_read_folio_zeromap(struct folio *folio)
>  	return true;
>  }
>  
> -static void swap_read_folio_fs(struct folio *folio, struct swap_iocb **plug)
> +static void swap_read_folio_fs(struct swap_info_struct *sis,
> +			       struct folio *folio,
> +			       struct swap_iocb **plug)
>  {
> -	struct swap_info_struct *sis = __swap_entry_to_info(folio->swap);
>  	struct swap_iocb *sio = NULL;
>  	loff_t pos = swap_dev_pos(folio->swap);
>  
> @@ -569,8 +552,9 @@ static void swap_read_folio_fs(struct folio *folio, struct swap_iocb **plug)
>  		*plug = sio;
>  }
>  
> -static void swap_read_folio_bdev_sync(struct folio *folio,
> -		struct swap_info_struct *sis)
> +static void swap_read_folio_bdev_sync(struct swap_info_struct *sis,
> +				      struct folio *folio,
> +				      struct swap_iocb **plug)
>  {
>  	struct bio_vec bv;
>  	struct bio bio;
> @@ -591,8 +575,9 @@ static void swap_read_folio_bdev_sync(struct folio *folio,
>  	put_task_struct(current);
>  }
>  
> -static void swap_read_folio_bdev_async(struct folio *folio,
> -		struct swap_info_struct *sis)
> +static void swap_read_folio_bdev_async(struct swap_info_struct *sis,
> +				       struct folio *folio,
> +				       struct swap_iocb **plug)
>  {
>  	struct bio *bio;
>  
> @@ -606,6 +591,42 @@ static void swap_read_folio_bdev_async(struct folio *folio,
>  	submit_bio(bio);
>  }
>  
> +static struct swap_ops bdev_fs_swap_ops = {
> +	.read_folio = swap_read_folio_fs,
> +	.write_folio = swap_writepage_fs,
> +};
> +
> +static struct swap_ops bdev_sync_swap_ops = {
> +	.read_folio = swap_read_folio_bdev_sync,
> +	.write_folio = swap_writepage_bdev_sync,
> +};
> +
> +static struct swap_ops bdev_async_swap_ops = {
> +	.read_folio = swap_read_folio_bdev_async,
> +	.write_folio = swap_writepage_bdev_async,
> +};
> +

Should we have all of these as static const struct swap_ops?

> +int probe_swap_fs(struct swap_info_struct *sis)
> +{
> +	/*
> +	 * ->flags can be updated non-atomically (scan_swap_map_slots),
> +	 * but that will never affect SWP_FS_OPS, so the data_race
> +	 * is safe.
> +	 */
> +	if (data_race(sis->flags & SWP_FS_OPS))
> +		sis->ops = &bdev_fs_swap_ops;
> +	/*
> +	 * ->flags can be updated non-atomically (scan_swap_map_slots),
> +	 * but that will never affect SWP_SYNCHRONOUS_IO, so the data_race
> +	 * is safe.
> +	 */
> +	else if (data_race(sis->flags & SWP_SYNCHRONOUS_IO))
> +		sis->ops = &bdev_sync_swap_ops;
> +	else
> +		sis->ops = &bdev_async_swap_ops;
> +	return 0;

The return is always 0, so this function could be void.

> +}
> +
>  void swap_read_folio(struct folio *folio, struct swap_iocb **plug)
>  {
>  	struct swap_info_struct *sis = __swap_entry_to_info(folio->swap);
> @@ -640,13 +661,8 @@ void swap_read_folio(struct folio *folio, struct swap_iocb **plug)
>  	/* We have to read from slower devices. Increase zswap protection. */
>  	zswap_folio_swapin(folio);
>  
> -	if (data_race(sis->flags & SWP_FS_OPS)) {
> -		swap_read_folio_fs(folio, plug);
> -	} else if (synchronous) {
> -		swap_read_folio_bdev_sync(folio, sis);
> -	} else {
> -		swap_read_folio_bdev_async(folio, sis);
> -	}
> +	if (sis->ops && sis->ops->read_folio)
> +		sis->ops->read_folio(sis, folio, plug);
>  
>  finish:
>  	if (workingset) {
> diff --git a/mm/swapfile.c b/mm/swapfile.c
> index 915bc93964db..af498f9af328 100644
> --- a/mm/swapfile.c
> +++ b/mm/swapfile.c
> @@ -3625,6 +3625,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
>  	/* Sets SWP_WRITEOK, resurrect the percpu ref, expose the swap device */
>  	enable_swap_info(si);
>  
> +	probe_swap_fs(si);
> +

Should probe_swap_fs() be called before enable_swap_info() rather than
after it? enable_swap_info() sets SWP_WRITEOK and adds the device to
swap_active_head, making it available for allocation. At that point
si->ops is still NULL. If another CPU allocates swap from the new
device and reclaim writes to it before probe_swap_fs() runs, the
write will be silently dropped.

>  	pr_info("Adding %uk swap on %s.  Priority:%d extents:%d across:%lluk %s%s%s%s\n",
>  		K(si->pages), name->name, si->prio, nr_extents,
>  		K((unsigned long long)span),
> diff --git a/mm/zswap.c b/mm/zswap.c
> index a399f7a10830..7ce906249c7a 100644
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -1055,7 +1055,8 @@ static int zswap_writeback_entry(struct zswap_entry *entry,
>  	folio_set_reclaim(folio);
>  
>  	/* start writeback */
> -	__swap_writepage(folio, NULL);
> +	if (si->ops && si->ops->write_folio)
> +		si->ops->write_folio(si, folio, NULL);
>  
>  out:
>  	if (ret && ret != -EEXIST) {
> -- 
> 2.52.0


^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2026-03-02 14:53 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-03-02 10:40 [PATCH 0/3] mm/swap: use swap_ops to register swap device's methods Baoquan He
2026-03-02 10:40 ` [PATCH 1/3] mm/swap: rename mm/page_io.c to mm/swap_io.c Baoquan He
2026-03-02 10:56   ` Barry Song
2026-03-02 13:25     ` Baoquan He
2026-03-02 10:40 ` [PATCH 2/3] mm/swap: use swap_ops to register swap device's methods Baoquan He
2026-03-02 11:11   ` Barry Song
2026-03-02 14:47     ` Baoquan He
2026-03-02 12:20   ` YoungJun Park
2026-03-02 14:09   ` YoungJun Park
2026-03-02 14:53   ` Usama Arif
2026-03-02 10:40 ` [PATCH 3/3] mm/swap_io.c: rename swap_writepage_* to swap_write_folio_* Baoquan He
2026-03-02 11:28   ` Barry Song
2026-03-02 14:43 ` [PATCH 0/3] mm/swap: use swap_ops to register swap device's methods YoungJun Park

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox