linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v3 1/3] mm: zswap: rename is_zswap_enabled() to zswap_is_enabled()
@ 2024-06-11  2:45 Yosry Ahmed
  2024-06-11  2:45 ` [PATCH v3 2/3] mm: zswap: add zswap_never_enabled() Yosry Ahmed
                   ` (3 more replies)
  0 siblings, 4 replies; 10+ messages in thread
From: Yosry Ahmed @ 2024-06-11  2:45 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Johannes Weiner, Nhat Pham, Chengming Zhou, Barry Song, Chris Li,
	David Hildenbrand, Matthew Wilcox, linux-mm, linux-kernel,
	Yosry Ahmed

In preparation of introducing a similar function, rename
is_zswap_enabled() to use zswap_* prefix like other zswap functions.

Signed-off-by: Yosry Ahmed <yosryahmed@google.com>
---
 include/linux/zswap.h | 4 ++--
 mm/memcontrol.c       | 2 +-
 mm/zswap.c            | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/linux/zswap.h b/include/linux/zswap.h
index 2a85b941db975..ce5e7bfe8f1ec 100644
--- a/include/linux/zswap.h
+++ b/include/linux/zswap.h
@@ -35,7 +35,7 @@ void zswap_swapoff(int type);
 void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg);
 void zswap_lruvec_state_init(struct lruvec *lruvec);
 void zswap_folio_swapin(struct folio *folio);
-bool is_zswap_enabled(void);
+bool zswap_is_enabled(void);
 #else
 
 struct zswap_lruvec_state {};
@@ -60,7 +60,7 @@ static inline void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg) {}
 static inline void zswap_lruvec_state_init(struct lruvec *lruvec) {}
 static inline void zswap_folio_swapin(struct folio *folio) {}
 
-static inline bool is_zswap_enabled(void)
+static inline bool zswap_is_enabled(void)
 {
 	return false;
 }
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 1303ed01bb5e5..a811dfff10cda 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -8469,7 +8469,7 @@ void obj_cgroup_uncharge_zswap(struct obj_cgroup *objcg, size_t size)
 bool mem_cgroup_zswap_writeback_enabled(struct mem_cgroup *memcg)
 {
 	/* if zswap is disabled, do not block pages going to the swapping device */
-	return !is_zswap_enabled() || !memcg || READ_ONCE(memcg->zswap_writeback);
+	return !zswap_is_enabled() || !memcg || READ_ONCE(memcg->zswap_writeback);
 }
 
 static u64 zswap_current_read(struct cgroup_subsys_state *css,
diff --git a/mm/zswap.c b/mm/zswap.c
index b9b35ef86d9be..a8c8dd8cfe6f5 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -131,7 +131,7 @@ static bool zswap_shrinker_enabled = IS_ENABLED(
 		CONFIG_ZSWAP_SHRINKER_DEFAULT_ON);
 module_param_named(shrinker_enabled, zswap_shrinker_enabled, bool, 0644);
 
-bool is_zswap_enabled(void)
+bool zswap_is_enabled(void)
 {
 	return zswap_enabled;
 }
-- 
2.45.2.505.gda0bf45e8d-goog



^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH v3 2/3] mm: zswap: add zswap_never_enabled()
  2024-06-11  2:45 [PATCH v3 1/3] mm: zswap: rename is_zswap_enabled() to zswap_is_enabled() Yosry Ahmed
@ 2024-06-11  2:45 ` Yosry Ahmed
  2024-06-11 16:32   ` Nhat Pham
  2024-06-11 21:53   ` Barry Song
  2024-06-11  2:45 ` [PATCH v3 3/3] mm: zswap: handle incorrect attempts to load large folios Yosry Ahmed
                   ` (2 subsequent siblings)
  3 siblings, 2 replies; 10+ messages in thread
From: Yosry Ahmed @ 2024-06-11  2:45 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Johannes Weiner, Nhat Pham, Chengming Zhou, Barry Song, Chris Li,
	David Hildenbrand, Matthew Wilcox, linux-mm, linux-kernel,
	Yosry Ahmed

Add zswap_never_enabled() to skip the xarray lookup in zswap_load() if
zswap was never enabled on the system. It is implemented using static
branches for efficiency, as enabling zswap should be a rare event. This
could shave some cycles off zswap_load() when CONFIG_ZSWAP is used but
zswap is never enabled.

However, the real motivation behind this patch is two-fold:
- Incoming large folio swapin work will need to fallback to order-0
  folios if zswap was ever enabled, because any part of the folio could
  be in zswap, until proper handling of large folios with zswap is
  added.

- A warning and recovery attempt will be added in a following change in
  case the above was not done correctly. Zswap will fail the read if
  the folio is large and it was ever enabled.

Signed-off-by: Yosry Ahmed <yosryahmed@google.com>
---
 mm/zswap.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/mm/zswap.c b/mm/zswap.c
index a8c8dd8cfe6f5..7fcd751e847d6 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -83,6 +83,7 @@ static bool zswap_pool_reached_full;
 static int zswap_setup(void);
 
 /* Enable/disable zswap */
+static DEFINE_STATIC_KEY_MAYBE(CONFIG_ZSWAP_DEFAULT_ON, zswap_ever_enabled);
 static bool zswap_enabled = IS_ENABLED(CONFIG_ZSWAP_DEFAULT_ON);
 static int zswap_enabled_param_set(const char *,
 				   const struct kernel_param *);
@@ -136,6 +137,11 @@ bool zswap_is_enabled(void)
 	return zswap_enabled;
 }
 
+static bool zswap_never_enabled(void)
+{
+	return !static_branch_maybe(CONFIG_ZSWAP_DEFAULT_ON, &zswap_ever_enabled);
+}
+
 /*********************************
 * data structures
 **********************************/
@@ -1557,6 +1563,9 @@ bool zswap_load(struct folio *folio)
 
 	VM_WARN_ON_ONCE(!folio_test_locked(folio));
 
+	if (zswap_never_enabled())
+		return false;
+
 	/*
 	 * When reading into the swapcache, invalidate our entry. The
 	 * swapcache can be the authoritative owner of the page and
@@ -1735,6 +1744,7 @@ static int zswap_setup(void)
 			zpool_get_type(pool->zpools[0]));
 		list_add(&pool->list, &zswap_pools);
 		zswap_has_pool = true;
+		static_branch_enable(&zswap_ever_enabled);
 	} else {
 		pr_err("pool creation failed\n");
 		zswap_enabled = false;
-- 
2.45.2.505.gda0bf45e8d-goog



^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH v3 3/3] mm: zswap: handle incorrect attempts to load large folios
  2024-06-11  2:45 [PATCH v3 1/3] mm: zswap: rename is_zswap_enabled() to zswap_is_enabled() Yosry Ahmed
  2024-06-11  2:45 ` [PATCH v3 2/3] mm: zswap: add zswap_never_enabled() Yosry Ahmed
@ 2024-06-11  2:45 ` Yosry Ahmed
  2024-06-11 21:56   ` Barry Song
  2024-06-11  2:59 ` [PATCH v3 1/3] mm: zswap: rename is_zswap_enabled() to zswap_is_enabled() Barry Song
  2024-06-11 15:58 ` Nhat Pham
  3 siblings, 1 reply; 10+ messages in thread
From: Yosry Ahmed @ 2024-06-11  2:45 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Johannes Weiner, Nhat Pham, Chengming Zhou, Barry Song, Chris Li,
	David Hildenbrand, Matthew Wilcox, linux-mm, linux-kernel,
	Yosry Ahmed

Zswap does not support storing or loading large folios. Until proper
support is added, attempts to load large folios from zswap are a bug.

For example, if a swapin fault observes that contiguous PTEs are
pointing to contiguous swap entries and tries to swap them in as a large
folio, swap_read_folio() will pass in a large folio to zswap_load(), but
zswap_load() will only effectively load the first page in the folio. If
the first page is not in zswap, the folio will be read from disk, even
though other pages may be in zswap.

In both cases, this will lead to silent data corruption. Proper support
needs to be added before large folio swapins and zswap can work
together.

Looking at callers of swap_read_folio(), it seems like they are either
allocated from __read_swap_cache_async() or do_swap_page() in the
SWP_SYNCHRONOUS_IO path. Both of which allocate order-0 folios, so
everything is fine for now.

However, there is ongoing work to add support for large folio swapins
[1]. To make sure new development does not break zswap (or get broken by
zswap), add minimal handling of incorrect loads of large folios to
zswap.

First, move the call to folio_mark_uptodate() inside zswap_load().

If a large folio load is attempted, and zswap was ever enabled on the
system, return 'true' without calling folio_mark_uptodate(). This will
prevent the folio from being read from disk, and will emit an IO error
because the folio is not uptodate (e.g. do_swap_fault() will return
VM_FAULT_SIGBUS). It may not be reliable recovery in all cases, but it
is better than nothing.

This was tested by hacking the allocation in __read_swap_cache_async()
to use order 2 and __GFP_COMP.

In the future, to handle this correctly, the swapin code should:
(a) Fallback to order-0 swapins if zswap was ever used on the machine,
because compressed pages remain in zswap after it is disabled.
(b) Add proper support to swapin large folios from zswap (fully or
partially).

Probably start with (a) then followup with (b).

[1]https://lore.kernel.org/linux-mm/20240304081348.197341-6-21cnbao@gmail.com/

Signed-off-by: Yosry Ahmed <yosryahmed@google.com>
---
 mm/page_io.c |  1 -
 mm/zswap.c   | 12 ++++++++++++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/mm/page_io.c b/mm/page_io.c
index f1a9cfab6e748..8f441dd8e109f 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -517,7 +517,6 @@ void swap_read_folio(struct folio *folio, struct swap_iocb **plug)
 	delayacct_swapin_start();
 
 	if (zswap_load(folio)) {
-		folio_mark_uptodate(folio);
 		folio_unlock(folio);
 	} else if (data_race(sis->flags & SWP_FS_OPS)) {
 		swap_read_folio_fs(folio, plug);
diff --git a/mm/zswap.c b/mm/zswap.c
index 7fcd751e847d6..505f4b9812891 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -1566,6 +1566,17 @@ bool zswap_load(struct folio *folio)
 	if (zswap_never_enabled())
 		return false;
 
+	/*
+	 * Large folios should not be swapped in while zswap is being used, as
+	 * they are not properly handled. Zswap does not properly load large
+	 * folios, and a large folio may only be partially in zswap.
+	 *
+	 * Return true without marking the folio uptodate so that an IO error is
+	 * emitted (e.g. do_swap_page() will sigbus).
+	 */
+	if (WARN_ON_ONCE(folio_test_large(folio)))
+		return true;
+
 	/*
 	 * When reading into the swapcache, invalidate our entry. The
 	 * swapcache can be the authoritative owner of the page and
@@ -1600,6 +1611,7 @@ bool zswap_load(struct folio *folio)
 		folio_mark_dirty(folio);
 	}
 
+	folio_mark_uptodate(folio);
 	return true;
 }
 
-- 
2.45.2.505.gda0bf45e8d-goog



^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v3 1/3] mm: zswap: rename is_zswap_enabled() to zswap_is_enabled()
  2024-06-11  2:45 [PATCH v3 1/3] mm: zswap: rename is_zswap_enabled() to zswap_is_enabled() Yosry Ahmed
  2024-06-11  2:45 ` [PATCH v3 2/3] mm: zswap: add zswap_never_enabled() Yosry Ahmed
  2024-06-11  2:45 ` [PATCH v3 3/3] mm: zswap: handle incorrect attempts to load large folios Yosry Ahmed
@ 2024-06-11  2:59 ` Barry Song
  2024-06-11 15:58 ` Nhat Pham
  3 siblings, 0 replies; 10+ messages in thread
From: Barry Song @ 2024-06-11  2:59 UTC (permalink / raw)
  To: Yosry Ahmed
  Cc: Andrew Morton, Johannes Weiner, Nhat Pham, Chengming Zhou,
	Chris Li, David Hildenbrand, Matthew Wilcox, linux-mm,
	linux-kernel

On Tue, Jun 11, 2024 at 2:45 PM Yosry Ahmed <yosryahmed@google.com> wrote:
>
> In preparation of introducing a similar function, rename
> is_zswap_enabled() to use zswap_* prefix like other zswap functions.
>
> Signed-off-by: Yosry Ahmed <yosryahmed@google.com>

Reviewed-by: Barry Song <baohua@kernel.org>

> ---
>  include/linux/zswap.h | 4 ++--
>  mm/memcontrol.c       | 2 +-
>  mm/zswap.c            | 2 +-
>  3 files changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/include/linux/zswap.h b/include/linux/zswap.h
> index 2a85b941db975..ce5e7bfe8f1ec 100644
> --- a/include/linux/zswap.h
> +++ b/include/linux/zswap.h
> @@ -35,7 +35,7 @@ void zswap_swapoff(int type);
>  void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg);
>  void zswap_lruvec_state_init(struct lruvec *lruvec);
>  void zswap_folio_swapin(struct folio *folio);
> -bool is_zswap_enabled(void);
> +bool zswap_is_enabled(void);
>  #else
>
>  struct zswap_lruvec_state {};
> @@ -60,7 +60,7 @@ static inline void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg) {}
>  static inline void zswap_lruvec_state_init(struct lruvec *lruvec) {}
>  static inline void zswap_folio_swapin(struct folio *folio) {}
>
> -static inline bool is_zswap_enabled(void)
> +static inline bool zswap_is_enabled(void)
>  {
>         return false;
>  }
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 1303ed01bb5e5..a811dfff10cda 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -8469,7 +8469,7 @@ void obj_cgroup_uncharge_zswap(struct obj_cgroup *objcg, size_t size)
>  bool mem_cgroup_zswap_writeback_enabled(struct mem_cgroup *memcg)
>  {
>         /* if zswap is disabled, do not block pages going to the swapping device */
> -       return !is_zswap_enabled() || !memcg || READ_ONCE(memcg->zswap_writeback);
> +       return !zswap_is_enabled() || !memcg || READ_ONCE(memcg->zswap_writeback);
>  }
>
>  static u64 zswap_current_read(struct cgroup_subsys_state *css,
> diff --git a/mm/zswap.c b/mm/zswap.c
> index b9b35ef86d9be..a8c8dd8cfe6f5 100644
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -131,7 +131,7 @@ static bool zswap_shrinker_enabled = IS_ENABLED(
>                 CONFIG_ZSWAP_SHRINKER_DEFAULT_ON);
>  module_param_named(shrinker_enabled, zswap_shrinker_enabled, bool, 0644);
>
> -bool is_zswap_enabled(void)
> +bool zswap_is_enabled(void)
>  {
>         return zswap_enabled;
>  }
> --
> 2.45.2.505.gda0bf45e8d-goog
>


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v3 1/3] mm: zswap: rename is_zswap_enabled() to zswap_is_enabled()
  2024-06-11  2:45 [PATCH v3 1/3] mm: zswap: rename is_zswap_enabled() to zswap_is_enabled() Yosry Ahmed
                   ` (2 preceding siblings ...)
  2024-06-11  2:59 ` [PATCH v3 1/3] mm: zswap: rename is_zswap_enabled() to zswap_is_enabled() Barry Song
@ 2024-06-11 15:58 ` Nhat Pham
  3 siblings, 0 replies; 10+ messages in thread
From: Nhat Pham @ 2024-06-11 15:58 UTC (permalink / raw)
  To: Yosry Ahmed
  Cc: Andrew Morton, Johannes Weiner, Chengming Zhou, Barry Song,
	Chris Li, David Hildenbrand, Matthew Wilcox, linux-mm,
	linux-kernel

On Mon, Jun 10, 2024 at 7:45 PM Yosry Ahmed <yosryahmed@google.com> wrote:
>
> In preparation of introducing a similar function, rename
> is_zswap_enabled() to use zswap_* prefix like other zswap functions.
>
> Signed-off-by: Yosry Ahmed <yosryahmed@google.com>

Ooops this is my bad :) Thanks for making it more consistent, Yosry!
Reviewed-by: Nhat Pham <nphamcs@gmail.com>


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v3 2/3] mm: zswap: add zswap_never_enabled()
  2024-06-11  2:45 ` [PATCH v3 2/3] mm: zswap: add zswap_never_enabled() Yosry Ahmed
@ 2024-06-11 16:32   ` Nhat Pham
  2024-06-11 21:53   ` Barry Song
  1 sibling, 0 replies; 10+ messages in thread
From: Nhat Pham @ 2024-06-11 16:32 UTC (permalink / raw)
  To: Yosry Ahmed
  Cc: Andrew Morton, Johannes Weiner, Chengming Zhou, Barry Song,
	Chris Li, David Hildenbrand, Matthew Wilcox, linux-mm,
	linux-kernel

On Mon, Jun 10, 2024 at 7:45 PM Yosry Ahmed <yosryahmed@google.com> wrote:
>
> Add zswap_never_enabled() to skip the xarray lookup in zswap_load() if
> zswap was never enabled on the system. It is implemented using static
> branches for efficiency, as enabling zswap should be a rare event. This
> could shave some cycles off zswap_load() when CONFIG_ZSWAP is used but
> zswap is never enabled.
>
> However, the real motivation behind this patch is two-fold:
> - Incoming large folio swapin work will need to fallback to order-0
>   folios if zswap was ever enabled, because any part of the folio could
>   be in zswap, until proper handling of large folios with zswap is
>   added.
>
> - A warning and recovery attempt will be added in a following change in
>   case the above was not done incorrectly. Zswap will fail the read if
>   the folio is large and it was ever enabled.
>
> Signed-off-by: Yosry Ahmed <yosryahmed@google.com>

This LGTM.
Reviewed-by: Nhat Pham <nphamcs@gmail.com>


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v3 2/3] mm: zswap: add zswap_never_enabled()
  2024-06-11  2:45 ` [PATCH v3 2/3] mm: zswap: add zswap_never_enabled() Yosry Ahmed
  2024-06-11 16:32   ` Nhat Pham
@ 2024-06-11 21:53   ` Barry Song
       [not found]     ` <CAJD7tkY6h1RkbYHbaQcTuVXOsY-t=arytf5HtcKfx7A75x06bg@mail.gmail.com>
  1 sibling, 1 reply; 10+ messages in thread
From: Barry Song @ 2024-06-11 21:53 UTC (permalink / raw)
  To: Yosry Ahmed
  Cc: Andrew Morton, Johannes Weiner, Nhat Pham, Chengming Zhou,
	Chris Li, David Hildenbrand, Matthew Wilcox, linux-mm,
	linux-kernel

On Tue, Jun 11, 2024 at 2:45 PM Yosry Ahmed <yosryahmed@google.com> wrote:
>
> Add zswap_never_enabled() to skip the xarray lookup in zswap_load() if
> zswap was never enabled on the system. It is implemented using static
> branches for efficiency, as enabling zswap should be a rare event. This
> could shave some cycles off zswap_load() when CONFIG_ZSWAP is used but
> zswap is never enabled.
>
> However, the real motivation behind this patch is two-fold:
> - Incoming large folio swapin work will need to fallback to order-0
>   folios if zswap was ever enabled, because any part of the folio could
>   be in zswap, until proper handling of large folios with zswap is
>   added.
>
> - A warning and recovery attempt will be added in a following change in
>   case the above was not done incorrectly. Zswap will fail the read if
>   the folio is large and it was ever enabled.
>
> Signed-off-by: Yosry Ahmed <yosryahmed@google.com>
> ---
>  mm/zswap.c | 10 ++++++++++
>  1 file changed, 10 insertions(+)
>
> diff --git a/mm/zswap.c b/mm/zswap.c
> index a8c8dd8cfe6f5..7fcd751e847d6 100644
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -83,6 +83,7 @@ static bool zswap_pool_reached_full;
>  static int zswap_setup(void);
>
>  /* Enable/disable zswap */
> +static DEFINE_STATIC_KEY_MAYBE(CONFIG_ZSWAP_DEFAULT_ON, zswap_ever_enabled);
>  static bool zswap_enabled = IS_ENABLED(CONFIG_ZSWAP_DEFAULT_ON);
>  static int zswap_enabled_param_set(const char *,
>                                    const struct kernel_param *);
> @@ -136,6 +137,11 @@ bool zswap_is_enabled(void)
>         return zswap_enabled;
>  }
>
> +static bool zswap_never_enabled(void)
> +{
> +       return !static_branch_maybe(CONFIG_ZSWAP_DEFAULT_ON, &zswap_ever_enabled);
> +}

Will we "extern" this one so that mm-core can use it to fallback
to small folios?
or you prefer this to be done within the coming swapin series?

> +
>  /*********************************
>  * data structures
>  **********************************/
> @@ -1557,6 +1563,9 @@ bool zswap_load(struct folio *folio)
>
>         VM_WARN_ON_ONCE(!folio_test_locked(folio));
>
> +       if (zswap_never_enabled())
> +               return false;
> +
>         /*
>          * When reading into the swapcache, invalidate our entry. The
>          * swapcache can be the authoritative owner of the page and
> @@ -1735,6 +1744,7 @@ static int zswap_setup(void)
>                         zpool_get_type(pool->zpools[0]));
>                 list_add(&pool->list, &zswap_pools);
>                 zswap_has_pool = true;
> +               static_branch_enable(&zswap_ever_enabled);
>         } else {
>                 pr_err("pool creation failed\n");
>                 zswap_enabled = false;
> --
> 2.45.2.505.gda0bf45e8d-goog
>

Thanks
Barry


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v3 3/3] mm: zswap: handle incorrect attempts to load large folios
  2024-06-11  2:45 ` [PATCH v3 3/3] mm: zswap: handle incorrect attempts to load large folios Yosry Ahmed
@ 2024-06-11 21:56   ` Barry Song
  0 siblings, 0 replies; 10+ messages in thread
From: Barry Song @ 2024-06-11 21:56 UTC (permalink / raw)
  To: Yosry Ahmed
  Cc: Andrew Morton, Johannes Weiner, Nhat Pham, Chengming Zhou,
	Chris Li, David Hildenbrand, Matthew Wilcox, linux-mm,
	linux-kernel

On Tue, Jun 11, 2024 at 2:45 PM Yosry Ahmed <yosryahmed@google.com> wrote:
>
> Zswap does not support storing or loading large folios. Until proper
> support is added, attempts to load large folios from zswap are a bug.
>
> For example, if a swapin fault observes that contiguous PTEs are
> pointing to contiguous swap entries and tries to swap them in as a large
> folio, swap_read_folio() will pass in a large folio to zswap_load(), but
> zswap_load() will only effectively load the first page in the folio. If
> the first page is not in zswap, the folio will be read from disk, even
> though other pages may be in zswap.
>
> In both cases, this will lead to silent data corruption. Proper support
> needs to be added before large folio swapins and zswap can work
> together.
>
> Looking at callers of swap_read_folio(), it seems like they are either
> allocated from __read_swap_cache_async() or do_swap_page() in the
> SWP_SYNCHRONOUS_IO path. Both of which allocate order-0 folios, so
> everything is fine for now.
>
> However, there is ongoing work to add to support large folio swapins
> [1]. To make sure new development does not break zswap (or get broken by
> zswap), add minimal handling of incorrect loads of large folios to
> zswap.
>
> First, move the call folio_mark_uptodate() inside zswap_load().
>
> If a large folio load is attempted, and zswap was ever enabled on the
> system, return 'true' without calling folio_mark_uptodate(). This will
> prevent the folio from being read from disk, and will emit an IO error
> because the folio is not uptodate (e.g. do_swap_fault() will return
> VM_FAULT_SIGBUS). It may not be reliable recovery in all cases, but it
> is better than nothing.
>
> This was tested by hacking the allocation in __read_swap_cache_async()
> to use order 2 and __GFP_COMP.
>
> In the future, to handle this correctly, the swapin code should:
> (a) Fallback to order-0 swapins if zswap was ever used on the machine,
> because compressed pages remain in zswap after it is disabled.
> (b) Add proper support to swapin large folios from zswap (fully or
> partially).
>
> Probably start with (a) then followup with (b).
>
> [1]https://lore.kernel.org/linux-mm/20240304081348.197341-6-21cnbao@gmail.com/
>
> Signed-off-by: Yosry Ahmed <yosryahmed@google.com>

Acked-by: Barry Song <baohua@kernel.org>

> ---
>  mm/page_io.c |  1 -
>  mm/zswap.c   | 12 ++++++++++++
>  2 files changed, 12 insertions(+), 1 deletion(-)
>
> diff --git a/mm/page_io.c b/mm/page_io.c
> index f1a9cfab6e748..8f441dd8e109f 100644
> --- a/mm/page_io.c
> +++ b/mm/page_io.c
> @@ -517,7 +517,6 @@ void swap_read_folio(struct folio *folio, struct swap_iocb **plug)
>         delayacct_swapin_start();
>
>         if (zswap_load(folio)) {
> -               folio_mark_uptodate(folio);
>                 folio_unlock(folio);
>         } else if (data_race(sis->flags & SWP_FS_OPS)) {
>                 swap_read_folio_fs(folio, plug);
> diff --git a/mm/zswap.c b/mm/zswap.c
> index 7fcd751e847d6..505f4b9812891 100644
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -1566,6 +1566,17 @@ bool zswap_load(struct folio *folio)
>         if (zswap_never_enabled())
>                 return false;
>
> +       /*
> +        * Large folios should not be swapped in while zswap is being used, as
> +        * they are not properly handled. Zswap does not properly load large
> +        * folios, and a large folio may only be partially in zswap.
> +        *
> +        * Return true without marking the folio uptodate so that an IO error is
> +        * emitted (e.g. do_swap_page() will sigbus).
> +        */
> +       if (WARN_ON_ONCE(folio_test_large(folio)))
> +               return true;
> +
>         /*
>          * When reading into the swapcache, invalidate our entry. The
>          * swapcache can be the authoritative owner of the page and
> @@ -1600,6 +1611,7 @@ bool zswap_load(struct folio *folio)
>                 folio_mark_dirty(folio);
>         }
>
> +       folio_mark_uptodate(folio);
>         return true;
>  }
>
> --
> 2.45.2.505.gda0bf45e8d-goog
>


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v3 2/3] mm: zswap: add zswap_never_enabled()
       [not found]     ` <CAJD7tkY6h1RkbYHbaQcTuVXOsY-t=arytf5HtcKfx7A75x06bg@mail.gmail.com>
@ 2024-06-11 22:19       ` Barry Song
  2024-06-11 23:37         ` Yosry Ahmed
  0 siblings, 1 reply; 10+ messages in thread
From: Barry Song @ 2024-06-11 22:19 UTC (permalink / raw)
  To: Yosry Ahmed
  Cc: Andrew Morton, Johannes Weiner, Nhat Pham, Chengming Zhou,
	Chris Li, David Hildenbrand, Matthew Wilcox, linux-mm,
	linux-kernel

On Wed, Jun 12, 2024 at 9:55 AM Yosry Ahmed <yosryahmed@google.com> wrote:
>
> On Tue, Jun 11, 2024 at 2:53 PM Barry Song <21cnbao@gmail.com> wrote:
> >
> > On Tue, Jun 11, 2024 at 2:45 PM Yosry Ahmed <yosryahmed@google.com> wrote:
> > >
> > > Add zswap_never_enabled() to skip the xarray lookup in zswap_load() if
> > > zswap was never enabled on the system. It is implemented using static
> > > branches for efficiency, as enabling zswap should be a rare event. This
> > > could shave some cycles off zswap_load() when CONFIG_ZSWAP is used but
> > > zswap is never enabled.
> > >
> > > However, the real motivation behind this patch is two-fold:
> > > - Incoming large folio swapin work will need to fallback to order-0
> > >   folios if zswap was ever enabled, because any part of the folio could
> > >   be in zswap, until proper handling of large folios with zswap is
> > >   added.
> > >
> > > - A warning and recovery attempt will be added in a following change in
> > >   case the above was not done incorrectly. Zswap will fail the read if
> > >   the folio is large and it was ever enabled.
> > >
> > > Signed-off-by: Yosry Ahmed <yosryahmed@google.com>
> > > ---
> > >  mm/zswap.c | 10 ++++++++++
> > >  1 file changed, 10 insertions(+)
> > >
> > > diff --git a/mm/zswap.c b/mm/zswap.c
> > > index a8c8dd8cfe6f5..7fcd751e847d6 100644
> > > --- a/mm/zswap.c
> > > +++ b/mm/zswap.c
> > > @@ -83,6 +83,7 @@ static bool zswap_pool_reached_full;
> > >  static int zswap_setup(void);
> > >
> > >  /* Enable/disable zswap */
> > > +static DEFINE_STATIC_KEY_MAYBE(CONFIG_ZSWAP_DEFAULT_ON, zswap_ever_enabled);
> > >  static bool zswap_enabled = IS_ENABLED(CONFIG_ZSWAP_DEFAULT_ON);
> > >  static int zswap_enabled_param_set(const char *,
> > >                                    const struct kernel_param *);
> > > @@ -136,6 +137,11 @@ bool zswap_is_enabled(void)
> > >         return zswap_enabled;
> > >  }
> > >
> > > +static bool zswap_never_enabled(void)
> > > +{
> > > +       return !static_branch_maybe(CONFIG_ZSWAP_DEFAULT_ON, &zswap_ever_enabled);
> > > +}
> >
> > Will we "extern" this one so that mm-core can use it to fallback
> > to small folios?
> > or you prefer this to be done within the coming swapin series?
>
> My intention was to keep it static for now, and expose it in the
> header when needed (in the swapin series). If others think it's better
> to do this now to avoid the churn I am happy to do it as well.

Personally, I'd vote for exposing it now to avoid one more patch which might
come shortly. And this patchset serves the clear purpose of drawing attention
from mm-core to fallback to small folios.

Thanks
Barry


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v3 2/3] mm: zswap: add zswap_never_enabled()
  2024-06-11 22:19       ` Barry Song
@ 2024-06-11 23:37         ` Yosry Ahmed
  0 siblings, 0 replies; 10+ messages in thread
From: Yosry Ahmed @ 2024-06-11 23:37 UTC (permalink / raw)
  To: Barry Song
  Cc: Andrew Morton, Johannes Weiner, Nhat Pham, Chengming Zhou,
	Chris Li, David Hildenbrand, Matthew Wilcox, linux-mm,
	linux-kernel

On Wed, Jun 12, 2024 at 10:19:58AM +1200, Barry Song wrote:
> On Wed, Jun 12, 2024 at 9:55 AM Yosry Ahmed <yosryahmed@google.com> wrote:
> >
> > On Tue, Jun 11, 2024 at 2:53 PM Barry Song <21cnbao@gmail.com> wrote:
> > >
> > > On Tue, Jun 11, 2024 at 2:45 PM Yosry Ahmed <yosryahmed@google.com> wrote:
> > > >
> > > > Add zswap_never_enabled() to skip the xarray lookup in zswap_load() if
> > > > zswap was never enabled on the system. It is implemented using static
> > > > branches for efficiency, as enabling zswap should be a rare event. This
> > > > could shave some cycles off zswap_load() when CONFIG_ZSWAP is used but
> > > > zswap is never enabled.
> > > >
> > > > However, the real motivation behind this patch is two-fold:
> > > > - Incoming large folio swapin work will need to fallback to order-0
> > > >   folios if zswap was ever enabled, because any part of the folio could
> > > >   be in zswap, until proper handling of large folios with zswap is
> > > >   added.
> > > >
> > > > - A warning and recovery attempt will be added in a following change in
> > > >   case the above was not done incorrectly. Zswap will fail the read if
> > > >   the folio is large and it was ever enabled.
> > > >
> > > > Signed-off-by: Yosry Ahmed <yosryahmed@google.com>
> > > > ---
> > > >  mm/zswap.c | 10 ++++++++++
> > > >  1 file changed, 10 insertions(+)
> > > >
> > > > diff --git a/mm/zswap.c b/mm/zswap.c
> > > > index a8c8dd8cfe6f5..7fcd751e847d6 100644
> > > > --- a/mm/zswap.c
> > > > +++ b/mm/zswap.c
> > > > @@ -83,6 +83,7 @@ static bool zswap_pool_reached_full;
> > > >  static int zswap_setup(void);
> > > >
> > > >  /* Enable/disable zswap */
> > > > +static DEFINE_STATIC_KEY_MAYBE(CONFIG_ZSWAP_DEFAULT_ON, zswap_ever_enabled);
> > > >  static bool zswap_enabled = IS_ENABLED(CONFIG_ZSWAP_DEFAULT_ON);
> > > >  static int zswap_enabled_param_set(const char *,
> > > >                                    const struct kernel_param *);
> > > > @@ -136,6 +137,11 @@ bool zswap_is_enabled(void)
> > > >         return zswap_enabled;
> > > >  }
> > > >
> > > > +static bool zswap_never_enabled(void)
> > > > +{
> > > > +       return !static_branch_maybe(CONFIG_ZSWAP_DEFAULT_ON, &zswap_ever_enabled);
> > > > +}
> > >
> > > Will we "extern" this one so that mm-core can use it to fallback
> > > to small folios?
> > > or you prefer this to be done within the coming swapin series?
> >
> > My intention was to keep it static for now, and expose it in the
> > header when needed (in the swapin series). If others think it's better
> > to do this now to avoid the churn I am happy to do it as well.
> 
> Personally, I'd vote for exposing it now to avoid one more patch which might
> come shortly. And this patchset serves the clear purpose of drawing attention
> from mm-core to fallback to small folios.

Sure. Andrew, unless anyone objects, could you please squash the
following diff and add the following sentence to the commit log:

"Expose zswap_never_enabled() in the header for the swapin work to use
it later."

diff --git a/include/linux/zswap.h b/include/linux/zswap.h
index ce5e7bfe8f1ec..bf83ae5e285d4 100644
--- a/include/linux/zswap.h
+++ b/include/linux/zswap.h
@@ -36,6 +36,7 @@ void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg);
 void zswap_lruvec_state_init(struct lruvec *lruvec);
 void zswap_folio_swapin(struct folio *folio);
 bool zswap_is_enabled(void);
+bool zswap_never_enabled(void);
 #else
 
 struct zswap_lruvec_state {};
@@ -65,6 +66,11 @@ static inline bool zswap_is_enabled(void)
 	return false;
 }
 
+static inline bool zswap_never_enabled(void)
+{
+	return false;
+}
+
 #endif
 
 #endif /* _LINUX_ZSWAP_H */
diff --git a/mm/zswap.c b/mm/zswap.c
index 505f4b9812891..a546c01602aaf 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -137,7 +137,7 @@ bool zswap_is_enabled(void)
 	return zswap_enabled;
 }
 
-static bool zswap_never_enabled(void)
+bool zswap_never_enabled(void)
 {
 	return !static_branch_maybe(CONFIG_ZSWAP_DEFAULT_ON, &zswap_ever_enabled);
 }

> 
> Thanks
> Barry


^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2024-06-11 23:37 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-06-11  2:45 [PATCH v3 1/3] mm: zswap: rename is_zswap_enabled() to zswap_is_enabled() Yosry Ahmed
2024-06-11  2:45 ` [PATCH v3 2/3] mm: zswap: add zswap_never_enabled() Yosry Ahmed
2024-06-11 16:32   ` Nhat Pham
2024-06-11 21:53   ` Barry Song
     [not found]     ` <CAJD7tkY6h1RkbYHbaQcTuVXOsY-t=arytf5HtcKfx7A75x06bg@mail.gmail.com>
2024-06-11 22:19       ` Barry Song
2024-06-11 23:37         ` Yosry Ahmed
2024-06-11  2:45 ` [PATCH v3 3/3] mm: zswap: handle incorrect attempts to load large folios Yosry Ahmed
2024-06-11 21:56   ` Barry Song
2024-06-11  2:59 ` [PATCH v3 1/3] mm: zswap: rename is_zswap_enabled() to zswap_is_enabled() Barry Song
2024-06-11 15:58 ` Nhat Pham

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox