linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Kairui Song <ryncsn@gmail.com>
To: linux-mm@kvack.org
Cc: Andrew Morton <akpm@linux-foundation.org>,
	"Huang, Ying" <ying.huang@intel.com>,
	David Hildenbrand <david@redhat.com>,
	Hugh Dickins <hughd@google.com>,
	Johannes Weiner <hannes@cmpxchg.org>,
	Matthew Wilcox <willy@infradead.org>,
	Michal Hocko <mhocko@suse.com>,
	linux-kernel@vger.kernel.org, Kairui Song <kasong@tencent.com>
Subject: [PATCH 05/24] mm/swap: move readahead policy checking into swapin_readahead
Date: Mon, 20 Nov 2023 03:47:21 +0800	[thread overview]
Message-ID: <20231119194740.94101-6-ryncsn@gmail.com> (raw)
In-Reply-To: <20231119194740.94101-1-ryncsn@gmail.com>

From: Kairui Song <kasong@tencent.com>

This makes swapin_readahead a main entry for swapin pages,
prepare for optimizations in later commits.

This also makes swapoff able to make use of readahead checking
based on entry. Swapping off a 10G ZRAM (lzo-rle) is faster:

Before:
time swapoff /dev/zram0
real    0m12.337s
user    0m0.001s
sys     0m12.329s

After:
time swapoff /dev/zram0
real    0m9.728s
user    0m0.001s
sys     0m9.719s

And what's more, because now swapoff will also make use of no-readahead
swapin helper, this also fixed a bug for no-readahead case (eg. ZRAM):
when a process that swapped out some memory previously was moved to a new
cgroup, and the original cgroup is dead, swapoff the swap device will
make the swapped in pages accounted into the process doing the swapoff
instead of the new cgroup the process was moved to.

This can be easily reproduced by:
- Setup a ramdisk (eg. ZRAM) swap.
- Create memory cgroup A, B and C.
- Spawn process P1 in cgroup A and make it swap out some pages.
- Move process P1 to memory cgroup B.
- Destroy cgroup A.
- Do a swapoff in cgroup C.
- Swapped in pages is accounted into cgroup C.

This patch will fix it make the swapped in pages accounted in cgroup B.

The same bug exists for readahead path too, we'll fix it in later
commits.

Signed-off-by: Kairui Song <kasong@tencent.com>
---
 mm/memory.c     | 22 +++++++---------------
 mm/swap.h       |  6 ++----
 mm/swap_state.c | 33 ++++++++++++++++++++++++++-------
 mm/swapfile.c   |  2 +-
 4 files changed, 36 insertions(+), 27 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index fba4a5229163..f4237a2e3b93 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3792,6 +3792,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 	rmap_t rmap_flags = RMAP_NONE;
 	bool exclusive = false;
 	swp_entry_t entry;
+	bool swapcached;
 	pte_t pte;
 	vm_fault_t ret = 0;
 
@@ -3855,22 +3856,13 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 	swapcache = folio;
 
 	if (!folio) {
-		if (data_race(si->flags & SWP_SYNCHRONOUS_IO) &&
-		    __swap_count(entry) == 1) {
-			/* skip swapcache and readahead */
-			page = swapin_no_readahead(entry, GFP_HIGHUSER_MOVABLE,
-						vmf);
-			if (page)
-				folio = page_folio(page);
+		page = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE,
+					vmf, &swapcached);
+		if (page) {
+			folio = page_folio(page);
+			if (swapcached)
+				swapcache = folio;
 		} else {
-			page = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE,
-						vmf);
-			if (page)
-				folio = page_folio(page);
-			swapcache = folio;
-		}
-
-		if (!folio) {
 			/*
 			 * Back out if somebody else faulted in this pte
 			 * while we released the pte lock.
diff --git a/mm/swap.h b/mm/swap.h
index ea4be4791394..f82d43d7b52a 100644
--- a/mm/swap.h
+++ b/mm/swap.h
@@ -55,9 +55,7 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 struct page *swap_cluster_readahead(swp_entry_t entry, gfp_t flag,
 				    struct mempolicy *mpol, pgoff_t ilx);
 struct page *swapin_readahead(swp_entry_t entry, gfp_t flag,
-			      struct vm_fault *vmf);
-struct page *swapin_no_readahead(swp_entry_t entry, gfp_t flag,
-				 struct vm_fault *vmf);
+			      struct vm_fault *vmf, bool *swapcached);
 
 static inline unsigned int folio_swap_flags(struct folio *folio)
 {
@@ -89,7 +87,7 @@ static inline struct page *swap_cluster_readahead(swp_entry_t entry,
 }
 
 static inline struct page *swapin_readahead(swp_entry_t swp, gfp_t gfp_mask,
-			struct vm_fault *vmf)
+			struct vm_fault *vmf, bool *swapcached)
 {
 	return NULL;
 }
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 45dd8b7c195d..fd0047ae324e 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -316,6 +316,11 @@ void free_pages_and_swap_cache(struct encoded_page **pages, int nr)
 	release_pages(pages, nr);
 }
 
+static inline bool swap_use_no_readahead(struct swap_info_struct *si, swp_entry_t entry)
+{
+	return data_race(si->flags & SWP_SYNCHRONOUS_IO) && __swap_count(entry) == 1;
+}
+
 static inline bool swap_use_vma_readahead(void)
 {
 	return READ_ONCE(enable_vma_readahead) && !atomic_read(&nr_rotate_swap);
@@ -861,8 +866,8 @@ static struct page *swap_vma_readahead(swp_entry_t targ_entry, gfp_t gfp_mask,
  * Returns the struct page for entry and addr after the swap entry is read
  * in.
  */
-struct page *swapin_no_readahead(swp_entry_t entry, gfp_t gfp_mask,
-				 struct vm_fault *vmf)
+static struct page *swapin_no_readahead(swp_entry_t entry, gfp_t gfp_mask,
+					struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	struct page *page = NULL;
@@ -904,6 +909,8 @@ struct page *swapin_no_readahead(swp_entry_t entry, gfp_t gfp_mask,
  * @entry: swap entry of this memory
  * @gfp_mask: memory allocation flags
  * @vmf: fault information
+ * @swapcached: pointer to a bool used as indicator if the
+ *              page is swapped in through swapcache.
  *
  * Returns the struct page for entry and addr, after queueing swapin.
  *
@@ -912,17 +919,29 @@ struct page *swapin_no_readahead(swp_entry_t entry, gfp_t gfp_mask,
  * or vma-based(ie, virtual address based on faulty address) readahead.
  */
 struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
-				struct vm_fault *vmf)
+			      struct vm_fault *vmf, bool *swapcached)
 {
 	struct mempolicy *mpol;
-	pgoff_t ilx;
 	struct page *page;
+	pgoff_t ilx;
+	bool cached;
 
 	mpol = get_vma_policy(vmf->vma, vmf->address, 0, &ilx);
-	page = swap_use_vma_readahead() ?
-		swap_vma_readahead(entry, gfp_mask, mpol, ilx, vmf) :
-		swap_cluster_readahead(entry, gfp_mask, mpol, ilx);
+	if (swap_use_no_readahead(swp_swap_info(entry), entry)) {
+		page = swapin_no_readahead(entry, gfp_mask, vmf);
+		cached = false;
+	} else if (swap_use_vma_readahead()) {
+		page = swap_vma_readahead(entry, gfp_mask, mpol, ilx, vmf);
+		cached = true;
+	} else {
+		page = swap_cluster_readahead(entry, gfp_mask, mpol, ilx);
+		cached = true;
+	}
 	mpol_cond_put(mpol);
+
+	if (swapcached)
+		*swapcached = cached;
+
 	return page;
 }
 
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 756104ebd585..0142bfc71b81 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1874,7 +1874,7 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 			};
 
 			page = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE,
-						&vmf);
+						&vmf, NULL);
 			if (page)
 				folio = page_folio(page);
 		}
-- 
2.42.0



  parent reply	other threads:[~2023-11-19 19:48 UTC|newest]

Thread overview: 93+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-11-19 19:47 [PATCH 00/24] Swapin path refactor for optimization and bugfix Kairui Song
2023-11-19 19:47 ` [PATCH 01/24] mm/swap: fix a potential undefined behavior issue Kairui Song
2023-11-19 20:55   ` Matthew Wilcox
2023-11-20  3:35     ` Chris Li
2023-11-20 11:14       ` Kairui Song
2023-11-20 17:34         ` Chris Li
2023-11-19 19:47 ` [PATCH 02/24] mm/swapfile.c: add back some comment Kairui Song
2023-11-19 19:47 ` [PATCH 03/24] mm/swap: move no readahead swapin code to a stand alone helper Kairui Song
2023-11-19 21:00   ` Matthew Wilcox
2023-11-20 11:14     ` Kairui Song
2023-11-20 14:55   ` Dan Carpenter
2023-11-21  5:34   ` Chris Li
2023-11-22 17:33     ` Kairui Song
2023-11-19 19:47 ` [PATCH 04/24] mm/swap: avoid setting page lock bit and doing extra unlock check Kairui Song
2023-11-20  4:17   ` Chris Li
2023-11-20 11:15     ` Kairui Song
2023-11-20 17:44       ` Chris Li
2023-11-22 17:32         ` Kairui Song
2023-11-22 20:57           ` Chris Li
2023-11-24  8:14             ` Kairui Song
2023-11-24  8:37               ` Christopher Li
2023-11-19 19:47 ` Kairui Song [this message]
2023-11-21  6:15   ` [PATCH 05/24] mm/swap: move readahead policy checking into swapin_readahead Chris Li
2023-11-21  6:35     ` Kairui Song
2023-11-21  7:41       ` Chris Li
2023-11-21  8:32         ` Kairui Song
2023-11-21 15:24           ` Chris Li
2023-11-19 19:47 ` [PATCH 06/24] swap: rework swapin_no_readahead arguments Kairui Song
2023-11-20  0:20   ` kernel test robot
2023-11-21  6:44   ` Chris Li
2023-11-23 10:51     ` Kairui Song
2023-11-19 19:47 ` [PATCH 07/24] mm/swap: move swap_count to header to be shared Kairui Song
2023-11-21  6:51   ` Chris Li
2023-11-21  7:03     ` Kairui Song
2023-11-19 19:47 ` [PATCH 08/24] mm/swap: check readahead policy per entry Kairui Song
2023-11-20  6:04   ` Huang, Ying
2023-11-20 11:17     ` Kairui Song
2023-11-21  1:10       ` Huang, Ying
2023-11-21  5:20         ` Chris Li
2023-11-21  5:13       ` Chris Li
2023-11-21  7:54   ` Chris Li
2023-11-23 10:52     ` Kairui Song
2023-11-19 19:47 ` [PATCH 09/24] mm/swap: inline __swap_count Kairui Song
2023-11-20  7:41   ` Huang, Ying
2023-11-21  8:02     ` Chris Li
2023-11-19 19:47 ` [PATCH 10/24] mm/swap: remove nr_rotate_swap and related code Kairui Song
2023-11-21 15:45   ` Chris Li
2023-11-19 19:47 ` [PATCH 11/24] mm/swap: also handle swapcache lookup in swapin_readahead Kairui Song
2023-11-20  0:47   ` kernel test robot
2023-11-21 16:06   ` Chris Li
2023-11-24  8:42     ` Kairui Song
2023-11-24  9:10       ` Chris Li
2023-11-19 19:47 ` [PATCH 12/24] mm/swap: simplify arguments for swap_cache_get_folio Kairui Song
2023-11-21 16:36   ` Chris Li
2023-11-19 19:47 ` [PATCH 13/24] swap: simplify swap_cache_get_folio Kairui Song
2023-11-21 16:50   ` Chris Li
2023-11-19 19:47 ` [PATCH 14/24] mm/swap: do shadow lookup as well when doing swap cache lookup Kairui Song
2023-11-21 16:55   ` Chris Li
2023-11-19 19:47 ` [PATCH 15/24] mm/swap: avoid an duplicated swap cache lookup for SYNCHRONOUS_IO device Kairui Song
2023-11-21 17:15   ` Chris Li
2023-11-22 18:08     ` Kairui Song
2023-11-19 19:47 ` [PATCH 16/24] mm/swap: reduce scope of get_swap_device in swapin path Kairui Song
2023-11-19 21:12   ` Matthew Wilcox
2023-11-20 11:14     ` Kairui Song
2023-11-21 17:25   ` Chris Li
2023-11-22  0:36   ` Huang, Ying
2023-11-23 11:13     ` Kairui Song
2023-11-24  0:40       ` Huang, Ying
2023-11-19 19:47 ` [PATCH 17/24] mm/swap: fix false error when swapoff race with swapin Kairui Song
2023-11-19 19:47 ` [PATCH 18/24] mm/swap: introduce a helper non fault swapin Kairui Song
2023-11-20  1:07   ` kernel test robot
2023-11-22  4:40   ` Chris Li
2023-11-28 11:22     ` Kairui Song
2023-12-13  2:22       ` Chris Li
2023-11-19 19:47 ` [PATCH 19/24] shmem, swap: refactor error check on OOM or race Kairui Song
2023-11-20  7:04   ` Chris Li
2023-11-20 11:17     ` Kairui Song
2023-11-19 19:47 ` [PATCH 20/24] swap: simplify and make swap_find_cache static Kairui Song
2023-11-22  5:01   ` Chris Li
2023-11-19 19:47 ` [PATCH 21/24] swap: make swapin_readahead result checking argument mandatory Kairui Song
2023-11-22  5:15   ` Chris Li
2023-11-24  8:14     ` Kairui Song
2023-11-19 19:47 ` [PATCH 22/24] swap: make swap_cluster_readahead static Kairui Song
2023-11-22  5:20   ` Chris Li
2023-11-19 19:47 ` [PATCH 23/24] swap: fix multiple swap leak when after cgroup migrate Kairui Song
2023-11-20  7:35   ` Huang, Ying
2023-11-20 11:17     ` Kairui Song
2023-11-22  5:34       ` Chris Li
2023-11-19 19:47 ` [PATCH 24/24] mm/swap: change swapin_readahead to swapin_page_fault Kairui Song
2023-11-20 19:09 ` [PATCH 00/24] Swapin path refactor for optimization and bugfix Yosry Ahmed
2023-11-20 20:22   ` Chris Li
2023-11-22  6:46     ` Kairui Song
2023-11-22  6:43   ` Kairui Song

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20231119194740.94101-6-ryncsn@gmail.com \
    --to=ryncsn@gmail.com \
    --cc=akpm@linux-foundation.org \
    --cc=david@redhat.com \
    --cc=hannes@cmpxchg.org \
    --cc=hughd@google.com \
    --cc=kasong@tencent.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mhocko@suse.com \
    --cc=willy@infradead.org \
    --cc=ying.huang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox