linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Kairui Song <ryncsn@gmail.com>
To: linux-mm@kvack.org
Cc: Andrew Morton <akpm@linux-foundation.org>,
	"Huang, Ying" <ying.huang@intel.com>,
	David Hildenbrand <david@redhat.com>,
	Hugh Dickins <hughd@google.com>,
	Johannes Weiner <hannes@cmpxchg.org>,
	Matthew Wilcox <willy@infradead.org>,
	Michal Hocko <mhocko@suse.com>,
	linux-kernel@vger.kernel.org, Kairui Song <kasong@tencent.com>
Subject: [PATCH 23/24] swap: fix multiple swap leak when after cgroup migrate
Date: Mon, 20 Nov 2023 03:47:39 +0800	[thread overview]
Message-ID: <20231119194740.94101-24-ryncsn@gmail.com> (raw)
In-Reply-To: <20231119194740.94101-1-ryncsn@gmail.com>

From: Kairui Song <kasong@tencent.com>

When a process that previously swapped out some memory is moved to
another cgroup, and the cgroup it was previously in is dead, the
swapped-in pages will be leaked into the root cgroup. Previous commits
fixed this bug for the no-readahead path; this commit fixes the same
issue for the readahead path.

This can be easily reproduced by:
- Set up an SSD or HDD swap.
- Create memory cgroup A, B and C.
- Spawn process P1 in cgroup A and make it swap out some pages.
- Move process P1 to memory cgroup B.
- Destroy cgroup A.
- Do a swapoff in cgroup C.
- Swapped-in pages are accounted to cgroup C.

This patch fixes it so that the swapped-in pages are accounted to cgroup B.

Signed-off-by: Kairui Song <kasong@tencent.com>
---
 mm/swap.h       |  2 +-
 mm/swap_state.c | 19 ++++++++++---------
 mm/zswap.c      |  2 +-
 3 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/mm/swap.h b/mm/swap.h
index 795a25df87da..4374bf11ca41 100644
--- a/mm/swap.h
+++ b/mm/swap.h
@@ -55,7 +55,7 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 				   struct swap_iocb **plug);
 struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 				     struct mempolicy *mpol, pgoff_t ilx,
-				     bool *new_page_allocated);
+				     struct mm_struct *mm, bool *new_page_allocated);
 struct page *swapin_readahead(swp_entry_t entry, gfp_t flag,
 			      struct vm_fault *vmf, enum swap_cache_result *result);
 struct page *swapin_page_non_fault(swp_entry_t entry, gfp_t gfp_mask,
diff --git a/mm/swap_state.c b/mm/swap_state.c
index b377e55cb850..362a6f674b36 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -416,7 +416,7 @@ struct folio *filemap_get_incore_folio(struct address_space *mapping,
 
 struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 				     struct mempolicy *mpol, pgoff_t ilx,
-				     bool *new_page_allocated)
+				     struct mm_struct *mm, bool *new_page_allocated)
 {
 	struct swap_info_struct *si;
 	struct folio *folio;
@@ -462,7 +462,7 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 						mpol, ilx, numa_node_id());
 		if (!folio)
                         goto fail_put_swap;
-		if (mem_cgroup_swapin_charge_folio(folio, NULL, gfp_mask, entry))
+		if (mem_cgroup_swapin_charge_folio(folio, mm, gfp_mask, entry))
 			goto fail_put_folio;
 
 		/*
@@ -540,7 +540,7 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 
 	mpol = get_vma_policy(vma, addr, 0, &ilx);
 	page = __read_swap_cache_async(entry, gfp_mask, mpol, ilx,
-					&page_allocated);
+				       vma->vm_mm, &page_allocated);
 	mpol_cond_put(mpol);
 
 	if (page_allocated)
@@ -628,7 +628,8 @@ static unsigned long swapin_nr_pages(unsigned long offset)
  * are fairly likely to have been swapped out from the same node.
  */
 static struct page *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask,
-					   struct mempolicy *mpol, pgoff_t ilx)
+					   struct mempolicy *mpol, pgoff_t ilx,
+					   struct mm_struct *mm)
 {
 	struct page *page;
 	unsigned long entry_offset = swp_offset(entry);
@@ -657,7 +658,7 @@ static struct page *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask,
 		/* Ok, do the async read-ahead now */
 		page = __read_swap_cache_async(
 				swp_entry(swp_type(entry), offset),
-				gfp_mask, mpol, ilx, &page_allocated);
+				gfp_mask, mpol, ilx, mm, &page_allocated);
 		if (!page)
 			continue;
 		if (page_allocated) {
@@ -675,7 +676,7 @@ static struct page *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask,
 skip:
 	/* The page was likely read above, so no need for plugging here */
 	page = __read_swap_cache_async(entry, gfp_mask, mpol, ilx,
-					&page_allocated);
+				       mm, &page_allocated);
 	if (unlikely(page_allocated))
 		swap_readpage(page, false, NULL);
 	return page;
@@ -830,7 +831,7 @@ static struct page *swap_vma_readahead(swp_entry_t targ_entry, gfp_t gfp_mask,
 		pte_unmap(pte);
 		pte = NULL;
 		page = __read_swap_cache_async(entry, gfp_mask, mpol, ilx,
-						&page_allocated);
+					       vmf->vma->vm_mm, &page_allocated);
 		if (!page)
 			continue;
 		if (page_allocated) {
@@ -850,7 +851,7 @@ static struct page *swap_vma_readahead(swp_entry_t targ_entry, gfp_t gfp_mask,
 skip:
 	/* The page was likely read above, so no need for plugging here */
 	page = __read_swap_cache_async(targ_entry, gfp_mask, mpol, targ_ilx,
-					&page_allocated);
+				       vmf->vma->vm_mm, &page_allocated);
 	if (unlikely(page_allocated))
 		swap_readpage(page, false, NULL);
 	return page;
@@ -980,7 +981,7 @@ struct page *swapin_page_non_fault(swp_entry_t entry, gfp_t gfp_mask,
 			workingset_refault(page_folio(page), shadow);
 		cache_result = SWAP_CACHE_BYPASS;
 	} else {
-		page = swap_cluster_readahead(entry, gfp_mask, mpol, ilx);
+		page = swap_cluster_readahead(entry, gfp_mask, mpol, ilx, mm);
 		cache_result = SWAP_CACHE_MISS;
 	}
 done:
diff --git a/mm/zswap.c b/mm/zswap.c
index 030cc137138f..e2712ff169b1 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -1081,7 +1081,7 @@ static int zswap_writeback_entry(struct zswap_entry *entry,
 	/* try to allocate swap cache page */
 	mpol = get_task_policy(current);
 	page = __read_swap_cache_async(swpentry, GFP_KERNEL, mpol,
-				NO_INTERLEAVE_INDEX, &page_was_allocated);
+				NO_INTERLEAVE_INDEX, NULL, &page_was_allocated);
 	if (!page) {
 		ret = -ENOMEM;
 		goto fail;
-- 
2.42.0



  parent reply	other threads:[~2023-11-19 19:49 UTC|newest]

Thread overview: 93+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-11-19 19:47 [PATCH 00/24] Swapin path refactor for optimization and bugfix Kairui Song
2023-11-19 19:47 ` [PATCH 01/24] mm/swap: fix a potential undefined behavior issue Kairui Song
2023-11-19 20:55   ` Matthew Wilcox
2023-11-20  3:35     ` Chris Li
2023-11-20 11:14       ` Kairui Song
2023-11-20 17:34         ` Chris Li
2023-11-19 19:47 ` [PATCH 02/24] mm/swapfile.c: add back some comment Kairui Song
2023-11-19 19:47 ` [PATCH 03/24] mm/swap: move no readahead swapin code to a stand alone helper Kairui Song
2023-11-19 21:00   ` Matthew Wilcox
2023-11-20 11:14     ` Kairui Song
2023-11-20 14:55   ` Dan Carpenter
2023-11-21  5:34   ` Chris Li
2023-11-22 17:33     ` Kairui Song
2023-11-19 19:47 ` [PATCH 04/24] mm/swap: avoid setting page lock bit and doing extra unlock check Kairui Song
2023-11-20  4:17   ` Chris Li
2023-11-20 11:15     ` Kairui Song
2023-11-20 17:44       ` Chris Li
2023-11-22 17:32         ` Kairui Song
2023-11-22 20:57           ` Chris Li
2023-11-24  8:14             ` Kairui Song
2023-11-24  8:37               ` Christopher Li
2023-11-19 19:47 ` [PATCH 05/24] mm/swap: move readahead policy checking into swapin_readahead Kairui Song
2023-11-21  6:15   ` Chris Li
2023-11-21  6:35     ` Kairui Song
2023-11-21  7:41       ` Chris Li
2023-11-21  8:32         ` Kairui Song
2023-11-21 15:24           ` Chris Li
2023-11-19 19:47 ` [PATCH 06/24] swap: rework swapin_no_readahead arguments Kairui Song
2023-11-20  0:20   ` kernel test robot
2023-11-21  6:44   ` Chris Li
2023-11-23 10:51     ` Kairui Song
2023-11-19 19:47 ` [PATCH 07/24] mm/swap: move swap_count to header to be shared Kairui Song
2023-11-21  6:51   ` Chris Li
2023-11-21  7:03     ` Kairui Song
2023-11-19 19:47 ` [PATCH 08/24] mm/swap: check readahead policy per entry Kairui Song
2023-11-20  6:04   ` Huang, Ying
2023-11-20 11:17     ` Kairui Song
2023-11-21  1:10       ` Huang, Ying
2023-11-21  5:20         ` Chris Li
2023-11-21  5:13       ` Chris Li
2023-11-21  7:54   ` Chris Li
2023-11-23 10:52     ` Kairui Song
2023-11-19 19:47 ` [PATCH 09/24] mm/swap: inline __swap_count Kairui Song
2023-11-20  7:41   ` Huang, Ying
2023-11-21  8:02     ` Chris Li
2023-11-19 19:47 ` [PATCH 10/24] mm/swap: remove nr_rotate_swap and related code Kairui Song
2023-11-21 15:45   ` Chris Li
2023-11-19 19:47 ` [PATCH 11/24] mm/swap: also handle swapcache lookup in swapin_readahead Kairui Song
2023-11-20  0:47   ` kernel test robot
2023-11-21 16:06   ` Chris Li
2023-11-24  8:42     ` Kairui Song
2023-11-24  9:10       ` Chris Li
2023-11-19 19:47 ` [PATCH 12/24] mm/swap: simplify arguments for swap_cache_get_folio Kairui Song
2023-11-21 16:36   ` Chris Li
2023-11-19 19:47 ` [PATCH 13/24] swap: simplify swap_cache_get_folio Kairui Song
2023-11-21 16:50   ` Chris Li
2023-11-19 19:47 ` [PATCH 14/24] mm/swap: do shadow lookup as well when doing swap cache lookup Kairui Song
2023-11-21 16:55   ` Chris Li
2023-11-19 19:47 ` [PATCH 15/24] mm/swap: avoid an duplicated swap cache lookup for SYNCHRONOUS_IO device Kairui Song
2023-11-21 17:15   ` Chris Li
2023-11-22 18:08     ` Kairui Song
2023-11-19 19:47 ` [PATCH 16/24] mm/swap: reduce scope of get_swap_device in swapin path Kairui Song
2023-11-19 21:12   ` Matthew Wilcox
2023-11-20 11:14     ` Kairui Song
2023-11-21 17:25   ` Chris Li
2023-11-22  0:36   ` Huang, Ying
2023-11-23 11:13     ` Kairui Song
2023-11-24  0:40       ` Huang, Ying
2023-11-19 19:47 ` [PATCH 17/24] mm/swap: fix false error when swapoff race with swapin Kairui Song
2023-11-19 19:47 ` [PATCH 18/24] mm/swap: introduce a helper non fault swapin Kairui Song
2023-11-20  1:07   ` kernel test robot
2023-11-22  4:40   ` Chris Li
2023-11-28 11:22     ` Kairui Song
2023-12-13  2:22       ` Chris Li
2023-11-19 19:47 ` [PATCH 19/24] shmem, swap: refactor error check on OOM or race Kairui Song
2023-11-20  7:04   ` Chris Li
2023-11-20 11:17     ` Kairui Song
2023-11-19 19:47 ` [PATCH 20/24] swap: simplify and make swap_find_cache static Kairui Song
2023-11-22  5:01   ` Chris Li
2023-11-19 19:47 ` [PATCH 21/24] swap: make swapin_readahead result checking argument mandatory Kairui Song
2023-11-22  5:15   ` Chris Li
2023-11-24  8:14     ` Kairui Song
2023-11-19 19:47 ` [PATCH 22/24] swap: make swap_cluster_readahead static Kairui Song
2023-11-22  5:20   ` Chris Li
2023-11-19 19:47 ` Kairui Song [this message]
2023-11-20  7:35   ` [PATCH 23/24] swap: fix multiple swap leak when after cgroup migrate Huang, Ying
2023-11-20 11:17     ` Kairui Song
2023-11-22  5:34       ` Chris Li
2023-11-19 19:47 ` [PATCH 24/24] mm/swap: change swapin_readahead to swapin_page_fault Kairui Song
2023-11-20 19:09 ` [PATCH 00/24] Swapin path refactor for optimization and bugfix Yosry Ahmed
2023-11-20 20:22   ` Chris Li
2023-11-22  6:46     ` Kairui Song
2023-11-22  6:43   ` Kairui Song

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20231119194740.94101-24-ryncsn@gmail.com \
    --to=ryncsn@gmail.com \
    --cc=akpm@linux-foundation.org \
    --cc=david@redhat.com \
    --cc=hannes@cmpxchg.org \
    --cc=hughd@google.com \
    --cc=kasong@tencent.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mhocko@suse.com \
    --cc=willy@infradead.org \
    --cc=ying.huang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox