linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Muchun Song <songmuchun@bytedance.com>
To: hannes@cmpxchg.org, mhocko@kernel.org, roman.gushchin@linux.dev,
	shakeelb@google.com
Cc: cgroups@vger.kernel.org, linux-mm@kvack.org,
	linux-kernel@vger.kernel.org, duanxiongchun@bytedance.com,
	longman@redhat.com, Muchun Song <songmuchun@bytedance.com>
Subject: [PATCH v4 11/11] mm: lru: use lruvec lock to serialize memcg changes
Date: Tue, 24 May 2022 14:05:51 +0800	[thread overview]
Message-ID: <20220524060551.80037-12-songmuchun@bytedance.com> (raw)
In-Reply-To: <20220524060551.80037-1-songmuchun@bytedance.com>

As described by commit fc574c23558c ("mm/swap.c: serialize memcg
changes in pagevec_lru_move_fn"), TestClearPageLRU() aims to
serialize mem_cgroup_move_account() during pagevec_lru_move_fn().
Now folio_lruvec_lock*() has the ability to detect whether page
memcg has been changed. So we can use lruvec lock to serialize
mem_cgroup_move_account() during pagevec_lru_move_fn(). This
change is a partial revert of the commit fc574c23558c ("mm/swap.c:
serialize memcg changes in pagevec_lru_move_fn").

And pagevec_lru_move_fn() is more hot compare with
mem_cgroup_move_account(), removing an atomic operation would be
an optimization. Also this change would not dirty cacheline for a
page which isn't on the LRU.

Signed-off-by: Muchun Song <songmuchun@bytedance.com>
---
 mm/memcontrol.c | 31 +++++++++++++++++++++++++++++++
 mm/swap.c       | 45 ++++++++++++++-------------------------------
 mm/vmscan.c     |  9 ++++-----
 3 files changed, 49 insertions(+), 36 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 1a35f7fde3ed..7b6d9c308d91 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1331,12 +1331,38 @@ struct lruvec *folio_lruvec_lock(struct folio *folio)
 	lruvec = folio_lruvec(folio);
 	spin_lock(&lruvec->lru_lock);
 
+	/*
+	 * The memcg of the page can be changed by any the following routines:
+	 *
+	 * 1) mem_cgroup_move_account() or
+	 * 2) memcg_reparent_objcgs()
+	 *
+	 * The possible bad scenario would like:
+	 *
+	 * CPU0:                CPU1:                CPU2:
+	 * lruvec = folio_lruvec()
+	 *
+	 *                      if (!isolate_lru_page())
+	 *                              mem_cgroup_move_account()
+	 *
+	 *                                           memcg_reparent_objcgs()
+	 *
+	 * spin_lock(&lruvec->lru_lock)
+	 *                ^^^^^^
+	 *              wrong lock
+	 *
+	 * Either CPU1 or CPU2 can change page memcg, so we need to check
+	 * whether page memcg is changed, if so, we should reacquire the
+	 * new lruvec lock.
+	 */
 	if (unlikely(lruvec_memcg(lruvec) != folio_memcg(folio))) {
 		spin_unlock(&lruvec->lru_lock);
 		goto retry;
 	}
 
 	/*
+	 * When we reach here, it means that the folio_memcg(folio) is stable.
+	 *
 	 * Preemption is disabled in the internal of spin_lock, which can serve
 	 * as RCU read-side critical sections.
 	 */
@@ -1367,6 +1393,7 @@ struct lruvec *folio_lruvec_lock_irq(struct folio *folio)
 	lruvec = folio_lruvec(folio);
 	spin_lock_irq(&lruvec->lru_lock);
 
+	/* See the comments in folio_lruvec_lock(). */
 	if (unlikely(lruvec_memcg(lruvec) != folio_memcg(folio))) {
 		spin_unlock_irq(&lruvec->lru_lock);
 		goto retry;
@@ -1402,6 +1429,7 @@ struct lruvec *folio_lruvec_lock_irqsave(struct folio *folio,
 	lruvec = folio_lruvec(folio);
 	spin_lock_irqsave(&lruvec->lru_lock, *flags);
 
+	/* See the comments in folio_lruvec_lock(). */
 	if (unlikely(lruvec_memcg(lruvec) != folio_memcg(folio))) {
 		spin_unlock_irqrestore(&lruvec->lru_lock, *flags);
 		goto retry;
@@ -5751,7 +5779,10 @@ static int mem_cgroup_move_account(struct page *page,
 	obj_cgroup_put(rcu_dereference(from->objcg));
 	rcu_read_unlock();
 
+	/* See the comments in folio_lruvec_lock(). */
+	spin_lock(&from_vec->lru_lock);
 	folio->memcg_data = (unsigned long)rcu_access_pointer(to->objcg);
+	spin_unlock(&from_vec->lru_lock);
 
 	__folio_memcg_unlock(from);
 
diff --git a/mm/swap.c b/mm/swap.c
index 9680f2fc48b1..984b100e84e4 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -199,14 +199,8 @@ static void pagevec_lru_move_fn(struct pagevec *pvec,
 		struct page *page = pvec->pages[i];
 		struct folio *folio = page_folio(page);
 
-		/* block memcg migration during page moving between lru */
-		if (!TestClearPageLRU(page))
-			continue;
-
 		lruvec = folio_lruvec_relock_irqsave(folio, lruvec, &flags);
 		(*move_fn)(page, lruvec);
-
-		SetPageLRU(page);
 	}
 	if (lruvec)
 		unlock_page_lruvec_irqrestore(lruvec, flags);
@@ -218,7 +212,7 @@ static void pagevec_move_tail_fn(struct page *page, struct lruvec *lruvec)
 {
 	struct folio *folio = page_folio(page);
 
-	if (!folio_test_unevictable(folio)) {
+	if (folio_test_lru(folio) && !folio_test_unevictable(folio)) {
 		lruvec_del_folio(lruvec, folio);
 		folio_clear_active(folio);
 		lruvec_add_folio_tail(lruvec, folio);
@@ -313,7 +307,8 @@ void lru_note_cost_folio(struct folio *folio)
 
 static void __folio_activate(struct folio *folio, struct lruvec *lruvec)
 {
-	if (!folio_test_active(folio) && !folio_test_unevictable(folio)) {
+	if (folio_test_lru(folio) && !folio_test_active(folio) &&
+	    !folio_test_unevictable(folio)) {
 		long nr_pages = folio_nr_pages(folio);
 
 		lruvec_del_folio(lruvec, folio);
@@ -370,12 +365,9 @@ static void folio_activate(struct folio *folio)
 {
 	struct lruvec *lruvec;
 
-	if (folio_test_clear_lru(folio)) {
-		lruvec = folio_lruvec_lock_irq(folio);
-		__folio_activate(folio, lruvec);
-		unlock_page_lruvec_irq(lruvec);
-		folio_set_lru(folio);
-	}
+	lruvec = folio_lruvec_lock_irq(folio);
+	__folio_activate(folio, lruvec);
+	unlock_page_lruvec_irq(lruvec);
 }
 #endif
 
@@ -518,6 +510,9 @@ static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec)
 	bool active = PageActive(page);
 	int nr_pages = thp_nr_pages(page);
 
+	if (!PageLRU(page))
+		return;
+
 	if (PageUnevictable(page))
 		return;
 
@@ -555,7 +550,7 @@ static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec)
 
 static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec)
 {
-	if (PageActive(page) && !PageUnevictable(page)) {
+	if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) {
 		int nr_pages = thp_nr_pages(page);
 
 		del_page_from_lru_list(page, lruvec);
@@ -571,7 +566,7 @@ static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec)
 
 static void lru_lazyfree_fn(struct page *page, struct lruvec *lruvec)
 {
-	if (PageAnon(page) && PageSwapBacked(page) &&
+	if (PageLRU(page) && PageAnon(page) && PageSwapBacked(page) &&
 	    !PageSwapCache(page) && !PageUnevictable(page)) {
 		int nr_pages = thp_nr_pages(page);
 
@@ -1006,8 +1001,9 @@ void __pagevec_release(struct pagevec *pvec)
 }
 EXPORT_SYMBOL(__pagevec_release);
 
-static void __pagevec_lru_add_fn(struct folio *folio, struct lruvec *lruvec)
+static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec)
 {
+	struct folio *folio = page_folio(page);
 	int was_unevictable = folio_test_clear_unevictable(folio);
 	long nr_pages = folio_nr_pages(folio);
 
@@ -1053,20 +1049,7 @@ static void __pagevec_lru_add_fn(struct folio *folio, struct lruvec *lruvec)
  */
 void __pagevec_lru_add(struct pagevec *pvec)
 {
-	int i;
-	struct lruvec *lruvec = NULL;
-	unsigned long flags = 0;
-
-	for (i = 0; i < pagevec_count(pvec); i++) {
-		struct folio *folio = page_folio(pvec->pages[i]);
-
-		lruvec = folio_lruvec_relock_irqsave(folio, lruvec, &flags);
-		__pagevec_lru_add_fn(folio, lruvec);
-	}
-	if (lruvec)
-		unlock_page_lruvec_irqrestore(lruvec, flags);
-	release_pages(pvec->pages, pvec->nr);
-	pagevec_reinit(pvec);
+	pagevec_lru_move_fn(pvec, __pagevec_lru_add_fn);
 }
 
 /**
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 6c9e2eafc8f9..ec1272ca5ead 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -4793,18 +4793,17 @@ void check_move_unevictable_pages(struct pagevec *pvec)
 		nr_pages = thp_nr_pages(page);
 		pgscanned += nr_pages;
 
-		/* block memcg migration during page moving between lru */
-		if (!TestClearPageLRU(page))
+		lruvec = folio_lruvec_relock_irq(folio, lruvec);
+
+		if (!PageLRU(page) || !PageUnevictable(page))
 			continue;
 
-		lruvec = folio_lruvec_relock_irq(folio, lruvec);
-		if (page_evictable(page) && PageUnevictable(page)) {
+		if (page_evictable(page)) {
 			del_page_from_lru_list(page, lruvec);
 			ClearPageUnevictable(page);
 			add_page_to_lru_list(page, lruvec);
 			pgrescued += nr_pages;
 		}
-		SetPageLRU(page);
 	}
 
 	if (lruvec) {
-- 
2.11.0



      parent reply	other threads:[~2022-05-24  6:28 UTC|newest]

Thread overview: 48+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-05-24  6:05 [PATCH v4 00/11] Use obj_cgroup APIs to charge the LRU pages Muchun Song
2022-05-24  6:05 ` [PATCH v4 01/11] mm: memcontrol: prepare objcg API for non-kmem usage Muchun Song
2022-05-24 19:01   ` Johannes Weiner
2022-05-25  8:46     ` Muchun Song
2022-05-25  2:36   ` Roman Gushchin
2022-05-25  7:57     ` Muchun Song
2022-05-25 12:37       ` Johannes Weiner
2022-05-25 13:08         ` Muchun Song
2022-05-24  6:05 ` [PATCH v4 02/11] mm: memcontrol: introduce compact_folio_lruvec_lock_irqsave Muchun Song
2022-05-24 19:22   ` Johannes Weiner
2022-05-25  9:38     ` Muchun Song
2022-05-24  6:05 ` [PATCH v4 03/11] mm: memcontrol: make lruvec lock safe when LRU pages are reparented Muchun Song
2022-05-24 19:23   ` Waiman Long
2022-05-25 10:20     ` Muchun Song
2022-05-25 14:59       ` Waiman Long
2022-05-24 19:27   ` Johannes Weiner
2022-05-25  9:53     ` Muchun Song
2022-05-25 12:30       ` Johannes Weiner
2022-05-25 13:03         ` Muchun Song
2022-05-25 14:48           ` Johannes Weiner
2022-05-25 15:38             ` Muchun Song
2022-05-26 20:17               ` Waiman Long
2022-05-27  2:55                 ` Muchun Song
2022-05-24  6:05 ` [PATCH v4 04/11] mm: vmscan: rework move_pages_to_lru() Muchun Song
2022-05-24 19:38   ` Johannes Weiner
2022-05-25 11:38     ` Muchun Song
2022-05-24 19:52   ` Waiman Long
2022-05-25 11:43     ` Muchun Song
2022-05-25  2:43   ` Roman Gushchin
2022-05-25 11:41     ` Muchun Song
2022-05-24  6:05 ` [PATCH v4 05/11] mm: thp: introduce folio_split_queue_lock{_irqsave}() Muchun Song
2022-05-24  6:05 ` [PATCH v4 06/11] mm: thp: make split queue lock safe when LRU pages are reparented Muchun Song
2022-05-25  2:54   ` Roman Gushchin
2022-05-25 11:44     ` Muchun Song
2022-05-24  6:05 ` [PATCH v4 07/11] mm: memcontrol: make all the callers of {folio,page}_memcg() safe Muchun Song
2022-05-25  3:03   ` Roman Gushchin
2022-05-25 11:51     ` Muchun Song
2022-05-24  6:05 ` [PATCH v4 08/11] mm: memcontrol: introduce memcg_reparent_ops Muchun Song
2022-05-24  6:05 ` [PATCH v4 09/11] mm: memcontrol: use obj_cgroup APIs to charge the LRU pages Muchun Song
2022-05-24 12:29   ` kernel test robot
2022-05-24 18:16   ` kernel test robot
2022-05-25  7:14   ` [mm] bec0ae1210: WARNING:possible_recursive_locking_detected kernel test robot
2022-05-24  6:05 ` [PATCH v4 10/11] mm: lru: add VM_BUG_ON_FOLIO to lru maintenance function Muchun Song
2022-05-24 19:44   ` Johannes Weiner
2022-05-25 11:59     ` Muchun Song
2022-05-25  2:40   ` Roman Gushchin
2022-05-25 11:58     ` Muchun Song
2022-05-24  6:05 ` Muchun Song [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220524060551.80037-12-songmuchun@bytedance.com \
    --to=songmuchun@bytedance.com \
    --cc=cgroups@vger.kernel.org \
    --cc=duanxiongchun@bytedance.com \
    --cc=hannes@cmpxchg.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=longman@redhat.com \
    --cc=mhocko@kernel.org \
    --cc=roman.gushchin@linux.dev \
    --cc=shakeelb@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox