linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Hugh Dickins <hughd@google.com>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>,
	Hugh Dickins <hughd@google.com>,
	 linux-mm@kvack.org, Tony Luck <tony.luck@intel.com>,
	 Miaohe Lin <linmiaohe@huawei.com>,
	nao.horiguchi@gmail.com,  Matthew Wilcox <willy@infradead.org>,
	David Hildenbrand <david@redhat.com>,
	 Muchun Song <muchun.song@linux.dev>,
	Benjamin LaHaise <bcrl@kvack.org>,
	 jglisse@redhat.com, Zi Yan <ziy@nvidia.com>,
	 Jiaqi Yan <jiaqiyan@google.com>,
	Vishal Moola <vishal.moola@gmail.com>,
	 Alistair Popple <apopple@nvidia.com>,
	Jane Chu <jane.chu@oracle.com>,
	 Oscar Salvador <osalvador@suse.de>
Subject: [PATCH mm-unstable] mm: migrate: folio_ref_freeze() under xas_lock_irq()
Date: Mon, 24 Jun 2024 22:04:38 -0700 (PDT)	[thread overview]
Message-ID: <07edaae7-ea5d-b6ae-3a10-f611946f9688@google.com> (raw)

Commit "mm: migrate: split folio_migrate_mapping()" drew attention to
"Note, the folio_ref_freeze() is moved out of xas_lock_irq(), Since the
folio is already isolated and locked during migration, so suppose that
there is no functional change."

That was a mistake. Freezing a folio's refcount to 0 is much like taking
a spinlock: see how filemap_get_entry() takes rcu_read_lock() then spins
around until the folio is unfrozen. If the task freezing is preempted (or
calls cond_resched(), as folio_mc_copy() may do), then it risks deadlock:
in my case, one CPU in zap_pte_range() with free_swap_and_cache_nr()
trying to reclaim swap while PTL is held, all the other CPUs in reclaim
spinning for that PTL.

I'm uncertain whether it's necessary for interrupts to be disabled as
well as preemption, but since they have to be disabled for the page
cache migration, it's much the best to do it all together as before.
So revert to folio_ref_freeze() under xas_lock_irq(): but keep the
preliminary folio_ref_count() check, which does make sense before
trying to copy the folio's data.

Use "expected_count" for the expected count throughout.

Signed-off-by: Hugh Dickins <hughd@google.com>
---
 mm/migrate.c | 59 +++++++++++++++++++++++++---------------------------
 1 file changed, 28 insertions(+), 31 deletions(-)

diff --git a/mm/migrate.c b/mm/migrate.c
index 27f070f64f27..8beedbb42a93 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -400,8 +400,8 @@ static int folio_expected_refs(struct address_space *mapping,
  * 2 for folios with a mapping
  * 3 for folios with a mapping and PagePrivate/PagePrivate2 set.
  */
-static void __folio_migrate_mapping(struct address_space *mapping,
-		struct folio *newfolio, struct folio *folio, int expected_cnt)
+static int __folio_migrate_mapping(struct address_space *mapping,
+		struct folio *newfolio, struct folio *folio, int expected_count)
 {
 	XA_STATE(xas, &mapping->i_pages, folio_index(folio));
 	struct zone *oldzone, *newzone;
@@ -415,13 +415,18 @@ static void __folio_migrate_mapping(struct address_space *mapping,
 		newfolio->mapping = folio->mapping;
 		if (folio_test_swapbacked(folio))
 			__folio_set_swapbacked(newfolio);
-		return;
+		return MIGRATEPAGE_SUCCESS;
 	}
 
 	oldzone = folio_zone(folio);
 	newzone = folio_zone(newfolio);
 
 	xas_lock_irq(&xas);
+	if (!folio_ref_freeze(folio, expected_count)) {
+		xas_unlock_irq(&xas);
+		return -EAGAIN;
+	}
+
 	/* Now we know that no one else is looking at the folio */
 	newfolio->index = folio->index;
 	newfolio->mapping = folio->mapping;
@@ -456,7 +461,7 @@ static void __folio_migrate_mapping(struct address_space *mapping,
 	 * old folio by unfreezing to one less reference.
 	 * We know this isn't the last reference.
 	 */
-	folio_ref_unfreeze(folio, expected_cnt - nr);
+	folio_ref_unfreeze(folio, expected_count - nr);
 
 	xas_unlock(&xas);
 	/* Leave irq disabled to prevent preemption while updating stats */
@@ -504,23 +509,19 @@ static void __folio_migrate_mapping(struct address_space *mapping,
 		}
 	}
 	local_irq_enable();
+
+	return MIGRATEPAGE_SUCCESS;
 }
 
 int folio_migrate_mapping(struct address_space *mapping, struct folio *newfolio,
 			  struct folio *folio, int extra_count)
 {
-	int expected_cnt = folio_expected_refs(mapping, folio) + extra_count;
+	int expected_count = folio_expected_refs(mapping, folio) + extra_count;
 
-	if (!mapping) {
-		if (folio_ref_count(folio) != expected_cnt)
-			return -EAGAIN;
-	} else {
-		if (!folio_ref_freeze(folio, expected_cnt))
-			return -EAGAIN;
-	}
+	if (folio_ref_count(folio) != expected_count)
+		return -EAGAIN;
 
-	__folio_migrate_mapping(mapping, newfolio, folio, expected_cnt);
-	return MIGRATEPAGE_SUCCESS;
+	return __folio_migrate_mapping(mapping, newfolio, folio, expected_count);
 }
 EXPORT_SYMBOL(folio_migrate_mapping);
 
@@ -534,16 +535,18 @@ int migrate_huge_page_move_mapping(struct address_space *mapping,
 	XA_STATE(xas, &mapping->i_pages, folio_index(src));
 	int ret, expected_count = folio_expected_refs(mapping, src);
 
-	if (!folio_ref_freeze(src, expected_count))
+	if (folio_ref_count(src) != expected_count)
 		return -EAGAIN;
 
 	ret = folio_mc_copy(dst, src);
-	if (unlikely(ret)) {
-		folio_ref_unfreeze(src, expected_count);
+	if (unlikely(ret))
 		return ret;
-	}
 
 	xas_lock_irq(&xas);
+	if (!folio_ref_freeze(src, expected_count)) {
+		xas_unlock_irq(&xas);
+		return -EAGAIN;
+	}
 
 	dst->index = src->index;
 	dst->mapping = src->mapping;
@@ -660,24 +663,18 @@ static int __migrate_folio(struct address_space *mapping, struct folio *dst,
 			   struct folio *src, void *src_private,
 			   enum migrate_mode mode)
 {
-	int ret, expected_cnt = folio_expected_refs(mapping, src);
+	int ret, expected_count = folio_expected_refs(mapping, src);
 
-	if (!mapping) {
-		if (folio_ref_count(src) != expected_cnt)
-			return -EAGAIN;
-	} else {
-		if (!folio_ref_freeze(src, expected_cnt))
-			return -EAGAIN;
-	}
+	if (folio_ref_count(src) != expected_count)
+		return -EAGAIN;
 
 	ret = folio_mc_copy(dst, src);
-	if (unlikely(ret)) {
-		if (mapping)
-			folio_ref_unfreeze(src, expected_cnt);
+	if (unlikely(ret))
 		return ret;
-	}
 
-	__folio_migrate_mapping(mapping, dst, src, expected_cnt);
+	ret = __folio_migrate_mapping(mapping, dst, src, expected_count);
+	if (ret != MIGRATEPAGE_SUCCESS)
+		return ret;
 
 	if (src_private)
 		folio_attach_private(dst, folio_detach_private(src));
-- 
2.35.3



             reply	other threads:[~2024-06-25  5:04 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-06-25  5:04 Hugh Dickins [this message]
2024-06-25  9:01 ` Kefeng Wang
2024-06-25 19:23   ` Andrew Morton
2024-06-25 19:51   ` Hugh Dickins
2024-06-26  7:24     ` Kefeng Wang
2024-06-25 20:01 ` David Hildenbrand

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=07edaae7-ea5d-b6ae-3a10-f611946f9688@google.com \
    --to=hughd@google.com \
    --cc=akpm@linux-foundation.org \
    --cc=apopple@nvidia.com \
    --cc=bcrl@kvack.org \
    --cc=david@redhat.com \
    --cc=jane.chu@oracle.com \
    --cc=jglisse@redhat.com \
    --cc=jiaqiyan@google.com \
    --cc=linmiaohe@huawei.com \
    --cc=linux-mm@kvack.org \
    --cc=muchun.song@linux.dev \
    --cc=nao.horiguchi@gmail.com \
    --cc=osalvador@suse.de \
    --cc=tony.luck@intel.com \
    --cc=vishal.moola@gmail.com \
    --cc=wangkefeng.wang@huawei.com \
    --cc=willy@infradead.org \
    --cc=ziy@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox