From: Shivank Garg <shivankg@amd.com>
To: <akpm@linux-foundation.org>, <david@redhat.com>
Cc: <ziy@nvidia.com>, <willy@infradead.org>,
	<matthew.brost@intel.com>, <joshua.hahnjy@gmail.com>,
	<rakie.kim@sk.com>, <byungchul@sk.com>, <gourry@gourry.net>,
	<ying.huang@linux.alibaba.com>, <apopple@nvidia.com>,
	<lorenzo.stoakes@oracle.com>, <Liam.Howlett@oracle.com>,
	<vbabka@suse.cz>, <rppt@kernel.org>, <surenb@google.com>,
	<mhocko@suse.com>, <vkoul@kernel.org>, <lucas.demarchi@intel.com>,
	<rdunlap@infradead.org>, <jgg@ziepe.ca>, <kuba@kernel.org>,
	<justonli@chromium.org>, <ivecera@redhat.com>,
	<dave.jiang@intel.com>, <Jonathan.Cameron@huawei.com>,
	<dan.j.williams@intel.com>, <rientjes@google.com>,
	<Raghavendra.KodsaraThimmappa@amd.com>, <bharata@amd.com>,
	<shivankg@amd.com>, <alirad.malek@zptcorp.com>,
	<yiannis@zptcorp.com>, <weixugc@google.com>,
	<linux-kernel@vger.kernel.org>, <linux-mm@kvack.org>
Subject: [RFC V3 4/9] mm/migrate: add migrate_folios_batch_move to batch the folio move operations
Date: Tue, 23 Sep 2025 17:47:39 +0000
Message-ID: <20250923174752.35701-5-shivankg@amd.com>
In-Reply-To: <20250923174752.35701-1-shivankg@amd.com>

This is a preparatory patch that enables batch copying of folios
undergoing migration. Copying the folio contents in a batch lets us make
efficient use of DMA hardware or a multi-threaded folio copy. It uses
MIGRATE_NO_COPY to skip the content copy while the folio metadata is
copied, and performs the content copies later in a single batch.

Currently, the folio move operation is performed individually for each
folio, in sequence:
for_each_folio() {
        Copy folio metadata like flags and mappings
        Copy the folio content from src to dst
        Update page tables with dst folio
}

With this patch, we transition to a batch processing approach as shown
below:
for_each_folio() {
        Copy folio metadata like flags and mappings
}
Batch copy all src folios to dst
for_each_folio() {
        Update page tables with dst folios
}
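
Condensed, the batch path added below in migrate_folios_batch_move()
looks roughly as follows. This is only a sketch with error handling and
retry bookkeeping omitted; for_each_src_dst_pair() is shorthand for the
paired src/dst list walk, not a real helper:

/* Phase 1: copy metadata only, defer the content copy */
for_each_src_dst_pair(folio, dst) {
        __migrate_folio_read(dst, &old_page_state, &anon_vma);
        rc = _move_to_new_folio_prep(dst, folio, MIGRATE_NO_COPY);
        /* -EAGAIN folios are set aside for the existing serial retry */
}

/* Phase 2: copy all folio contents in one batch */
rc = folios_mc_copy(dst_folios, src_folios, nr_batched_folios);
/* on failure, fall back to per-folio folio_mc_copy() */

/* Phase 3: restore page tables, unlock and release the folios */
for_each_src_dst_pair(folio, dst) {
        __migrate_folio_extract(folio, &old_page_state, &anon_vma);
        _move_to_new_folio_finalize(dst, folio, MIGRATEPAGE_SUCCESS);
        _migrate_folio_move_finalize1(folio, dst, old_page_state);
        _migrate_folio_move_finalize2(folio, dst, reason, anon_vma);
}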

dst->private is used to store the page states and a possible anon_vma
pointer, and therefore needs to be cleared during the metadata copy step.
To avoid allocating additional memory to hold this data across the batch
copy, it is stashed in src->private after the metadata copy, since
src->private is no longer needed at that point.
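
Concretely, under MIGRATE_NO_COPY the hand-off inside __migrate_folio()
boils down to the lines below (a simplified extract of the first hunk):

unsigned long dst_private = (unsigned long)dst->private;
...
dst->private = NULL;        /* clear bookkeeping before the metadata copy */
...                         /* mapping, private and flags migrate to dst here */
src->private = (void *)dst_private; /* stash page state + anon_vma; read back
                                       later via __migrate_folio_extract() on
                                       the src folio */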

Co-developed-by: Zi Yan <ziy@nvidia.com>
Signed-off-by: Zi Yan <ziy@nvidia.com>
Signed-off-by: Shivank Garg <shivankg@amd.com>
---
 mm/migrate.c | 197 +++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 193 insertions(+), 4 deletions(-)

diff --git a/mm/migrate.c b/mm/migrate.c
index 3fe78ecb146a..ce94e73a930d 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -843,12 +843,15 @@ static int __migrate_folio(struct address_space *mapping, struct folio *dst,
 			   enum migrate_mode mode)
 {
 	int rc, expected_count = folio_expected_ref_count(src) + 1;
+	unsigned long dst_private = (unsigned long)dst->private;
 
 	/* Check whether src does not have extra refs before we do more work */
 	if (folio_ref_count(src) != expected_count)
 		return -EAGAIN;
 
-	if (mode != MIGRATE_NO_COPY) {
+	if (mode == MIGRATE_NO_COPY) {
+		dst->private = NULL;
+	} else {
 		rc = folio_mc_copy(dst, src);
 		if (unlikely(rc))
 			return rc;
@@ -862,6 +865,10 @@ static int __migrate_folio(struct address_space *mapping, struct folio *dst,
 		folio_attach_private(dst, folio_detach_private(src));
 
 	folio_migrate_flags(dst, src);
+
+	if (mode == MIGRATE_NO_COPY)
+		src->private = (void *)dst_private;
+
 	return MIGRATEPAGE_SUCCESS;
 }
 
@@ -1149,7 +1156,7 @@ static void __migrate_folio_record(struct folio *dst,
 	dst->private = (void *)anon_vma + old_page_state;
 }
 
-static void __migrate_folio_extract(struct folio *dst,
+static void __migrate_folio_read(struct folio *dst,
 				   int *old_page_state,
 				   struct anon_vma **anon_vmap)
 {
@@ -1157,6 +1164,12 @@ static void __migrate_folio_extract(struct folio *dst,
 
 	*anon_vmap = (struct anon_vma *)(private & ~PAGE_OLD_STATES);
 	*old_page_state = private & PAGE_OLD_STATES;
+}
+static void __migrate_folio_extract(struct folio *dst,
+				   int *old_page_state,
+				   struct anon_vma **anon_vmap)
+{
+	__migrate_folio_read(dst, old_page_state, anon_vmap);
 	dst->private = NULL;
 }
 
@@ -1776,6 +1789,176 @@ static void migrate_folios_move(struct list_head *src_folios,
 	}
 }
 
+static void migrate_folios_batch_move(struct list_head *src_folios,
+		struct list_head *dst_folios,
+		free_folio_t put_new_folio, unsigned long private,
+		enum migrate_mode mode, int reason,
+		struct list_head *ret_folios,
+		struct migrate_pages_stats *stats,
+		int *retry, int *thp_retry, int *nr_failed,
+		int *nr_retry_pages)
+{
+	struct folio *folio, *folio2, *dst, *dst2;
+	int rc, nr_pages = 0, nr_batched_folios = 0;
+	int old_page_state = 0;
+	struct anon_vma *anon_vma = NULL;
+	int is_thp = 0;
+	LIST_HEAD(err_src);
+	LIST_HEAD(err_dst);
+
+	/*
+	 * Iterate over the list of locked src/dst folios to copy the metadata
+	 */
+	dst = list_first_entry(dst_folios, struct folio, lru);
+	dst2 = list_next_entry(dst, lru);
+	list_for_each_entry_safe(folio, folio2, src_folios, lru) {
+		is_thp = folio_test_large(folio) && folio_test_pmd_mappable(folio);
+		nr_pages = folio_nr_pages(folio);
+
+		/*
+		 * dst->private is not cleared here. It is cleared and moved to
+		 * src->private in __migrate_folio().
+		 */
+		__migrate_folio_read(dst, &old_page_state, &anon_vma);
+
+		/*
+		 * Use MIGRATE_NO_COPY mode in migrate_folio family functions
+		 * to copy the flags, mapping and some other ancillary information.
+		 * This does everything except the page copy. The actual page copy
+		 * is handled later in a batch manner.
+		 */
+		if (unlikely(page_movable_ops(&folio->page)))
+			rc = -EAGAIN;
+		else
+			rc = _move_to_new_folio_prep(dst, folio, MIGRATE_NO_COPY);
+		/*
+		 * The rules are:
+		 *	Success: folio will be copied in batch
+		 *	-EAGAIN: move src/dst folios to tmp lists for
+		 *	         non-batch retry
+		 *	Other errno: put src folio on ret_folios list, restore
+		 *	             the dst folio
+		 */
+		if (rc == -EAGAIN) {
+			*retry += 1;
+			*thp_retry += is_thp;
+			*nr_retry_pages += nr_pages;
+
+			list_move_tail(&folio->lru, &err_src);
+			list_move_tail(&dst->lru, &err_dst);
+			__migrate_folio_record(dst, old_page_state, anon_vma);
+		} else if (rc != MIGRATEPAGE_SUCCESS) {
+			*nr_failed += 1;
+			stats->nr_thp_failed += is_thp;
+			stats->nr_failed_pages += nr_pages;
+
+			list_del(&dst->lru);
+			migrate_folio_undo_src(folio,
+					old_page_state & PAGE_WAS_MAPPED,
+					anon_vma, true, ret_folios);
+			migrate_folio_undo_dst(dst, true, put_new_folio, private);
+		} else { /* MIGRATEPAGE_SUCCESS */
+			nr_batched_folios++;
+		}
+
+		dst = dst2;
+		dst2 = list_next_entry(dst, lru);
+	}
+
+	/* Exit if folio list for batch migration is empty */
+	if (!nr_batched_folios)
+		goto out;
+
+	/* Batch copy the folios */
+	rc = folios_mc_copy(dst_folios, src_folios, nr_batched_folios);
+
+	/* TODO: Is there a better way of handling poison recovery
+	 * for batch copy, instead of falling back to serial copy?
+	 */
+	/* fallback to serial page copy if needed */
+	if (rc) {
+		dst = list_first_entry(dst_folios, struct folio, lru);
+		dst2 = list_next_entry(dst, lru);
+		list_for_each_entry_safe(folio, folio2, src_folios, lru) {
+			is_thp = folio_test_large(folio) &&
+				 folio_test_pmd_mappable(folio);
+			nr_pages = folio_nr_pages(folio);
+			rc = folio_mc_copy(dst, folio);
+
+			if (rc) {
+				/*
+				 * dst->private is moved to src->private in
+				 * __migrate_folio(), so page state and anon_vma
+				 * values can be extracted from (src) folio.
+				 */
+				__migrate_folio_extract(folio, &old_page_state,
+						&anon_vma);
+				migrate_folio_undo_src(folio,
+						old_page_state & PAGE_WAS_MAPPED,
+						anon_vma, true, ret_folios);
+				list_del(&dst->lru);
+				migrate_folio_undo_dst(dst, true, put_new_folio,
+						private);
+			}
+
+			switch (rc) {
+			case MIGRATEPAGE_SUCCESS:
+				stats->nr_succeeded += nr_pages;
+				stats->nr_thp_succeeded += is_thp;
+				break;
+			default:
+				*nr_failed += 1;
+				stats->nr_thp_failed += is_thp;
+				stats->nr_failed_pages += nr_pages;
+				break;
+			}
+
+			dst = dst2;
+			dst2 = list_next_entry(dst, lru);
+		}
+	}
+
+	/*
+	 * Iterate the folio lists to remove migration pte and restore them
+	 * as working pte. Unlock the folios, add/remove them to LRU lists (if
+	 * applicable) and release the src folios.
+	 */
+	dst = list_first_entry(dst_folios, struct folio, lru);
+	dst2 = list_next_entry(dst, lru);
+	list_for_each_entry_safe(folio, folio2, src_folios, lru) {
+		is_thp = folio_test_large(folio) && folio_test_pmd_mappable(folio);
+		nr_pages = folio_nr_pages(folio);
+		/*
+		 * dst->private is moved to src->private in __migrate_folio(),
+		 * so page state and anon_vma values can be extracted from
+		 * (src) folio.
+		 */
+		__migrate_folio_extract(folio, &old_page_state, &anon_vma);
+		list_del(&dst->lru);
+
+		_move_to_new_folio_finalize(dst, folio, MIGRATEPAGE_SUCCESS);
+
+		/*
+		 * The steps below only apply to LRU pages, which is ensured
+		 * since non-LRU pages have been removed from our list.
+		 */
+		_migrate_folio_move_finalize1(folio, dst, old_page_state);
+
+		_migrate_folio_move_finalize2(folio, dst, reason, anon_vma);
+
+		/* Page migration successful, increase stat counter */
+		stats->nr_succeeded += nr_pages;
+		stats->nr_thp_succeeded += is_thp;
+
+		dst = dst2;
+		dst2 = list_next_entry(dst, lru);
+	}
+out:
+	/* Add tmp folios back to the list to re-attempt migration. */
+	list_splice(&err_src, src_folios);
+	list_splice(&err_dst, dst_folios);
+}
+
 static void migrate_folios_undo(struct list_head *src_folios,
 		struct list_head *dst_folios,
 		free_folio_t put_new_folio, unsigned long private,
@@ -1986,13 +2169,19 @@ static int migrate_pages_batch(struct list_head *from,
 	/* Flush TLBs for all unmapped folios */
 	try_to_unmap_flush();
 
-	retry = 1;
+	retry = 0;
+	/* Batch move the unmapped folios */
+	migrate_folios_batch_move(&unmap_folios, &dst_folios,
+				put_new_folio, private, mode, reason,
+				ret_folios, stats, &retry, &thp_retry,
+				&nr_failed, &nr_retry_pages);
+
 	for (pass = 0; pass < nr_pass && retry; pass++) {
 		retry = 0;
 		thp_retry = 0;
 		nr_retry_pages = 0;
 
-		/* Move the unmapped folios */
+		/* Move the remaining unmapped folios */
 		migrate_folios_move(&unmap_folios, &dst_folios,
 				put_new_folio, private, mode, reason,
 				ret_folios, stats, &retry, &thp_retry,
-- 
2.43.0



Thread overview: 26+ messages
2025-09-23 17:47 [RFC V3 0/9] Accelerate page migration with batch copying and hardware offload Shivank Garg
2025-09-23 17:47 ` [RFC V3 1/9] mm/migrate: factor out code in move_to_new_folio() and migrate_folio_move() Shivank Garg
2025-10-02 10:30   ` Jonathan Cameron
2025-09-23 17:47 ` [RFC V3 2/9] mm/migrate: revive MIGRATE_NO_COPY in migrate_mode Shivank Garg
2025-09-23 17:47 ` [RFC V3 3/9] mm: Introduce folios_mc_copy() for batch copying folios Shivank Garg
2025-09-23 17:47 ` Shivank Garg [this message]
2025-10-02 11:03   ` [RFC V3 4/9] mm/migrate: add migrate_folios_batch_move to batch the folio move operations Jonathan Cameron
2025-10-16  9:17     ` Garg, Shivank
2025-09-23 17:47 ` [RFC V3 5/9] mm: add support for copy offload for folio Migration Shivank Garg
2025-10-02 11:10   ` Jonathan Cameron
2025-10-16  9:40     ` Garg, Shivank
2025-09-23 17:47 ` [RFC V3 6/9] mtcopy: introduce multi-threaded page copy routine Shivank Garg
2025-10-02 11:29   ` Jonathan Cameron
2025-10-20  8:28   ` Byungchul Park
2025-11-06  6:27     ` Garg, Shivank
2025-11-12  2:12       ` Byungchul Park
2025-09-23 17:47 ` [RFC V3 7/9] dcbm: add dma core batch migrator for batch page offloading Shivank Garg
2025-10-02 11:38   ` Jonathan Cameron
2025-10-16  9:59     ` Garg, Shivank
2025-09-23 17:47 ` [RFC V3 8/9] adjust NR_MAX_BATCHED_MIGRATION for testing Shivank Garg
2025-09-23 17:47 ` [RFC V3 9/9] mtcopy: spread threads across die " Shivank Garg
2025-09-24  1:49 ` [RFC V3 0/9] Accelerate page migration with batch copying and hardware offload Huang, Ying
2025-09-24  2:03   ` Zi Yan
2025-09-24  3:11     ` Huang, Ying
2025-09-24  3:22 ` Zi Yan
2025-10-02 17:10   ` Garg, Shivank
