* [PATCH RFC V2 1/9] mm/migrate: factor out code in move_to_new_folio() and migrate_folio_move()
2025-03-19 19:22 [PATCH RFC V2 0/9] Enhancements to Page Migration with Multi-threading and Batch Offloading to DMA Shivank Garg
@ 2025-03-19 19:22 ` Shivank Garg
2025-03-19 19:22 ` [PATCH RFC V2 2/9] mm/migrate: revive MIGRATE_NO_COPY in migrate_mode Shivank Garg
` (7 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Shivank Garg @ 2025-03-19 19:22 UTC (permalink / raw)
To: akpm, linux-mm, ziy
Cc: AneeshKumar.KizhakeVeetil, baolin.wang, bharata, david,
gregory.price, honggyu.kim, jane.chu, jhubbard, jon.grimm,
k.shutemov, leesuyeon0506, leillc, liam.howlett, linux-kernel,
mel.gorman, Michael.Day, Raghavendra.KodsaraThimmappa, riel,
rientjes, santosh.shukla, shivankg, shy828301, sj,
wangkefeng.wang, weixugc, willy, ying.huang, anannara,
wei.huang2, Jonathan.Cameron, hyeonggon.yoo, byungchul
From: Zi Yan <ziy@nvidia.com>
No functional change is intended. The factored-out code will be reused by an
upcoming batched folio move function.
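As a rough sketch only (not part of this patch), a batched caller could reuse
the factored-out helpers along these lines, with batch_copy_folios() standing
in for whatever copy mechanism a later patch in the series provides:

	/* Sketch: per-folio prep, one batched content copy, per-folio finalize. */
	static int move_folios_batched_sketch(struct list_head *dst_list,
					      struct list_head *src_list,
					      enum migrate_mode mode)
	{
		struct folio *src, *dst;
		int rc;

		dst = list_first_entry(dst_list, struct folio, lru);
		list_for_each_entry(src, src_list, lru) {
			rc = _move_to_new_folio_prep(dst, src, mode);
			if (rc != MIGRATEPAGE_SUCCESS)
				return rc;
			dst = list_next_entry(dst, lru);
		}

		rc = batch_copy_folios(dst_list, src_list);	/* hypothetical helper */

		dst = list_first_entry(dst_list, struct folio, lru);
		list_for_each_entry(src, src_list, lru) {
			_move_to_new_folio_finalize(dst, src, rc);
			dst = list_next_entry(dst, lru);
		}
		return rc;
	}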
Signed-off-by: Zi Yan <ziy@nvidia.com>
Signed-off-by: Shivank Garg <shivankg@amd.com>
---
mm/migrate.c | 101 +++++++++++++++++++++++++++++++++------------------
1 file changed, 65 insertions(+), 36 deletions(-)
diff --git a/mm/migrate.c b/mm/migrate.c
index fb19a18892c8..ce7ddc56e878 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1014,18 +1014,7 @@ static int fallback_migrate_folio(struct address_space *mapping,
return migrate_folio(mapping, dst, src, mode);
}
-/*
- * Move a page to a newly allocated page
- * The page is locked and all ptes have been successfully removed.
- *
- * The new page will have replaced the old page if this function
- * is successful.
- *
- * Return value:
- * < 0 - error code
- * MIGRATEPAGE_SUCCESS - success
- */
-static int move_to_new_folio(struct folio *dst, struct folio *src,
+static int _move_to_new_folio_prep(struct folio *dst, struct folio *src,
enum migrate_mode mode)
{
int rc = -EAGAIN;
@@ -1072,7 +1061,13 @@ static int move_to_new_folio(struct folio *dst, struct folio *src,
WARN_ON_ONCE(rc == MIGRATEPAGE_SUCCESS &&
!folio_test_isolated(src));
}
+out:
+ return rc;
+}
+static void _move_to_new_folio_finalize(struct folio *dst, struct folio *src,
+ int rc)
+{
/*
* When successful, old pagecache src->mapping must be cleared before
* src is freed; but stats require that PageAnon be left as PageAnon.
@@ -1099,7 +1094,29 @@ static int move_to_new_folio(struct folio *dst, struct folio *src,
if (likely(!folio_is_zone_device(dst)))
flush_dcache_folio(dst);
}
-out:
+}
+
+
+/*
+ * Move a page to a newly allocated page
+ * The page is locked and all ptes have been successfully removed.
+ *
+ * The new page will have replaced the old page if this function
+ * is successful.
+ *
+ * Return value:
+ * < 0 - error code
+ * MIGRATEPAGE_SUCCESS - success
+ */
+static int move_to_new_folio(struct folio *dst, struct folio *src,
+ enum migrate_mode mode)
+{
+ int rc;
+
+ rc = _move_to_new_folio_prep(dst, src, mode);
+
+ _move_to_new_folio_finalize(dst, src, rc);
+
return rc;
}
@@ -1341,29 +1358,9 @@ static int migrate_folio_unmap(new_folio_t get_new_folio,
return rc;
}
-/* Migrate the folio to the newly allocated folio in dst. */
-static int migrate_folio_move(free_folio_t put_new_folio, unsigned long private,
- struct folio *src, struct folio *dst,
- enum migrate_mode mode, enum migrate_reason reason,
- struct list_head *ret)
+static void _migrate_folio_move_finalize1(struct folio *src, struct folio *dst,
+ int old_page_state)
{
- int rc;
- int old_page_state = 0;
- struct anon_vma *anon_vma = NULL;
- bool is_lru = !__folio_test_movable(src);
- struct list_head *prev;
-
- __migrate_folio_extract(dst, &old_page_state, &anon_vma);
- prev = dst->lru.prev;
- list_del(&dst->lru);
-
- rc = move_to_new_folio(dst, src, mode);
- if (rc)
- goto out;
-
- if (unlikely(!is_lru))
- goto out_unlock_both;
-
/*
* When successful, push dst to LRU immediately: so that if it
* turns out to be an mlocked page, remove_migration_ptes() will
@@ -1379,8 +1376,12 @@ static int migrate_folio_move(free_folio_t put_new_folio, unsigned long private,
if (old_page_state & PAGE_WAS_MAPPED)
remove_migration_ptes(src, dst, 0);
+}
-out_unlock_both:
+static void _migrate_folio_move_finalize2(struct folio *src, struct folio *dst,
+ enum migrate_reason reason,
+ struct anon_vma *anon_vma)
+{
folio_unlock(dst);
set_page_owner_migrate_reason(&dst->page, reason);
/*
@@ -1400,6 +1401,34 @@ static int migrate_folio_move(free_folio_t put_new_folio, unsigned long private,
put_anon_vma(anon_vma);
folio_unlock(src);
migrate_folio_done(src, reason);
+}
+
+/* Migrate the folio to the newly allocated folio in dst. */
+static int migrate_folio_move(free_folio_t put_new_folio, unsigned long private,
+ struct folio *src, struct folio *dst,
+ enum migrate_mode mode, enum migrate_reason reason,
+ struct list_head *ret)
+{
+ int rc;
+ int old_page_state = 0;
+ struct anon_vma *anon_vma = NULL;
+ bool is_lru = !__folio_test_movable(src);
+ struct list_head *prev;
+
+ __migrate_folio_extract(dst, &old_page_state, &anon_vma);
+ prev = dst->lru.prev;
+ list_del(&dst->lru);
+
+ rc = move_to_new_folio(dst, src, mode);
+ if (rc)
+ goto out;
+
+ if (unlikely(!is_lru))
+ goto out_unlock_both;
+
+ _migrate_folio_move_finalize1(src, dst, old_page_state);
+out_unlock_both:
+ _migrate_folio_move_finalize2(src, dst, reason, anon_vma);
return rc;
out:
--
2.34.1
* [PATCH RFC V2 2/9] mm/migrate: revive MIGRATE_NO_COPY in migrate_mode
2025-03-19 19:22 [PATCH RFC V2 0/9] Enhancements to Page Migration with Multi-threading and Batch Offloading to DMA Shivank Garg
2025-03-19 19:22 ` [PATCH RFC V2 1/9] mm/migrate: factor out code in move_to_new_folio() and migrate_folio_move() Shivank Garg
@ 2025-03-19 19:22 ` Shivank Garg
2025-03-19 19:22 ` [PATCH RFC V2 3/9] mm: batch folio copying during migration Shivank Garg
` (6 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Shivank Garg @ 2025-03-19 19:22 UTC (permalink / raw)
To: akpm, linux-mm, ziy
Cc: AneeshKumar.KizhakeVeetil, baolin.wang, bharata, david,
gregory.price, honggyu.kim, jane.chu, jhubbard, jon.grimm,
k.shutemov, leesuyeon0506, leillc, liam.howlett, linux-kernel,
mel.gorman, Michael.Day, Raghavendra.KodsaraThimmappa, riel,
rientjes, santosh.shukla, shivankg, shy828301, sj,
wangkefeng.wang, weixugc, willy, ying.huang, anannara,
wei.huang2, Jonathan.Cameron, hyeonggon.yoo, byungchul
From: Zi Yan <ziy@nvidia.com>
This is a preparatory patch. The added MIGRATE_NO_COPY mode will be used by
the following patches to implement batched page copy functions: it skips the
folio copy in __migrate_folio() so that the contents can be copied in one shot
at the end.
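As a minimal sketch (the batched caller and folios_mc_copy() are added later
in this series), the intended usage is a metadata-only move first, with the
content copy deferred:

	/* metadata-only move: folio_mc_copy() is skipped in __migrate_folio() */
	rc = _move_to_new_folio_prep(dst, src, MIGRATE_NO_COPY);
	...
	/* later: copy the contents of all batched folios in one shot */
	rc = folios_mc_copy(dst_list, src_list, nr_folios);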
Signed-off-by: Zi Yan <ziy@nvidia.com>
Signed-off-by: Shivank Garg <shivankg@amd.com>
---
include/linux/migrate_mode.h | 2 ++
mm/migrate.c | 8 +++++---
2 files changed, 7 insertions(+), 3 deletions(-)
diff --git a/include/linux/migrate_mode.h b/include/linux/migrate_mode.h
index 265c4328b36a..9af6c949a057 100644
--- a/include/linux/migrate_mode.h
+++ b/include/linux/migrate_mode.h
@@ -7,11 +7,13 @@
* on most operations but not ->writepage as the potential stall time
* is too significant
* MIGRATE_SYNC will block when migrating pages
+ * MIGRATE_NO_COPY will not copy page content
*/
enum migrate_mode {
MIGRATE_ASYNC,
MIGRATE_SYNC_LIGHT,
MIGRATE_SYNC,
+ MIGRATE_NO_COPY,
};
enum migrate_reason {
diff --git a/mm/migrate.c b/mm/migrate.c
index ce7ddc56e878..0d40ac069cea 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -757,9 +757,11 @@ static int __migrate_folio(struct address_space *mapping, struct folio *dst,
if (folio_ref_count(src) != expected_count)
return -EAGAIN;
- rc = folio_mc_copy(dst, src);
- if (unlikely(rc))
- return rc;
+ if (mode != MIGRATE_NO_COPY) {
+ rc = folio_mc_copy(dst, src);
+ if (unlikely(rc))
+ return rc;
+ }
rc = __folio_migrate_mapping(mapping, dst, src, expected_count);
if (rc != MIGRATEPAGE_SUCCESS)
--
2.34.1
* [PATCH RFC V2 3/9] mm: batch folio copying during migration
2025-03-19 19:22 [PATCH RFC V2 0/9] Enhancements to Page Migration with Multi-threading and Batch Offloading to DMA Shivank Garg
2025-03-19 19:22 ` [PATCH RFC V2 1/9] mm/migrate: factor out code in move_to_new_folio() and migrate_folio_move() Shivank Garg
2025-03-19 19:22 ` [PATCH RFC V2 2/9] mm/migrate: revive MIGRATE_NO_COPY in migrate_mode Shivank Garg
@ 2025-03-19 19:22 ` Shivank Garg
2025-03-19 19:22 ` [PATCH RFC V2 4/9] mm/migrate: add migrate_folios_batch_move to batch the folio move operations Shivank Garg
` (5 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Shivank Garg @ 2025-03-19 19:22 UTC (permalink / raw)
To: akpm, linux-mm, ziy
Cc: AneeshKumar.KizhakeVeetil, baolin.wang, bharata, david,
gregory.price, honggyu.kim, jane.chu, jhubbard, jon.grimm,
k.shutemov, leesuyeon0506, leillc, liam.howlett, linux-kernel,
mel.gorman, Michael.Day, Raghavendra.KodsaraThimmappa, riel,
rientjes, santosh.shukla, shivankg, shy828301, sj,
wangkefeng.wang, weixugc, willy, ying.huang, anannara,
wei.huang2, Jonathan.Cameron, hyeonggon.yoo, byungchul
Introduce folios_copy() and folios_mc_copy() to copy folio contents from a
list of src folios to a list of dst folios.
This is a preparatory patch for batch page migration offloading.
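A minimal usage sketch (folio gathering elided; both lists are linked through
folio->lru and must be non-empty and of equal length):

	LIST_HEAD(src_list);
	LIST_HEAD(dst_list);
	int nr = 0, rc;

	/* ... add matching src/dst folios to the lists, counting them in nr ... */

	/* machine-check aware variant: returns non-zero if a copy hits poison */
	rc = folios_mc_copy(&dst_list, &src_list, nr);

	/* folios_copy(&dst_list, &src_list, nr) is the plain, non-failing variant */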
Signed-off-by: Shivank Garg <shivankg@amd.com>
---
include/linux/mm.h | 4 ++++
mm/util.c | 41 +++++++++++++++++++++++++++++++++++++++++
2 files changed, 45 insertions(+)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8483e09aeb2c..612cba3d3dac 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1301,7 +1301,11 @@ void __folio_put(struct folio *folio);
void split_page(struct page *page, unsigned int order);
void folio_copy(struct folio *dst, struct folio *src);
+void folios_copy(struct list_head *dst_list, struct list_head *src_list,
+ int __maybe_unused folios_cnt);
int folio_mc_copy(struct folio *dst, struct folio *src);
+int folios_mc_copy(struct list_head *dst_list, struct list_head *src_list,
+ int __maybe_unused folios_cnt);
unsigned long nr_free_buffer_pages(void);
diff --git a/mm/util.c b/mm/util.c
index 8c965474d329..5d00d4c5b2dd 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -908,6 +908,47 @@ int folio_mc_copy(struct folio *dst, struct folio *src)
}
EXPORT_SYMBOL(folio_mc_copy);
+/**
+ * folios_copy - Copy the contents of a list of folios.
+ * @dst_list: Folios to copy to.
+ * @src_list: Folios to copy from.
+ * @folios_cnt: Number of folios (currently unused).
+ *
+ * The folio contents are copied from @src_list to @dst_list. The caller must
+ * ensure that both lists are non-empty and contain the same number of folios.
+ * This may sleep.
+ */
+void folios_copy(struct list_head *dst_list, struct list_head *src_list,
+ int __maybe_unused folios_cnt)
+{
+ struct folio *src, *dst;
+
+ dst = list_first_entry(dst_list, struct folio, lru);
+ list_for_each_entry(src, src_list, lru) {
+ cond_resched();
+ folio_copy(dst, src);
+ dst = list_next_entry(dst, lru);
+ }
+}
+
+int folios_mc_copy(struct list_head *dst_list, struct list_head *src_list,
+ int __maybe_unused folios_cnt)
+{
+ struct folio *src, *dst;
+ int ret;
+
+ dst = list_first_entry(dst_list, struct folio, lru);
+ list_for_each_entry(src, src_list, lru) {
+ cond_resched();
+ ret = folio_mc_copy(dst, src);
+ if (ret)
+ return ret;
+ dst = list_next_entry(dst, lru);
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(folios_mc_copy);
+
int sysctl_overcommit_memory __read_mostly = OVERCOMMIT_GUESS;
int sysctl_overcommit_ratio __read_mostly = 50;
unsigned long sysctl_overcommit_kbytes __read_mostly;
--
2.34.1
* [PATCH RFC V2 4/9] mm/migrate: add migrate_folios_batch_move to batch the folio move operations
2025-03-19 19:22 [PATCH RFC V2 0/9] Enhancements to Page Migration with Multi-threading and Batch Offloading to DMA Shivank Garg
` (2 preceding siblings ...)
2025-03-19 19:22 ` [PATCH RFC V2 3/9] mm: batch folio copying during migration Shivank Garg
@ 2025-03-19 19:22 ` Shivank Garg
2025-03-19 19:22 ` [PATCH RFC V2 5/9] mm: add support for copy offload for folio Migration Shivank Garg
` (4 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Shivank Garg @ 2025-03-19 19:22 UTC (permalink / raw)
To: akpm, linux-mm, ziy
Cc: AneeshKumar.KizhakeVeetil, baolin.wang, bharata, david,
gregory.price, honggyu.kim, jane.chu, jhubbard, jon.grimm,
k.shutemov, leesuyeon0506, leillc, liam.howlett, linux-kernel,
mel.gorman, Michael.Day, Raghavendra.KodsaraThimmappa, riel,
rientjes, santosh.shukla, shivankg, shy828301, sj,
wangkefeng.wang, weixugc, willy, ying.huang, anannara,
wei.huang2, Jonathan.Cameron, hyeonggon.yoo, byungchul
This is a preparatory patch that enables batch copying for folios undergoing
migration. By batching the copy of folio contents, we can efficiently utilize
the capabilities of DMA hardware or multi-threaded folio copy. It uses
MIGRATE_NO_COPY to skip the folio copy during the metadata copy step and
performs the copies in a batch later.
Currently, the folio move operation is performed individually for each
folio in sequential manner:
for_each_folio() {
Copy folio metadata like flags and mappings
Copy the folio content from src to dst
Update page tables with dst folio
}
With this patch, we transition to a batch processing approach as shown
below:
for_each_folio() {
Copy folio metadata like flags and mappings
}
Batch copy all src folios to dst
for_each_folio() {
Update page tables with dst folios
}
dst->private is used to store the page state and a possible anon_vma pointer,
so it needs to be cleared during the metadata copy step. To avoid an extra
memory allocation for holding this data during the batch copy, it is stashed
in src->private after the metadata copy, since src->private is no longer used
at that point. A sketch of this lifecycle follows.
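Illustrative only, based on the helpers touched by this patch:

	/*
	 * unmap phase:    __migrate_folio_record(dst, old_page_state, anon_vma)
	 *                     dst->private = anon_vma | old_page_state
	 * metadata copy:  __migrate_folio(..., MIGRATE_NO_COPY)
	 *                     dst->private is cleared and its value is moved
	 *                     to src->private
	 * finalize:       __migrate_folio_extract(src, &old_page_state, &anon_vma)
	 *                     the state is recovered from src and src->private
	 *                     is cleared
	 */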
[Zi Yan: Refactor the patch. Improved the original patch by removing the
need for an extra mig_info allocation (for storing the anon_vma and old page
state). Instead, reuse src->private to store the data, making the
implementation simpler and more efficient.]
Signed-off-by: Shivank Garg <shivankg@amd.com>
Signed-off-by: Zi Yan <ziy@nvidia.com>
---
mm/migrate.c | 204 +++++++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 198 insertions(+), 6 deletions(-)
diff --git a/mm/migrate.c b/mm/migrate.c
index 0d40ac069cea..8b6cfb60087c 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -752,12 +752,15 @@ static int __migrate_folio(struct address_space *mapping, struct folio *dst,
enum migrate_mode mode)
{
int rc, expected_count = folio_expected_refs(mapping, src);
+ unsigned long dst_private = (unsigned long)dst->private;
/* Check whether src does not have extra refs before we do more work */
if (folio_ref_count(src) != expected_count)
return -EAGAIN;
- if (mode != MIGRATE_NO_COPY) {
+ if (mode == MIGRATE_NO_COPY)
+ dst->private = NULL;
+ else {
rc = folio_mc_copy(dst, src);
if (unlikely(rc))
return rc;
@@ -771,6 +774,10 @@ static int __migrate_folio(struct address_space *mapping, struct folio *dst,
folio_attach_private(dst, folio_detach_private(src));
folio_migrate_flags(dst, src);
+
+ if (mode == MIGRATE_NO_COPY)
+ src->private = (void *)dst_private;
+
return MIGRATEPAGE_SUCCESS;
}
@@ -1044,7 +1051,7 @@ static int _move_to_new_folio_prep(struct folio *dst, struct folio *src,
mode);
else
rc = fallback_migrate_folio(mapping, dst, src, mode);
- } else {
+ } else if (mode != MIGRATE_NO_COPY) {
const struct movable_operations *mops;
/*
@@ -1062,7 +1069,8 @@ static int _move_to_new_folio_prep(struct folio *dst, struct folio *src,
rc = mops->migrate_page(&dst->page, &src->page, mode);
WARN_ON_ONCE(rc == MIGRATEPAGE_SUCCESS &&
!folio_test_isolated(src));
- }
+ } else
+ rc = -EAGAIN;
out:
return rc;
}
@@ -1140,7 +1148,7 @@ static void __migrate_folio_record(struct folio *dst,
dst->private = (void *)anon_vma + old_page_state;
}
-static void __migrate_folio_extract(struct folio *dst,
+static void __migrate_folio_read(struct folio *dst,
int *old_page_state,
struct anon_vma **anon_vmap)
{
@@ -1148,6 +1156,13 @@ static void __migrate_folio_extract(struct folio *dst,
*anon_vmap = (struct anon_vma *)(private & ~PAGE_OLD_STATES);
*old_page_state = private & PAGE_OLD_STATES;
+}
+
+static void __migrate_folio_extract(struct folio *dst,
+ int *old_page_state,
+ struct anon_vma **anon_vmap)
+{
+ __migrate_folio_read(dst, old_page_state, anon_vmap);
dst->private = NULL;
}
@@ -1770,6 +1785,178 @@ static void migrate_folios_move(struct list_head *src_folios,
}
}
+static void migrate_folios_batch_move(struct list_head *src_folios,
+ struct list_head *dst_folios,
+ free_folio_t put_new_folio, unsigned long private,
+ enum migrate_mode mode, int reason,
+ struct list_head *ret_folios,
+ struct migrate_pages_stats *stats,
+ int *retry, int *thp_retry, int *nr_failed,
+ int *nr_retry_pages)
+{
+ struct folio *folio, *folio2, *dst, *dst2;
+ int rc, nr_pages = 0, nr_batched_folios = 0;
+ int old_page_state = 0;
+ struct anon_vma *anon_vma = NULL;
+ int is_thp = 0;
+ LIST_HEAD(err_src);
+ LIST_HEAD(err_dst);
+
+ if (mode != MIGRATE_ASYNC) {
+ *retry += 1;
+ return;
+ }
+
+ /*
+ * Iterate over the list of locked src/dst folios to copy the metadata
+ */
+ dst = list_first_entry(dst_folios, struct folio, lru);
+ dst2 = list_next_entry(dst, lru);
+ list_for_each_entry_safe(folio, folio2, src_folios, lru) {
+ is_thp = folio_test_large(folio) && folio_test_pmd_mappable(folio);
+ nr_pages = folio_nr_pages(folio);
+
+ /*
+ * dst->private is not cleared here. It is cleared and moved to
+ * src->private in __migrate_folio().
+ */
+ __migrate_folio_read(dst, &old_page_state, &anon_vma);
+
+ /*
+ * Use MIGRATE_NO_COPY mode in migrate_folio family functions
+ * to copy the flags, mapping and some other ancillary information.
+ * This does everything except the page copy. The actual page copy
+ * is handled later in a batch manner.
+ */
+ rc = _move_to_new_folio_prep(dst, folio, MIGRATE_NO_COPY);
+
+ /*
+ * The rules are:
+ * Success: folio will be copied in batch
+ * -EAGAIN: move src/dst folios to tmp lists for
+ * non-batch retry
+ * Other errno: put src folio on ret_folios list, restore
+ * the dst folio
+ */
+ if (rc == -EAGAIN) {
+ *retry += 1;
+ *thp_retry += is_thp;
+ *nr_retry_pages += nr_pages;
+
+ list_move_tail(&folio->lru, &err_src);
+ list_move_tail(&dst->lru, &err_dst);
+ __migrate_folio_record(dst, old_page_state, anon_vma);
+ } else if (rc != MIGRATEPAGE_SUCCESS) {
+ *nr_failed += 1;
+ stats->nr_thp_failed += is_thp;
+ stats->nr_failed_pages += nr_pages;
+
+ list_del(&dst->lru);
+ migrate_folio_undo_src(folio,
+ old_page_state & PAGE_WAS_MAPPED,
+ anon_vma, true, ret_folios);
+ migrate_folio_undo_dst(dst, true, put_new_folio, private);
+ } else /* MIGRATEPAGE_SUCCESS */
+ nr_batched_folios++;
+
+ dst = dst2;
+ dst2 = list_next_entry(dst, lru);
+ }
+
+ /* Exit if folio list for batch migration is empty */
+ if (!nr_batched_folios)
+ goto out;
+
+ /* Batch copy the folios */
+ rc = folios_mc_copy(dst_folios, src_folios, nr_batched_folios);
+
+ /* TODO: Is there a better way of handling the poison
+ * recover for batch copy, instead of falling back to serial copy?
+ */
+ /* fallback to serial page copy if needed */
+ if (rc) {
+ dst = list_first_entry(dst_folios, struct folio, lru);
+ dst2 = list_next_entry(dst, lru);
+ list_for_each_entry_safe(folio, folio2, src_folios, lru) {
+ is_thp = folio_test_large(folio) &&
+ folio_test_pmd_mappable(folio);
+ nr_pages = folio_nr_pages(folio);
+ rc = folio_mc_copy(dst, folio);
+
+ if (rc) {
+ /*
+ * dst->private is moved to src->private in
+ * __migrate_folio(), so page state and anon_vma
+ * values can be extracted from (src) folio.
+ */
+ __migrate_folio_extract(folio, &old_page_state,
+ &anon_vma);
+ migrate_folio_undo_src(folio,
+ old_page_state & PAGE_WAS_MAPPED,
+ anon_vma, true, ret_folios);
+ list_del(&dst->lru);
+ migrate_folio_undo_dst(dst, true, put_new_folio,
+ private);
+ }
+
+ switch (rc) {
+ case MIGRATEPAGE_SUCCESS:
+ stats->nr_succeeded += nr_pages;
+ stats->nr_thp_succeeded += is_thp;
+ break;
+ default:
+ *nr_failed += 1;
+ stats->nr_thp_failed += is_thp;
+ stats->nr_failed_pages += nr_pages;
+ break;
+ }
+
+ dst = dst2;
+ dst2 = list_next_entry(dst, lru);
+ }
+ }
+
+ /*
+ * Iterate the folio lists to remove migration pte and restore them
+ * as working pte. Unlock the folios, add/remove them to LRU lists (if
+ * applicable) and release the src folios.
+ */
+ dst = list_first_entry(dst_folios, struct folio, lru);
+ dst2 = list_next_entry(dst, lru);
+ list_for_each_entry_safe(folio, folio2, src_folios, lru) {
+ is_thp = folio_test_large(folio) && folio_test_pmd_mappable(folio);
+ nr_pages = folio_nr_pages(folio);
+ /*
+ * dst->private is moved to src->private in __migrate_folio(),
+ * so page state and anon_vma values can be extracted from
+ * (src) folio.
+ */
+ __migrate_folio_extract(folio, &old_page_state, &anon_vma);
+ list_del(&dst->lru);
+
+ _move_to_new_folio_finalize(dst, folio, MIGRATEPAGE_SUCCESS);
+
+ /*
+ * The few steps below only apply to LRU pages, which is
+ * ensured since non-LRU pages have been removed from the list.
+ */
+ _migrate_folio_move_finalize1(folio, dst, old_page_state);
+
+ _migrate_folio_move_finalize2(folio, dst, reason, anon_vma);
+
+ /* Page migration successful, increase stat counter */
+ stats->nr_succeeded += nr_pages;
+ stats->nr_thp_succeeded += is_thp;
+
+ dst = dst2;
+ dst2 = list_next_entry(dst, lru);
+ }
+out:
+ /* Add tmp folios back to the list to re-attempt migration. */
+ list_splice(&err_src, src_folios);
+ list_splice(&err_dst, dst_folios);
+}
+
static void migrate_folios_undo(struct list_head *src_folios,
struct list_head *dst_folios,
free_folio_t put_new_folio, unsigned long private,
@@ -1980,13 +2167,18 @@ static int migrate_pages_batch(struct list_head *from,
/* Flush TLBs for all unmapped folios */
try_to_unmap_flush();
- retry = 1;
+ retry = 0;
+ /* Batch move the unmapped folios */
+ migrate_folios_batch_move(&unmap_folios, &dst_folios, put_new_folio,
+ private, mode, reason, ret_folios, stats, &retry,
+ &thp_retry, &nr_failed, &nr_retry_pages);
+
for (pass = 0; pass < nr_pass && retry; pass++) {
retry = 0;
thp_retry = 0;
nr_retry_pages = 0;
- /* Move the unmapped folios */
+ /* Move the remaining unmapped folios */
migrate_folios_move(&unmap_folios, &dst_folios,
put_new_folio, private, mode, reason,
ret_folios, stats, &retry, &thp_retry,
--
2.34.1
* [PATCH RFC V2 5/9] mm: add support for copy offload for folio Migration
2025-03-19 19:22 [PATCH RFC V2 0/9] Enhancements to Page Migration with Multi-threading and Batch Offloading to DMA Shivank Garg
` (3 preceding siblings ...)
2025-03-19 19:22 ` [PATCH RFC V2 4/9] mm/migrate: add migrate_folios_batch_move to batch the folio move operations Shivank Garg
@ 2025-03-19 19:22 ` Shivank Garg
2025-03-19 19:22 ` [PATCH RFC V2 6/9] mm/migrate: introduce multi-threaded page copy routine Shivank Garg
` (3 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Shivank Garg @ 2025-03-19 19:22 UTC (permalink / raw)
To: akpm, linux-mm, ziy
Cc: AneeshKumar.KizhakeVeetil, baolin.wang, bharata, david,
gregory.price, honggyu.kim, jane.chu, jhubbard, jon.grimm,
k.shutemov, leesuyeon0506, leillc, liam.howlett, linux-kernel,
mel.gorman, Michael.Day, Raghavendra.KodsaraThimmappa, riel,
rientjes, santosh.shukla, shivankg, shy828301, sj,
wangkefeng.wang, weixugc, willy, ying.huang, anannara,
wei.huang2, Jonathan.Cameron, hyeonggon.yoo, byungchul, Mike Day
From: Mike Day <michael.day@amd.com>
Offload-copy drivers should implement the following functions to enable folio
migration offloading:
migrate_offc() - Takes the lists of src and dst folios undergoing migration
and is responsible for transferring the page contents between them.
can_migrate_offc() - Performs the necessary checks to decide whether offloaded
copy migration is supported for the given src and dst folios.
Offload-copy drivers should also provide a mechanism to call start_offloading()
and stop_offloading() to enable and disable migration offload, respectively.
A minimal driver skeleton is sketched below.
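The sketch uses only the hooks added by this patch; the my_* names and the
exact places start_offloading()/stop_offloading() are called from are
hypothetical:

	#include <linux/module.h>
	#include <linux/migrate.h>
	#include <linux/migrate_offc.h>

	static int my_migrate_offc(struct list_head *dst_list,
				   struct list_head *src_list, int folio_cnt)
	{
		/* copy every folio on src_list to its counterpart on dst_list */
		return folios_mc_copy(dst_list, src_list, folio_cnt);
	}

	static bool my_can_migrate_offc(struct folio *dst, struct folio *src)
	{
		return true;	/* accept every folio pair */
	}

	static struct migrator my_migrator = {
		.name			= "MY_OFFLOADER",
		.migrate_offc		= my_migrate_offc,
		.can_migrate_offc	= my_can_migrate_offc,
		.owner			= THIS_MODULE,
	};

	/* e.g. from module init/exit or a sysfs knob */
	start_offloading(&my_migrator);
	...
	stop_offloading();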
[Shivank: Rename the APIs and files to generalize the original DMA-specific
offload implementation to support various copy offloading mechanisms such as
DMA engines, CPU multi-threading, or other
hardware accelerators.]
Signed-off-by: Mike Day <michael.day@amd.com>
Signed-off-by: Shivank Garg <shivankg@amd.com>
---
include/linux/migrate_offc.h | 36 +++++++++++++++++++++++++
mm/Kconfig | 8 ++++++
mm/Makefile | 1 +
mm/migrate.c | 43 ++++++++++++++++++++++++++++--
mm/migrate_offc.c | 51 ++++++++++++++++++++++++++++++++++++
5 files changed, 137 insertions(+), 2 deletions(-)
create mode 100644 include/linux/migrate_offc.h
create mode 100644 mm/migrate_offc.c
diff --git a/include/linux/migrate_offc.h b/include/linux/migrate_offc.h
new file mode 100644
index 000000000000..908f81ebd621
--- /dev/null
+++ b/include/linux/migrate_offc.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _MIGRATE_OFFC_H
+#define _MIGRATE_OFFC_H
+#include <linux/migrate_mode.h>
+
+#define MIGRATOR_NAME_LEN 32
+struct migrator {
+ char name[MIGRATOR_NAME_LEN];
+ int (*migrate_offc)(struct list_head *dst_list, struct list_head *src_list, int folio_cnt);
+ bool (*can_migrate_offc)(struct folio *dst, struct folio *src);
+ struct rcu_head srcu_head;
+ struct module *owner;
+};
+
+extern struct migrator migrator;
+extern struct mutex migrator_mut;
+extern struct srcu_struct mig_srcu;
+
+#ifdef CONFIG_OFFC_MIGRATION
+void srcu_mig_cb(struct rcu_head *head);
+void offc_update_migrator(struct migrator *mig);
+unsigned char *get_active_migrator_name(void);
+bool can_offc_migrate(struct folio *dst, struct folio *src);
+void start_offloading(struct migrator *migrator);
+void stop_offloading(void);
+#else
+static inline void srcu_mig_cb(struct rcu_head *head) { };
+static inline void offc_update_migrator(struct migrator *mig) { };
+static inline unsigned char *get_active_migrator_name(void) { return NULL; };
+static inline bool can_offc_migrate(struct folio *dst, struct folio *src) {return true; };
+static inline void start_offloading(struct migrator *migrator) { };
+static inline void stop_offloading(void) { };
+#endif /* CONFIG_OFFC_MIGRATION */
+
+#endif /* _MIGRATE_OFFC_H */
diff --git a/mm/Kconfig b/mm/Kconfig
index 1b501db06417..7a0693c3be4e 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -722,6 +722,14 @@ config MIGRATION
config DEVICE_MIGRATION
def_bool MIGRATION && ZONE_DEVICE
+config OFFC_MIGRATION
+ bool "Migrate Pages offloading copy"
+ default n
+ depends on MIGRATION
+ help
+ An interface allowing external modules or drivers to offload
+ page copying during page migration.
+
config ARCH_ENABLE_HUGEPAGE_MIGRATION
bool
diff --git a/mm/Makefile b/mm/Makefile
index 850386a67b3e..010142414176 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -93,6 +93,7 @@ obj-$(CONFIG_FAILSLAB) += failslab.o
obj-$(CONFIG_FAIL_PAGE_ALLOC) += fail_page_alloc.o
obj-$(CONFIG_MEMTEST) += memtest.o
obj-$(CONFIG_MIGRATION) += migrate.o
+obj-$(CONFIG_OFFC_MIGRATION) += migrate_offc.o
obj-$(CONFIG_NUMA) += memory-tiers.o
obj-$(CONFIG_DEVICE_MIGRATION) += migrate_device.o
obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += huge_memory.o khugepaged.o
diff --git a/mm/migrate.c b/mm/migrate.c
index 8b6cfb60087c..862a3d1eff60 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -44,6 +44,7 @@
#include <linux/sched/sysctl.h>
#include <linux/memory-tiers.h>
#include <linux/pagewalk.h>
+#include <linux/migrate_offc.h>
#include <asm/tlbflush.h>
@@ -743,6 +744,37 @@ void folio_migrate_flags(struct folio *newfolio, struct folio *folio)
}
EXPORT_SYMBOL(folio_migrate_flags);
+DEFINE_STATIC_CALL(_folios_copy, folios_mc_copy);
+DEFINE_STATIC_CALL(_can_offc_migrate, can_offc_migrate);
+
+#ifdef CONFIG_OFFC_MIGRATION
+void srcu_mig_cb(struct rcu_head *head)
+{
+ static_call_query(_folios_copy);
+}
+
+void offc_update_migrator(struct migrator *mig)
+{
+ int index;
+
+ mutex_lock(&migrator_mut);
+ index = srcu_read_lock(&mig_srcu);
+ strscpy(migrator.name, mig ? mig->name : "kernel", MIGRATOR_NAME_LEN);
+ static_call_update(_folios_copy, mig ? mig->migrate_offc : folios_mc_copy);
+ static_call_update(_can_offc_migrate, mig ? mig->can_migrate_offc : can_offc_migrate);
+ if (READ_ONCE(migrator.owner))
+ module_put(migrator.owner);
+ xchg(&migrator.owner, mig ? mig->owner : NULL);
+ if (READ_ONCE(migrator.owner))
+ try_module_get(migrator.owner);
+ srcu_read_unlock(&mig_srcu, index);
+ mutex_unlock(&migrator_mut);
+ call_srcu(&mig_srcu, &migrator.srcu_head, srcu_mig_cb);
+ srcu_barrier(&mig_srcu);
+}
+
+#endif /* CONFIG_OFFC_MIGRATION */
+
/************************************************************
* Migration functions
***********************************************************/
@@ -1028,11 +1060,15 @@ static int _move_to_new_folio_prep(struct folio *dst, struct folio *src,
{
int rc = -EAGAIN;
bool is_lru = !__folio_test_movable(src);
+ bool can_migrate;
VM_BUG_ON_FOLIO(!folio_test_locked(src), src);
VM_BUG_ON_FOLIO(!folio_test_locked(dst), dst);
- if (likely(is_lru)) {
+ can_migrate = static_call(_can_offc_migrate)(dst, src);
+ if (unlikely(!can_migrate))
+ rc = -EAGAIN;
+ else if (likely(is_lru)) {
struct address_space *mapping = folio_mapping(src);
if (!mapping)
@@ -1868,7 +1904,10 @@ static void migrate_folios_batch_move(struct list_head *src_folios,
goto out;
/* Batch copy the folios */
- rc = folios_mc_copy(dst_folios, src_folios, nr_batched_folios);
+ rc = static_call(_folios_copy)(dst_folios, src_folios, nr_batched_folios);
/* TODO: Is there a better way of handling the poison
* recover for batch copy, instead of falling back to serial copy?
diff --git a/mm/migrate_offc.c b/mm/migrate_offc.c
new file mode 100644
index 000000000000..c632928a7c27
--- /dev/null
+++ b/mm/migrate_offc.c
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/migrate.h>
+#include <linux/migrate_offc.h>
+#include <linux/rculist.h>
+#include <linux/static_call.h>
+
+atomic_t dispatch_to_offc = ATOMIC_INIT(0);
+EXPORT_SYMBOL_GPL(dispatch_to_offc);
+
+DEFINE_MUTEX(migrator_mut);
+DEFINE_SRCU(mig_srcu);
+
+struct migrator migrator = {
+ .name = "kernel",
+ .migrate_offc = folios_mc_copy,
+ .can_migrate_offc = can_offc_migrate,
+ .srcu_head.func = srcu_mig_cb,
+ .owner = NULL,
+};
+
+bool can_offc_migrate(struct folio *dst, struct folio *src)
+{
+ return true;
+}
+EXPORT_SYMBOL_GPL(can_offc_migrate);
+
+void start_offloading(struct migrator *m)
+{
+ int offloading = 0;
+
+ pr_info("starting migration offload by %s\n", m->name);
+ offc_update_migrator(m);
+ atomic_try_cmpxchg(&dispatch_to_offc, &offloading, 1);
+}
+EXPORT_SYMBOL_GPL(start_offloading);
+
+void stop_offloading(void)
+{
+ int offloading = 1;
+
+ pr_info("stopping migration offload by %s\n", migrator.name);
+ offc_update_migrator(NULL);
+ atomic_try_cmpxchg(&dispatch_to_offc, &offloading, 0);
+}
+EXPORT_SYMBOL_GPL(stop_offloading);
+
+unsigned char *get_active_migrator_name(void)
+{
+ return migrator.name;
+}
+EXPORT_SYMBOL_GPL(get_active_migrator_name);
--
2.34.1
* [PATCH RFC V2 6/9] mm/migrate: introduce multi-threaded page copy routine
2025-03-19 19:22 [PATCH RFC V2 0/9] Enhancements to Page Migration with Multi-threading and Batch Offloading to DMA Shivank Garg
` (4 preceding siblings ...)
2025-03-19 19:22 ` [PATCH RFC V2 5/9] mm: add support for copy offload for folio Migration Shivank Garg
@ 2025-03-19 19:22 ` Shivank Garg
2025-03-19 19:22 ` [PATCH RFC V2 7/9] dcbm: add dma core batch migrator for batch page offloading Shivank Garg
` (2 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Shivank Garg @ 2025-03-19 19:22 UTC (permalink / raw)
To: akpm, linux-mm, ziy
Cc: AneeshKumar.KizhakeVeetil, baolin.wang, bharata, david,
gregory.price, honggyu.kim, jane.chu, jhubbard, jon.grimm,
k.shutemov, leesuyeon0506, leillc, liam.howlett, linux-kernel,
mel.gorman, Michael.Day, Raghavendra.KodsaraThimmappa, riel,
rientjes, santosh.shukla, shivankg, shy828301, sj,
wangkefeng.wang, weixugc, willy, ying.huang, anannara,
wei.huang2, Jonathan.Cameron, hyeonggon.yoo, byungchul
From: Zi Yan <ziy@nvidia.com>
Now that page copies are batched, multi-threaded page copying can be used to
increase page copy throughput.
Enable using:
echo 1 > /sys/kernel/cpu_mt/offloading
echo NR_THREADS > /sys/kernel/cpu_mt/threads
Disable:
echo 0 > /sys/kernel/cpu_mt/offloading
[Shivank: Convert the original MT copy_pages implementation into a module,
leveraging the migrate offload infrastructure and a sysfs interface.]
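For reference, copy_page_lists_mt() in this patch splits the work as follows
(numbers are illustrative):

	nr_items <  threads: every worker copies a chunk of every folio, e.g. one
	                     2MB THP with threads=4 gives four 512KB chunks;
	nr_items >= threads: whole folios are distributed across the workers, e.g.
	                     8 folios with threads=4 gives 2 folios per worker.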
Signed-off-by: Zi Yan <ziy@nvidia.com>
Signed-off-by: Shivank Garg <shivankg@amd.com>
---
drivers/Kconfig | 2 +
drivers/Makefile | 3 +
drivers/migoffcopy/Kconfig | 9 +
drivers/migoffcopy/Makefile | 1 +
drivers/migoffcopy/mtcopy/Makefile | 1 +
drivers/migoffcopy/mtcopy/copy_pages.c | 337 +++++++++++++++++++++++++
mm/migrate.c | 11 +-
7 files changed, 357 insertions(+), 7 deletions(-)
create mode 100644 drivers/migoffcopy/Kconfig
create mode 100644 drivers/migoffcopy/Makefile
create mode 100644 drivers/migoffcopy/mtcopy/Makefile
create mode 100644 drivers/migoffcopy/mtcopy/copy_pages.c
diff --git a/drivers/Kconfig b/drivers/Kconfig
index 7bdad836fc62..2e20eb83cd0b 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -245,4 +245,6 @@ source "drivers/cdx/Kconfig"
source "drivers/dpll/Kconfig"
+source "drivers/migoffcopy/Kconfig"
+
endmenu
diff --git a/drivers/Makefile b/drivers/Makefile
index 45d1c3e630f7..4df928a36ea3 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -42,6 +42,9 @@ obj-y += clk/
# really early.
obj-$(CONFIG_DMADEVICES) += dma/
+# Migration copy Offload
+obj-$(CONFIG_OFFC_MIGRATION) += migoffcopy/
+
# SOC specific infrastructure drivers.
obj-y += soc/
obj-$(CONFIG_PM_GENERIC_DOMAINS) += pmdomain/
diff --git a/drivers/migoffcopy/Kconfig b/drivers/migoffcopy/Kconfig
new file mode 100644
index 000000000000..e73698af3e72
--- /dev/null
+++ b/drivers/migoffcopy/Kconfig
@@ -0,0 +1,9 @@
+config MTCOPY_CPU
+ bool "Multi-Threaded Copy with CPU"
+ depends on OFFC_MIGRATION
+ default n
+ help
+ Interface MT COPY CPU driver for batch page migration
+ offloading. Say Y if you want to try offloading with
+ MultiThreaded CPU copy APIs.
+
diff --git a/drivers/migoffcopy/Makefile b/drivers/migoffcopy/Makefile
new file mode 100644
index 000000000000..0a3c356d67e6
--- /dev/null
+++ b/drivers/migoffcopy/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_MTCOPY_CPU) += mtcopy/
diff --git a/drivers/migoffcopy/mtcopy/Makefile b/drivers/migoffcopy/mtcopy/Makefile
new file mode 100644
index 000000000000..b4d7da85eda9
--- /dev/null
+++ b/drivers/migoffcopy/mtcopy/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_MTCOPY_CPU) += copy_pages.o
diff --git a/drivers/migoffcopy/mtcopy/copy_pages.c b/drivers/migoffcopy/mtcopy/copy_pages.c
new file mode 100644
index 000000000000..4c9c7d90c9fd
--- /dev/null
+++ b/drivers/migoffcopy/mtcopy/copy_pages.c
@@ -0,0 +1,337 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Parallel page copy routine.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/printk.h>
+#include <linux/init.h>
+#include <linux/sysctl.h>
+#include <linux/sysfs.h>
+#include <linux/highmem.h>
+#include <linux/workqueue.h>
+#include <linux/slab.h>
+#include <linux/migrate.h>
+#include <linux/migrate_offc.h>
+
+#define MAX_NUM_COPY_THREADS 64
+
+static unsigned int limit_mt_num = 4;
+static int is_dispatching;
+
+static int copy_page_lists_mt(struct list_head *dst_folios,
+ struct list_head *src_folios, int nr_items);
+static bool can_migrate_mt(struct folio *dst, struct folio *src);
+
+static DEFINE_MUTEX(migratecfg_mutex);
+
+/* CPU Multithreaded Batch Migrator */
+struct migrator cpu_migrator = {
+ .name = "CPU_MT_COPY\0",
+ .migrate_offc = copy_page_lists_mt,
+ .can_migrate_offc = can_migrate_mt,
+ .owner = THIS_MODULE,
+};
+
+struct copy_item {
+ char *to;
+ char *from;
+ unsigned long chunk_size;
+};
+
+struct copy_page_info {
+ struct work_struct copy_page_work;
+ int ret;
+ unsigned long num_items;
+ struct copy_item item_list[];
+};
+
+static unsigned long copy_page_routine(char *vto, char *vfrom,
+ unsigned long chunk_size)
+{
+ return copy_mc_to_kernel(vto, vfrom, chunk_size);
+}
+
+static void copy_page_work_queue_thread(struct work_struct *work)
+{
+ struct copy_page_info *my_work = (struct copy_page_info *)work;
+ int i;
+
+ my_work->ret = 0;
+ for (i = 0; i < my_work->num_items; ++i)
+ my_work->ret |= !!copy_page_routine(my_work->item_list[i].to,
+ my_work->item_list[i].from,
+ my_work->item_list[i].chunk_size);
+}
+
+static ssize_t mt_offloading_set(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ int ccode;
+ int action;
+
+ ccode = kstrtoint(buf, 0, &action);
+ if (ccode) {
+ pr_debug("(%s:) error parsing input %s\n", __func__, buf);
+ return ccode;
+ }
+
+ /*
+ * action is 0: User wants to disable MT offloading.
+ * action is 1: User wants to enable MT offloading.
+ */
+ switch (action) {
+ case 0:
+ mutex_lock(&migratecfg_mutex);
+ if (is_dispatching == 1) {
+ stop_offloading();
+ is_dispatching = 0;
+ } else
+ pr_debug("MT migration offloading is already OFF\n");
+ mutex_unlock(&migratecfg_mutex);
+ break;
+ case 1:
+ mutex_lock(&migratecfg_mutex);
+ if (is_dispatching == 0) {
+ start_offloading(&cpu_migrator);
+ is_dispatching = 1;
+ } else
+ pr_debug("MT migration offloading is already ON\n");
+ mutex_unlock(&migratecfg_mutex);
+ break;
+ default:
+ pr_debug("input should be zero or one, parsed as %d\n", action);
+ }
+ return count;
+}
+
+static ssize_t mt_offloading_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%d\n", is_dispatching);
+}
+
+static ssize_t mt_threads_set(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ int ccode;
+ unsigned int threads;
+
+ ccode = kstrtouint(buf, 0, &threads);
+ if (ccode) {
+ pr_debug("(%s:) error parsing input %s\n", __func__, buf);
+ return ccode;
+ }
+
+ if (threads > 0 && threads <= MAX_NUM_COPY_THREADS) {
+ mutex_lock(&migratecfg_mutex);
+ limit_mt_num = threads;
+ mutex_unlock(&migratecfg_mutex);
+ pr_debug("MT threads set to %u\n", limit_mt_num);
+ } else {
+ pr_debug("Invalid thread count. Must be between 1 and %d\n",MAX_NUM_COPY_THREADS);
+ return -EINVAL;
+ }
+
+ return count;
+}
+
+static ssize_t mt_threads_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%u\n", limit_mt_num);
+}
+
+static bool can_migrate_mt(struct folio *dst, struct folio *src)
+{
+ return true;
+}
+
+static int copy_page_lists_mt(struct list_head *dst_folios,
+ struct list_head *src_folios, int nr_items)
+{
+ struct copy_page_info *work_items[MAX_NUM_COPY_THREADS] = {0};
+ unsigned int total_mt_num = limit_mt_num;
+ struct folio *src, *src2, *dst, *dst2;
+ int max_items_per_thread;
+ int item_idx;
+ int err = 0;
+ int cpu;
+ int i;
+
+ if (IS_ENABLED(CONFIG_HIGHMEM))
+ return -ENOTSUPP;
+
+ if (total_mt_num > MAX_NUM_COPY_THREADS)
+ total_mt_num = MAX_NUM_COPY_THREADS;
+
+ /* Each thread gets part of each page, if nr_items < total_mt_num */
+ if (nr_items < total_mt_num)
+ max_items_per_thread = nr_items;
+ else
+ max_items_per_thread = (nr_items / total_mt_num) +
+ ((nr_items % total_mt_num) ? 1 : 0);
+
+
+ for (cpu = 0; cpu < total_mt_num; ++cpu) {
+ work_items[cpu] = kzalloc(sizeof(struct copy_page_info) +
+ sizeof(struct copy_item) *
+ max_items_per_thread,
+ GFP_NOWAIT);
+ if (!work_items[cpu]) {
+ err = -ENOMEM;
+ goto free_work_items;
+ }
+ }
+
+ if (nr_items < total_mt_num) {
+ for (cpu = 0; cpu < total_mt_num; ++cpu) {
+ INIT_WORK((struct work_struct *)work_items[cpu],
+ copy_page_work_queue_thread);
+ work_items[cpu]->num_items = max_items_per_thread;
+ }
+
+ item_idx = 0;
+ dst = list_first_entry(dst_folios, struct folio, lru);
+ dst2 = list_next_entry(dst, lru);
+ list_for_each_entry_safe(src, src2, src_folios, lru) {
+ unsigned long chunk_size = PAGE_SIZE * folio_nr_pages(src) / total_mt_num;
+ char *vfrom = page_address(&src->page);
+ char *vto = page_address(&dst->page);
+
+ VM_WARN_ON(PAGE_SIZE * folio_nr_pages(src) % total_mt_num);
+ VM_WARN_ON(folio_nr_pages(dst) != folio_nr_pages(src));
+
+ for (cpu = 0; cpu < total_mt_num; ++cpu) {
+ work_items[cpu]->item_list[item_idx].to =
+ vto + chunk_size * cpu;
+ work_items[cpu]->item_list[item_idx].from =
+ vfrom + chunk_size * cpu;
+ work_items[cpu]->item_list[item_idx].chunk_size =
+ chunk_size;
+ }
+
+ item_idx++;
+ dst = dst2;
+ dst2 = list_next_entry(dst, lru);
+ }
+
+ for (cpu = 0; cpu < total_mt_num; ++cpu)
+ queue_work(system_unbound_wq,
+ (struct work_struct *)work_items[cpu]);
+ } else {
+ int num_xfer_per_thread = nr_items / total_mt_num;
+ int per_cpu_item_idx;
+
+
+ for (cpu = 0; cpu < total_mt_num; ++cpu) {
+ INIT_WORK((struct work_struct *)work_items[cpu],
+ copy_page_work_queue_thread);
+
+ work_items[cpu]->num_items = num_xfer_per_thread +
+ (cpu < (nr_items % total_mt_num));
+ }
+
+ cpu = 0;
+ per_cpu_item_idx = 0;
+ item_idx = 0;
+ dst = list_first_entry(dst_folios, struct folio, lru);
+ dst2 = list_next_entry(dst, lru);
+ list_for_each_entry_safe(src, src2, src_folios, lru) {
+ work_items[cpu]->item_list[per_cpu_item_idx].to =
+ page_address(&dst->page);
+ work_items[cpu]->item_list[per_cpu_item_idx].from =
+ page_address(&src->page);
+ work_items[cpu]->item_list[per_cpu_item_idx].chunk_size =
+ PAGE_SIZE * folio_nr_pages(src);
+
+ VM_WARN_ON(folio_nr_pages(dst) !=
+ folio_nr_pages(src));
+
+ per_cpu_item_idx++;
+ item_idx++;
+ dst = dst2;
+ dst2 = list_next_entry(dst, lru);
+
+ if (per_cpu_item_idx == work_items[cpu]->num_items) {
+ queue_work(system_unbound_wq,
+ (struct work_struct *)work_items[cpu]);
+ per_cpu_item_idx = 0;
+ cpu++;
+ }
+ }
+ if (item_idx != nr_items)
+ pr_warn("%s: only %d out of %d pages are transferred\n",
+ __func__, item_idx - 1, nr_items);
+ }
+
+ /* Wait until it finishes */
+ for (i = 0; i < total_mt_num; ++i) {
+ flush_work((struct work_struct *)work_items[i]);
+ /* retry if any copy fails */
+ if (work_items[i]->ret)
+ err = -EAGAIN;
+ }
+
+free_work_items:
+ for (cpu = 0; cpu < total_mt_num; ++cpu)
+ kfree(work_items[cpu]);
+
+ return err;
+}
+
+static struct kobject *mt_kobj_ref;
+static struct kobj_attribute mt_offloading_attribute = __ATTR(offloading, 0664,
+ mt_offloading_show, mt_offloading_set);
+static struct kobj_attribute mt_threads_attribute = __ATTR(threads, 0664,
+ mt_threads_show, mt_threads_set);
+
+static int __init cpu_mt_module_init(void)
+{
+ int ret = 0;
+
+ mt_kobj_ref = kobject_create_and_add("cpu_mt", kernel_kobj);
+ if (!mt_kobj_ref)
+ return -ENOMEM;
+
+ ret = sysfs_create_file(mt_kobj_ref, &mt_offloading_attribute.attr);
+ if (ret)
+ goto out_offloading;
+
+ ret = sysfs_create_file(mt_kobj_ref, &mt_threads_attribute.attr);
+ if (ret)
+ goto out_threads;
+
+ is_dispatching = 0;
+
+ return 0;
+
+out_threads:
+ sysfs_remove_file(mt_kobj_ref, &mt_offloading_attribute.attr);
+out_offloading:
+ kobject_put(mt_kobj_ref);
+ return ret;
+}
+
+static void __exit cpu_mt_module_exit(void)
+{
+ /* Stop the MT offloading to unload the module */
+ mutex_lock(&migratecfg_mutex);
+ if (is_dispatching == 1) {
+ stop_offloading();
+ is_dispatching = 0;
+ }
+ mutex_unlock(&migratecfg_mutex);
+
+ sysfs_remove_file(mt_kobj_ref, &mt_threads_attribute.attr);
+ sysfs_remove_file(mt_kobj_ref, &mt_offloading_attribute.attr);
+ kobject_put(mt_kobj_ref);
+}
+
+module_init(cpu_mt_module_init);
+module_exit(cpu_mt_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Zi Yan");
+MODULE_DESCRIPTION("CPU_MT_COPY"); /* CPU Multithreaded Batch Migrator */
diff --git a/mm/migrate.c b/mm/migrate.c
index 862a3d1eff60..e74dbc7a4758 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1831,18 +1831,13 @@ static void migrate_folios_batch_move(struct list_head *src_folios,
int *nr_retry_pages)
{
struct folio *folio, *folio2, *dst, *dst2;
- int rc, nr_pages = 0, nr_batched_folios = 0;
+ int rc, nr_pages = 0, total_nr_pages = 0, nr_batched_folios = 0;
int old_page_state = 0;
struct anon_vma *anon_vma = NULL;
int is_thp = 0;
LIST_HEAD(err_src);
LIST_HEAD(err_dst);
- if (mode != MIGRATE_ASYNC) {
- *retry += 1;
- return;
- }
-
/*
* Iterate over the list of locked src/dst folios to copy the metadata
*/
@@ -1892,8 +1887,10 @@ static void migrate_folios_batch_move(struct list_head *src_folios,
old_page_state & PAGE_WAS_MAPPED,
anon_vma, true, ret_folios);
migrate_folio_undo_dst(dst, true, put_new_folio, private);
- } else /* MIGRATEPAGE_SUCCESS */
+ } else { /* MIGRATEPAGE_SUCCESS */
+ total_nr_pages += nr_pages;
nr_batched_folios++;
+ }
dst = dst2;
dst2 = list_next_entry(dst, lru);
--
2.34.1
* [PATCH RFC V2 7/9] dcbm: add dma core batch migrator for batch page offloading
2025-03-19 19:22 [PATCH RFC V2 0/9] Enhancements to Page Migration with Multi-threading and Batch Offloading to DMA Shivank Garg
` (5 preceding siblings ...)
2025-03-19 19:22 ` [PATCH RFC V2 6/9] mm/migrate: introduce multi-threaded page copy routine Shivank Garg
@ 2025-03-19 19:22 ` Shivank Garg
2025-03-19 19:22 ` [PATCH RFC V2 8/9] adjust NR_MAX_BATCHED_MIGRATION for testing Shivank Garg
2025-03-19 19:22 ` [PATCH RFC V2 9/9] mtcopy: spread threads across die " Shivank Garg
8 siblings, 0 replies; 10+ messages in thread
From: Shivank Garg @ 2025-03-19 19:22 UTC (permalink / raw)
To: akpm, linux-mm, ziy
Cc: AneeshKumar.KizhakeVeetil, baolin.wang, bharata, david,
gregory.price, honggyu.kim, jane.chu, jhubbard, jon.grimm,
k.shutemov, leesuyeon0506, leillc, liam.howlett, linux-kernel,
mel.gorman, Michael.Day, Raghavendra.KodsaraThimmappa, riel,
rientjes, santosh.shukla, shivankg, shy828301, sj,
wangkefeng.wang, weixugc, willy, ying.huang, anannara,
wei.huang2, Jonathan.Cameron, hyeonggon.yoo, byungchul
The dcbm (DMA core batch migrator) provides a generic interface using
DMAEngine for end-to-end testing of the batch page migration offload
feature.
Enable DCBM offload:
echo 1 > /sys/kernel/dcbm/offloading
echo NR_DMA_CHAN_TO_USE > /sys/kernel/dcbm/nr_dma_chan
Disable DCBM offload:
echo 0 > /sys/kernel/dcbm/offloading
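An end-to-end test sketch (assumes a DMAEngine channel with MEMCPY capability
is available; migratepages(8) from numactl is just one way to trigger the
migration):

	echo 4 > /sys/kernel/dcbm/nr_dma_chan
	echo 1 > /sys/kernel/dcbm/offloading
	migratepages <pid> 0 1      # folio copies now go through the DMA path
	echo 0 > /sys/kernel/dcbm/offloading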
Signed-off-by: Shivank Garg <shivankg@amd.com>
---
drivers/migoffcopy/Kconfig | 8 +
drivers/migoffcopy/Makefile | 1 +
drivers/migoffcopy/dcbm/Makefile | 1 +
drivers/migoffcopy/dcbm/dcbm.c | 393 +++++++++++++++++++++++++++++++
4 files changed, 403 insertions(+)
create mode 100644 drivers/migoffcopy/dcbm/Makefile
create mode 100644 drivers/migoffcopy/dcbm/dcbm.c
diff --git a/drivers/migoffcopy/Kconfig b/drivers/migoffcopy/Kconfig
index e73698af3e72..c1b2eff7650d 100644
--- a/drivers/migoffcopy/Kconfig
+++ b/drivers/migoffcopy/Kconfig
@@ -6,4 +6,12 @@ config MTCOPY_CPU
Interface MT COPY CPU driver for batch page migration
offloading. Say Y if you want to try offloading with
MultiThreaded CPU copy APIs.
+config DCBM_DMA
+ bool "DMA Core Batch Migrator"
+ depends on OFFC_MIGRATION && DMA_ENGINE
+ default n
+ help
+ Interface DMA driver for batch page migration offloading.
+ Say Y if you want to try offloading with the DMAEngine-API-based
+ driver.
diff --git a/drivers/migoffcopy/Makefile b/drivers/migoffcopy/Makefile
index 0a3c356d67e6..dedc86ff54c1 100644
--- a/drivers/migoffcopy/Makefile
+++ b/drivers/migoffcopy/Makefile
@@ -1 +1,2 @@
obj-$(CONFIG_MTCOPY_CPU) += mtcopy/
+obj-$(CONFIG_DCBM_DMA) += dcbm/
diff --git a/drivers/migoffcopy/dcbm/Makefile b/drivers/migoffcopy/dcbm/Makefile
new file mode 100644
index 000000000000..56ba47cce0f1
--- /dev/null
+++ b/drivers/migoffcopy/dcbm/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_DCBM_DMA) += dcbm.o
diff --git a/drivers/migoffcopy/dcbm/dcbm.c b/drivers/migoffcopy/dcbm/dcbm.c
new file mode 100644
index 000000000000..185d8d2502fd
--- /dev/null
+++ b/drivers/migoffcopy/dcbm/dcbm.c
@@ -0,0 +1,393 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *
+ * DMA batch-offlading interface driver
+ *
+ * Copyright (C) 2024 Advanced Micro Devices, Inc.
+ */
+
+/*
+ * This code exemplifies how to leverage mm layer's migration offload support
+ * for batch page offloading using DMA Engine APIs.
+ * Developers can use this template to write interface for custom hardware
+ * accelerators with specialized capabilities for batch page migration.
+ * This interface driver is end-to-end working and can be used for testing the
+ * patch series without special hardware given DMAEngine support is available.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/printk.h>
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/device.h>
+#include <linux/sysfs.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/migrate.h>
+#include <linux/migrate_offc.h>
+#include <linux/printk.h>
+#include <linux/sysfs.h>
+
+#define MAX_DMA_CHANNELS 16
+
+static int is_dispatching;
+static int nr_dma_chan;
+
+static int folios_copy_dma(struct list_head *dst_list, struct list_head *src_list, int folios_cnt);
+static int folios_copy_dma_parallel(struct list_head *dst_list, struct list_head *src_list, int folios_cnt, int thread_count);
+static bool can_migrate_dma(struct folio *dst, struct folio *src);
+
+static DEFINE_MUTEX(migratecfg_mutex);
+
+/* DMA Core Batch Migrator */
+struct migrator dmigrator = {
+ .name = "DCBM\0",
+ .migrate_offc = folios_copy_dma,
+ .can_migrate_offc = can_migrate_dma,
+ .owner = THIS_MODULE,
+};
+
+static ssize_t offloading_set(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ int ccode;
+ int action;
+
+ ccode = kstrtoint(buf, 0, &action);
+ if (ccode) {
+ pr_debug("(%s:) error parsing input %s\n", __func__, buf);
+ return ccode;
+ }
+
+ /*
+ * action is 0: User wants to disable DMA offloading.
+ * action is 1: User wants to enable DMA offloading.
+ */
+ switch (action) {
+ case 0:
+ mutex_lock(&migratecfg_mutex);
+ if (is_dispatching == 1) {
+ stop_offloading();
+ is_dispatching = 0;
+ } else
+ pr_debug("migration offloading is already OFF\n");
+ mutex_unlock(&migratecfg_mutex);
+ break;
+ case 1:
+ mutex_lock(&migratecfg_mutex);
+ if (is_dispatching == 0) {
+ start_offloading(&dmigrator);
+ is_dispatching = 1;
+ } else
+ pr_debug("migration offloading is already ON\n");
+ mutex_unlock(&migratecfg_mutex);
+ break;
+ default:
+ pr_debug("input should be zero or one, parsed as %d\n", action);
+ }
+ return count;
+}
+
+static ssize_t offloading_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%d\n", is_dispatching);
+}
+
+static ssize_t nr_dma_chan_set(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ int ccode;
+ int action;
+
+ ccode = kstrtoint(buf, 0, &action);
+ if (ccode) {
+ pr_err("(%s:) error parsing input %s\n", __func__, buf);
+ return ccode;
+ }
+
+ if (action < 1) {
+ pr_err("%s: invalid value, at least 1 channel\n",__func__);
+ return -EINVAL;
+ }
+ if (action >= MAX_DMA_CHANNELS)
+ action = MAX_DMA_CHANNELS;
+
+ mutex_lock(&migratecfg_mutex);
+ nr_dma_chan = action;
+ mutex_unlock(&migratecfg_mutex);
+
+ return count;
+}
+
+static ssize_t nr_dma_chan_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%d\n", nr_dma_chan);
+}
+
+static bool can_migrate_dma(struct folio *dst, struct folio *src)
+{
+ if (folio_test_hugetlb(src) || folio_test_hugetlb(dst) ||
+ folio_has_private(src) || folio_has_private(dst) ||
+ (folio_nr_pages(src) != folio_nr_pages(dst))) {
+ pr_err("can NOT DMA migrate this folio %p\n",src);
+ return false;
+ }
+ return true;
+}
+
+/**
+ * Per-channel state used to track in-flight DMA transfers
+ */
+struct dma_channel_work {
+ struct dma_chan *chan;
+ struct completion done;
+ int active_transfers;
+ spinlock_t lock;
+};
+
+/**
+ * Callback for DMA completion
+ */
+static void folios_dma_completion_callback(void *param)
+{
+ struct dma_channel_work *chan_work = param;
+
+ spin_lock(&chan_work->lock);
+ chan_work->active_transfers--;
+ if (chan_work->active_transfers == 0)
+ complete(&chan_work->done);
+ spin_unlock(&chan_work->lock);
+}
+
+/**
+ * Prepare and submit one folio DMA transfer: map pages, prep memcpy, submit
+ */
+static int process_folio_dma_transfer(struct dma_channel_work *chan_work,
+ struct folio *src, struct folio *dst)
+{
+ struct dma_chan *chan = chan_work->chan;
+ struct dma_device *dev = chan->device;
+ struct device *dma_dev = dmaengine_get_dma_device(chan);
+ dma_cookie_t cookie;
+ struct dma_async_tx_descriptor *tx;
+ enum dma_ctrl_flags flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT;
+ dma_addr_t srcdma_handle, dstdma_handle;
+ size_t data_size = folio_size(src);
+
+ /* Map source and destination pages */
+ srcdma_handle = dma_map_page(dma_dev, &src->page, 0, data_size, DMA_TO_DEVICE);
+ if (dma_mapping_error(dma_dev, srcdma_handle)) {
+ pr_err("src mapping error\n");
+ return -ENOMEM;
+ }
+
+ dstdma_handle = dma_map_page(dma_dev, &dst->page, 0, data_size, DMA_FROM_DEVICE);
+ if (dma_mapping_error(dma_dev, dstdma_handle)) {
+ pr_err("dst mapping error\n");
+ dma_unmap_page(dma_dev, srcdma_handle, data_size, DMA_TO_DEVICE);
+ return -ENOMEM;
+ }
+
+ /* Prepare DMA descriptor */
+ tx = dev->device_prep_dma_memcpy(chan, dstdma_handle, srcdma_handle,
+ data_size, flags);
+ if (unlikely(!tx)) {
+ pr_err("prep_dma_memcpy error\n");
+ dma_unmap_page(dma_dev, dstdma_handle, data_size, DMA_FROM_DEVICE);
+ dma_unmap_page(dma_dev, srcdma_handle, data_size, DMA_TO_DEVICE);
+ return -EBUSY;
+ }
+
+ /* Set up completion callback */
+ tx->callback = folios_dma_completion_callback;
+ tx->callback_param = chan_work;
+
+ /* Submit DMA transaction */
+ spin_lock(&chan_work->lock);
+ chan_work->active_transfers++;
+ spin_unlock(&chan_work->lock);
+
+ cookie = tx->tx_submit(tx);
+ if (dma_submit_error(cookie)) {
+ pr_err("dma_submit_error\n");
+ spin_lock(&chan_work->lock);
+ chan_work->active_transfers--;
+ spin_unlock(&chan_work->lock);
+ dma_unmap_page(dma_dev, dstdma_handle, data_size, DMA_FROM_DEVICE);
+ dma_unmap_page(dma_dev, srcdma_handle, data_size, DMA_TO_DEVICE);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/**
+ * Copy folios using DMA in parallel.
+ * The transfers are spread round-robin across the requested DMA channels.
+ * On error, the copy falls back to the CPU.
+ * Note: returns 0 in all cases because errors are handled via the CPU fallback.
+ * TODO: Add poison recovery support.
+ */
+static int folios_copy_dma_parallel(struct list_head *dst_list,
+ struct list_head *src_list,
+ int folios_cnt_total, int thread_count)
+{
+ struct dma_channel_work *chan_works;
+ struct dma_chan **channels;
+ int i, actual_channels = 0;
+ struct folio *src, *dst;
+ dma_cap_mask_t mask;
+ int channel_idx = 0;
+ int failed = 0;
+ int ret;
+
+ /* TODO: optimise the actual number of channels needed: determine at what
+ * point the DMA set-up overhead drops below the migration cost for N folios.
+ */
+ thread_count = min(thread_count, folios_cnt_total);
+
+ /* Allocate memory for channels */
+ channels = kmalloc_array(thread_count, sizeof(struct dma_chan *), GFP_KERNEL);
+ if (unlikely(!channels)) {
+ pr_err("failed to allocate memory for channels\n");
+ folios_copy(dst_list, src_list, folios_cnt_total);
+ return 0;
+ }
+
+ /* Request DMA channels */
+ dma_cap_zero(mask);
+ dma_cap_set(DMA_MEMCPY, mask);
+ for (i = 0; i < thread_count; i++) {
+ channels[i] = dma_request_channel(mask, NULL, NULL);
+ if (!channels[i]) {
+ pr_err("could only allocate %d DMA channels\n", i);
+ break;
+ }
+ actual_channels++;
+ }
+
+ if (unlikely(actual_channels == 0)) {
+ pr_err("couldn't allocate any DMA channels, falling back to CPU copy\n");
+ kfree(channels);
+ folios_copy(dst_list, src_list, folios_cnt_total);
+ return 0;
+ }
+
+ /* Allocate work structures */
+ chan_works = kmalloc_array(actual_channels, sizeof(*chan_works), GFP_KERNEL);
+ if (unlikely(!chan_works)) {
+ pr_err("failed to allocate memory for work structures\n");
+ for (i = 0; i < actual_channels; i++)
+ dma_release_channel(channels[i]);
+ kfree(channels);
+ folios_copy(dst_list, src_list, folios_cnt_total);
+ return 0;
+ }
+
+ /* Initialize work structures */
+ for (i = 0; i < actual_channels; i++) {
+ chan_works[i].chan = channels[i];
+ init_completion(&chan_works[i].done);
+ chan_works[i].active_transfers = 0;
+ spin_lock_init(&chan_works[i].lock);
+ }
+
+ /* STEP 1: Submit all DMA transfers across all channels */
+ dst = list_first_entry(dst_list, struct folio, lru);
+ list_for_each_entry(src, src_list, lru) {
+ ret = process_folio_dma_transfer(&chan_works[channel_idx], src, dst);
+ if (unlikely(ret)) {
+ /* Fallback to CPU */
+ folio_copy(dst, src);
+ failed++;
+ }
+
+ channel_idx = (channel_idx + 1) % actual_channels;
+
+ dst = list_next_entry(dst, lru);
+ }
+
+ /* STEP 2: Issue all pending DMA requests */
+ for (i = 0; i < actual_channels; i++)
+ dma_async_issue_pending(chan_works[i].chan);
+
+ /* STEP 3: Wait for all DMA operations to complete */
+ for (i = 0; i < actual_channels; i++)
+ wait_for_completion(&chan_works[i].done);
+
+ if (failed)
+ pr_err("processed %d fallback with CPU\n", failed);
+
+ /* Release all resources */
+ for (i = 0; i < actual_channels; i++)
+ dma_release_channel(channels[i]);
+
+ kfree(chan_works);
+ kfree(channels);
+
+ return 0;
+}
+
+/*
+ * Similar to folios_copy(), but copies the folios with DMA using the
+ * configured number of channels (nr_dma_chan).
+ */
+static int folios_copy_dma(struct list_head *dst_list,
+ struct list_head *src_list,
+ int folios_cnt)
+{
+ return folios_copy_dma_parallel(dst_list, src_list, folios_cnt, nr_dma_chan);
+}
+
+static struct kobject *kobj_ref;
+static struct kobj_attribute offloading_attribute = __ATTR(offloading, 0664,
+ offloading_show, offloading_set);
+static struct kobj_attribute nr_dma_chan_attribute = __ATTR(nr_dma_chan, 0664,
+ nr_dma_chan_show, nr_dma_chan_set);
+
+static int __init dma_module_init(void)
+{
+ int ret = 0;
+
+ kobj_ref = kobject_create_and_add("dcbm", kernel_kobj);
+ if (!kobj_ref)
+ return -ENOMEM;
+
+ ret = sysfs_create_file(kobj_ref, &offloading_attribute.attr);
+ if (ret)
+ goto out;
+
+ ret = sysfs_create_file(kobj_ref, &nr_dma_chan_attribute.attr);
+ if (ret)
+ goto out;
+
+ is_dispatching = 0;
+ nr_dma_chan = 1;
+
+ return 0;
+out:
+ kobject_put(kobj_ref);
+ return ret;
+}
+
+static void __exit dma_module_exit(void)
+{
+ /* Stop the DMA offloading to unload the module */
+ sysfs_remove_file(kobj_ref, &offloading_attribute.attr);
+ sysfs_remove_file(kobj_ref, &nr_dma_chan_attribute.attr);
+ kobject_put(kobj_ref);
+}
+
+module_init(dma_module_init);
+module_exit(dma_module_exit);
+
+/* DMA Core Batch Migrator */
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Shivank Garg");
+MODULE_DESCRIPTION("DMA Core Batch Migrator (DCBM) for batched page migration offload");
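
For anyone trying the module out, a minimal usage sketch follows. It is not
part of the patch: it assumes only the folios_copy_dma_parallel() signature
introduced above, demo_copy_batch() is an invented name, and in this series
the copy is presumably reached through the offloading registration done
earlier in the file rather than by calling the helper directly.

static void demo_copy_batch(struct list_head *dst_folios,
                            struct list_head *src_folios,
                            int nr_folios)
{
        /*
         * Ask for up to four DMA channels; the helper clamps the channel
         * count to nr_folios and always returns 0, falling back to the
         * CPU copy internally on any error.
         */
        folios_copy_dma_parallel(dst_folios, src_folios, nr_folios, 4);
}

At runtime the module is driven through the two sysfs files created in
dma_module_init(), /sys/kernel/dcbm/offloading and /sys/kernel/dcbm/nr_dma_chan
(the latter defaults to 1).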
--
2.34.1
^ permalink raw reply [flat|nested] 10+ messages in thread

* [PATCH RFC V2 8/9] adjust NR_MAX_BATCHED_MIGRATION for testing
2025-03-19 19:22 [PATCH RFC V2 0/9] Enhancements to Page Migration with Multi-threading and Batch Offloading to DMA Shivank Garg
` (6 preceding siblings ...)
2025-03-19 19:22 ` [PATCH RFC V2 7/9] dcbm: add dma core batch migrator for batch page offloading Shivank Garg
@ 2025-03-19 19:22 ` Shivank Garg
2025-03-19 19:22 ` [PATCH RFC V2 9/9] mtcopy: spread threads across die " Shivank Garg
8 siblings, 0 replies; 10+ messages in thread
From: Shivank Garg @ 2025-03-19 19:22 UTC (permalink / raw)
To: akpm, linux-mm, ziy
Cc: AneeshKumar.KizhakeVeetil, baolin.wang, bharata, david,
gregory.price, honggyu.kim, jane.chu, jhubbard, jon.grimm,
k.shutemov, leesuyeon0506, leillc, liam.howlett, linux-kernel,
mel.gorman, Michael.Day, Raghavendra.KodsaraThimmappa, riel,
rientjes, santosh.shukla, shivankg, shy828301, sj,
wangkefeng.wang, weixugc, willy, ying.huang, anannara,
wei.huang2, Jonathan.Cameron, hyeonggon.yoo, byungchul
From: Zi Yan <ziy@nvidia.com>
Change NR_MAX_BATCHED_MIGRATION from HPAGE_PMD_NR to HPAGE_PUD_NR to allow
batching of THP copies up to PUD size.
This is for testing purposes only.
Signed-off-by: Zi Yan <ziy@nvidia.com>
Signed-off-by: Shivank Garg <shivankg@amd.com>
---
mm/migrate.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mm/migrate.c b/mm/migrate.c
index e74dbc7a4758..f9eea16a975d 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1653,7 +1653,7 @@ static inline int try_split_folio(struct folio *folio, struct list_head *split_f
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-#define NR_MAX_BATCHED_MIGRATION HPAGE_PMD_NR
+#define NR_MAX_BATCHED_MIGRATION HPAGE_PUD_NR
#else
#define NR_MAX_BATCHED_MIGRATION 512
#endif
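
As a rough, hedged illustration of what the new limit means (assuming x86-64
defaults with 4K base pages and 2M PMD / 1G PUD mappings; the SKETCH_* names
are invented for this note and are not kernel macros):

#define SKETCH_PAGE_SIZE (4UL << 10)                           /* 4K base page */
#define SKETCH_PMD_SIZE  (2UL << 20)                           /* 2M PMD-mapped THP */
#define SKETCH_PUD_SIZE  (1UL << 30)                           /* 1G PUD-mapped THP */
#define SKETCH_PMD_NR    (SKETCH_PMD_SIZE / SKETCH_PAGE_SIZE)  /* 512 */
#define SKETCH_PUD_NR    (SKETCH_PUD_SIZE / SKETCH_PAGE_SIZE)  /* 262144 */

With those assumptions the batch limit grows by a factor of 512, so a
PUD-sized THP range can be migrated as a single batch instead of being split
into PMD-sized batches.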
--
2.34.1
^ permalink raw reply [flat|nested] 10+ messages in thread

* [PATCH RFC V2 9/9] mtcopy: spread threads across die for testing
2025-03-19 19:22 [PATCH RFC V2 0/9] Enhancements to Page Migration with Multi-threading and Batch Offloading to DMA Shivank Garg
` (7 preceding siblings ...)
2025-03-19 19:22 ` [PATCH RFC V2 8/9] adjust NR_MAX_BATCHED_MIGRATION for testing Shivank Garg
@ 2025-03-19 19:22 ` Shivank Garg
8 siblings, 0 replies; 10+ messages in thread
From: Shivank Garg @ 2025-03-19 19:22 UTC (permalink / raw)
To: akpm, linux-mm, ziy
Cc: AneeshKumar.KizhakeVeetil, baolin.wang, bharata, david,
gregory.price, honggyu.kim, jane.chu, jhubbard, jon.grimm,
k.shutemov, leesuyeon0506, leillc, liam.howlett, linux-kernel,
mel.gorman, Michael.Day, Raghavendra.KodsaraThimmappa, riel,
rientjes, santosh.shukla, shivankg, shy828301, sj,
wangkefeng.wang, weixugc, willy, ying.huang, anannara,
wei.huang2, Jonathan.Cameron, hyeonggon.yoo, byungchul
Add a cpuselect sysfs knob to choose which CPUs the copy threads are queued
on: either spread them across dies or pack them onto one die.
For testing purposes only.
Signed-off-by: Shivank Garg <shivankg@amd.com>
---
drivers/migoffcopy/mtcopy/copy_pages.c | 77 +++++++++++++++++++++++++-
1 file changed, 74 insertions(+), 3 deletions(-)
diff --git a/drivers/migoffcopy/mtcopy/copy_pages.c b/drivers/migoffcopy/mtcopy/copy_pages.c
index 4c9c7d90c9fd..5178e6846890 100644
--- a/drivers/migoffcopy/mtcopy/copy_pages.c
+++ b/drivers/migoffcopy/mtcopy/copy_pages.c
@@ -15,11 +15,37 @@
#include <linux/migrate.h>
#include <linux/migrate_offc.h>
-#define MAX_NUM_COPY_THREADS 64
+#define MAX_NUM_COPY_THREADS 32
unsigned int limit_mt_num = 4;
static int is_dispatching;
+static int cpuselect;
+
+/* cpuselect == 0 (default): spread the copy threads across dies, stride of 8 CPUs */
+static const int cpu_id_list_0[] =
+ {0, 8, 16, 24,
+ 32, 40, 48, 56,
+ 64, 72, 80, 88,
+ 96, 104, 112, 120,
+ 128, 136, 144, 152,
+ 160, 168, 176, 184,
+ 192, 200, 208, 216,
+ 224, 232, 240, 248};
+
+/* cpuselect == 1: do not spread, fill one die first with consecutive CPUs */
+static const int cpu_id_list_1[] =
+ {0, 1, 2, 3,
+ 4, 5, 6, 7,
+ 8, 9, 10, 11,
+ 12, 13, 14, 15,
+ 16, 17, 18, 19,
+ 20, 21, 22, 23,
+ 24, 25, 26, 27,
+ 28, 29, 30, 31};
+
+int cpu_id_list[MAX_NUM_COPY_THREADS];
+
static int copy_page_lists_mt(struct list_head *dst_folios,
struct list_head *src_folios, int nr_items);
static bool can_migrate_mt(struct folio *dst, struct folio *src);
@@ -143,6 +169,40 @@ static ssize_t mt_threads_show(struct kobject *kobj,
return sysfs_emit(buf, "%u\n", limit_mt_num);
}
+static ssize_t mt_cpuselect_set(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ int ccode;
+ unsigned int cpuconfig;
+
+ ccode = kstrtouint(buf, 0, &cpuconfig);
+ if (ccode) {
+ pr_debug("(%s:) error parsing input %s\n", __func__, buf);
+ return ccode;
+ }
+ mutex_lock(&migratecfg_mutex);
+ cpuselect = cpuconfig;
+ switch (cpuselect) {
+ case 1:
+ memcpy(cpu_id_list, cpu_id_list_1, sizeof(cpu_id_list));
+ break;
+ default:
+ memcpy(cpu_id_list, cpu_id_list_0, sizeof(cpu_id_list));
+ break;
+ }
+
+ mutex_unlock(&migratecfg_mutex);
+
+ return count;
+}
+
+static ssize_t mt_cpuselect_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%u\n", cpuselect);
+}
+
static bool can_migrate_mt(struct folio *dst, struct folio *src)
{
return true;
@@ -218,7 +278,7 @@ int copy_page_lists_mt(struct list_head *dst_folios,
}
for (cpu = 0; cpu < total_mt_num; ++cpu)
- queue_work(system_unbound_wq,
+ queue_work_on(cpu_id_list[cpu], system_unbound_wq,
(struct work_struct *)work_items[cpu]);
} else {
int num_xfer_per_thread = nr_items / total_mt_num;
@@ -255,7 +315,7 @@ int copy_page_lists_mt(struct list_head *dst_folios,
dst2 = list_next_entry(dst, lru);
if (per_cpu_item_idx == work_items[cpu]->num_items) {
- queue_work(system_unbound_wq,
+ queue_work_on(cpu_id_list[cpu], system_unbound_wq,
(struct work_struct *)work_items[cpu]);
per_cpu_item_idx = 0;
cpu++;
@@ -286,6 +346,8 @@ static struct kobj_attribute mt_offloading_attribute = __ATTR(offloading, 0664,
mt_offloading_show, mt_offloading_set);
static struct kobj_attribute mt_threads_attribute = __ATTR(threads, 0664,
mt_threads_show, mt_threads_set);
+static struct kobj_attribute mt_cpuselect_attribute = __ATTR(cpuselect, 0664,
+ mt_cpuselect_show, mt_cpuselect_set);
static int __init cpu_mt_module_init(void)
{
@@ -303,10 +365,18 @@ static int __init cpu_mt_module_init(void)
if (ret)
goto out_threads;
+ ret = sysfs_create_file(mt_kobj_ref, &mt_cpuselect_attribute.attr);
+ if (ret)
+ goto out_cpuselect;
+
+ memcpy(cpu_id_list, cpu_id_list_0, sizeof(cpu_id_list));
+
is_dispatching = 0;
return 0;
+out_cpuselect:
+ sysfs_remove_file(mt_kobj_ref, &mt_threads_attribute.attr);
out_threads:
sysfs_remove_file(mt_kobj_ref, &mt_offloading_attribute.attr);
out_offloading:
@@ -324,6 +394,7 @@ static void __exit cpu_mt_module_exit(void)
}
mutex_unlock(&migratecfg_mutex);
+ sysfs_remove_file(mt_kobj_ref, &mt_cpuselect_attribute.attr);
sysfs_remove_file(mt_kobj_ref, &mt_threads_attribute.attr);
sysfs_remove_file(mt_kobj_ref, &mt_offloading_attribute.attr);
kobject_put(mt_kobj_ref);
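
The new knob only changes which CPU each copy worker is queued on. A hedged
sketch of the mapping (demo_pick_cpu() is an invented helper; the stride of 8
mirrors cpu_id_list_0, which on the AMD topology assumed here lands roughly
one thread per die):

static int demo_pick_cpu(int thread_idx, int spread)
{
        /* cpuselect == 0: stride of 8 (cpu_id_list_0); cpuselect == 1: consecutive CPUs (cpu_id_list_1). */
        return spread ? thread_idx * 8 : thread_idx;
}

queue_work_on() then pins each work item to cpu_id_list[cpu] instead of
letting the unbound workqueue place it on any CPU.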
--
2.34.1
^ permalink raw reply [flat|nested] 10+ messages in thread