From: Raghavendra K T <raghavendra.kt@amd.com>
To: <raghavendra.kt@amd.com>
Cc: <AneeshKumar.KizhakeVeetil@arm.com>, <Hasan.Maruf@amd.com>,
<Michael.Day@amd.com>, <akpm@linux-foundation.org>,
<bharata@amd.com>, <dave.hansen@intel.com>, <david@redhat.com>,
<dongjoo.linux.dev@gmail.com>, <feng.tang@intel.com>,
<gourry@gourry.net>, <hannes@cmpxchg.org>, <honggyu.kim@sk.com>,
<hughd@google.com>, <jhubbard@nvidia.com>, <jon.grimm@amd.com>,
<k.shutemov@gmail.com>, <kbusch@meta.com>,
<kmanaouil.dev@gmail.com>, <leesuyeon0506@gmail.com>,
<leillc@google.com>, <liam.howlett@oracle.com>,
<linux-kernel@vger.kernel.org>, <linux-mm@kvack.org>,
<mgorman@techsingularity.net>, <mingo@redhat.com>,
<nadav.amit@gmail.com>, <nphamcs@gmail.com>,
<peterz@infradead.org>, <riel@surriel.com>, <rientjes@google.com>,
<rppt@kernel.org>, <santosh.shukla@amd.com>, <shivankg@amd.com>,
<shy828301@gmail.com>, <sj@kernel.org>, <vbabka@suse.cz>,
<weixugc@google.com>, <willy@infradead.org>,
<ying.huang@linux.alibaba.com>, <ziy@nvidia.com>,
<Jonathan.Cameron@huawei.com>, <dave@stgolabs.net>
Subject: [RFC PATCH V1 04/13] mm: Create a separate kernel thread for migration
Date: Wed, 19 Mar 2025 19:30:19 +0000 [thread overview]
Message-ID: <20250319193028.29514-5-raghavendra.kt@amd.com> (raw)
In-Reply-To: <20250319193028.29514-1-raghavendra.kt@amd.com>
Having an independent thread helps in:
- Alleviating the need for multiple scanning threads
- Controlling batch migration (TBD)
- Migration throttling (TBD)
Signed-off-by: Raghavendra K T <raghavendra.kt@amd.com>
---
mm/kmmscand.c | 157 +++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 154 insertions(+), 3 deletions(-)
diff --git a/mm/kmmscand.c b/mm/kmmscand.c
index a76a58bf37b2..6e96cfab5b85 100644
--- a/mm/kmmscand.c
+++ b/mm/kmmscand.c
@@ -4,6 +4,7 @@
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/mmu_notifier.h>
+#include <linux/migrate.h>
#include <linux/rmap.h>
#include <linux/pagewalk.h>
#include <linux/page_ext.h>
@@ -41,10 +42,26 @@ static unsigned long kmmscand_mms_to_scan __read_mostly = KMMSCAND_MMS_TO_SCAN;
bool kmmscand_scan_enabled = true;
static bool need_wakeup;
+static bool migrated_need_wakeup;
+
+/* How long to pause between two migration cycles */
+static unsigned int kmmmigrate_sleep_ms __read_mostly = 20;
+
+static struct task_struct *kmmmigrated_thread __read_mostly;
+static DEFINE_MUTEX(kmmmigrated_mutex);
+static DECLARE_WAIT_QUEUE_HEAD(kmmmigrated_wait);
+static unsigned long kmmmigrated_sleep_expire;
+
+/* mm of the migrating folio entry */
+static struct mm_struct *kmmscand_cur_migrate_mm;
+
+/* Migration list is manipulated underneath because of mm_exit */
+static bool kmmscand_migration_list_dirty;
static unsigned long kmmscand_sleep_expire;
static DEFINE_SPINLOCK(kmmscand_mm_lock);
+static DEFINE_SPINLOCK(kmmscand_migrate_lock);
static DECLARE_WAIT_QUEUE_HEAD(kmmscand_wait);
#define KMMSCAND_SLOT_HASH_BITS 10
@@ -80,6 +97,14 @@ struct kmmscand_scanctrl {
struct kmmscand_scanctrl kmmscand_scanctrl;
+struct kmmscand_migrate_list {
+ struct list_head migrate_head;
+};
+
+struct kmmscand_migrate_list kmmscand_migrate_list = {
+ .migrate_head = LIST_HEAD_INIT(kmmscand_migrate_list.migrate_head),
+};
+
/* Per folio information used for migration */
struct kmmscand_migrate_info {
struct list_head migrate_node;
@@ -101,6 +126,13 @@ static int kmmscand_has_work(void)
return !list_empty(&kmmscand_scan.mm_head);
}
+static int kmmmigrated_has_work(void)
+{
+ if (!list_empty(&kmmscand_migrate_list.migrate_head))
+ return true;
+ return false;
+}
+
static bool kmmscand_should_wakeup(void)
{
bool wakeup = kthread_should_stop() || need_wakeup ||
@@ -111,6 +143,16 @@ static bool kmmscand_should_wakeup(void)
return wakeup;
}
+static bool kmmmigrated_should_wakeup(void)
+{
+ bool wakeup = kthread_should_stop() || migrated_need_wakeup ||
+ time_after_eq(jiffies, kmmmigrated_sleep_expire);
+ if (migrated_need_wakeup)
+ migrated_need_wakeup = false;
+
+ return wakeup;
+}
+
static void kmmscand_wait_work(void)
{
const unsigned long scan_sleep_jiffies =
@@ -126,6 +168,19 @@ static void kmmscand_wait_work(void)
return;
}
+static void kmmmigrated_wait_work(void)
+{
+ const unsigned long migrate_sleep_jiffies =
+ msecs_to_jiffies(kmmmigrate_sleep_ms);
+
+ if (!migrate_sleep_jiffies)
+ return;
+
+ kmmmigrated_sleep_expire = jiffies + migrate_sleep_jiffies;
+ wait_event_timeout(kmmmigrated_wait,
+ kmmmigrated_should_wakeup(),
+ migrate_sleep_jiffies);
+}
static inline bool is_valid_folio(struct folio *folio)
{
@@ -238,7 +293,6 @@ static int hot_vma_idle_pte_entry(pte_t *pte,
folio_put(folio);
return 0;
}
- /* XXX: Leaking memory. TBD: consume info */
info = kzalloc(sizeof(struct kmmscand_migrate_info), GFP_NOWAIT);
if (info && scanctrl) {
@@ -282,6 +336,28 @@ static inline int kmmscand_test_exit(struct mm_struct *mm)
return atomic_read(&mm->mm_users) == 0;
}
+static void kmmscand_cleanup_migration_list(struct mm_struct *mm)
+{
+ struct kmmscand_migrate_info *info, *tmp;
+
+ spin_lock(&kmmscand_migrate_lock);
+ if (!list_empty(&kmmscand_migrate_list.migrate_head)) {
+ if (mm == READ_ONCE(kmmscand_cur_migrate_mm)) {
+ /* A folio in this mm is being migrated. wait */
+ WRITE_ONCE(kmmscand_migration_list_dirty, true);
+ }
+
+ list_for_each_entry_safe(info, tmp, &kmmscand_migrate_list.migrate_head,
+ migrate_node) {
+ if (info && (info->mm == mm)) {
+ info->mm = NULL;
+ WRITE_ONCE(kmmscand_migration_list_dirty, true);
+ }
+ }
+ }
+ spin_unlock(&kmmscand_migrate_lock);
+}
+
static void kmmscand_collect_mm_slot(struct kmmscand_mm_slot *mm_slot)
{
struct mm_slot *slot = &mm_slot->slot;
@@ -294,11 +370,17 @@ static void kmmscand_collect_mm_slot(struct kmmscand_mm_slot *mm_slot)
hash_del(&slot->hash);
list_del(&slot->mm_node);
+ kmmscand_cleanup_migration_list(mm);
+
mm_slot_free(kmmscand_slot_cache, mm_slot);
mmdrop(mm);
}
}
+static void kmmscand_migrate_folio(void)
+{
+}
+
static unsigned long kmmscand_scan_mm_slot(void)
{
bool next_mm = false;
@@ -347,9 +429,17 @@ static unsigned long kmmscand_scan_mm_slot(void)
if (vma_scanned_size >= kmmscand_scan_size) {
next_mm = true;
- /* TBD: Add scanned folios to migration list */
+ /* Add scanned folios to migration list */
+ spin_lock(&kmmscand_migrate_lock);
+ list_splice_tail_init(&kmmscand_scanctrl.scan_list,
+ &kmmscand_migrate_list.migrate_head);
+ spin_unlock(&kmmscand_migrate_lock);
break;
}
+ spin_lock(&kmmscand_migrate_lock);
+ list_splice_tail_init(&kmmscand_scanctrl.scan_list,
+ &kmmscand_migrate_list.migrate_head);
+ spin_unlock(&kmmscand_migrate_lock);
}
if (!vma)
@@ -478,7 +568,7 @@ void __kmmscand_exit(struct mm_struct *mm)
{
struct kmmscand_mm_slot *mm_slot;
struct mm_slot *slot;
- int free = 0;
+ int free = 0, serialize = 1;
spin_lock(&kmmscand_mm_lock);
slot = mm_slot_lookup(kmmscand_slots_hash, mm);
@@ -493,10 +583,15 @@ void __kmmscand_exit(struct mm_struct *mm)
free = 1;
/* TBD: Set the actual next slot */
kmmscand_scan.mm_slot = NULL;
+ } else if (mm_slot && kmmscand_scan.mm_slot == mm_slot && mm_slot->is_scanned) {
+ serialize = 0;
}
spin_unlock(&kmmscand_mm_lock);
+ if (serialize)
+ kmmscand_cleanup_migration_list(mm);
+
if (free) {
mm_slot_free(kmmscand_slot_cache, mm_slot);
mmdrop(mm);
@@ -546,10 +641,59 @@ static int stop_kmmscand(void)
return err;
}
+static int kmmmigrated(void *arg)
+{
+ for (;;) {
+ WRITE_ONCE(migrated_need_wakeup, false);
+ if (unlikely(kthread_should_stop()))
+ break;
+ if (kmmmigrated_has_work())
+ kmmscand_migrate_folio();
+ msleep(20);
+ kmmmigrated_wait_work();
+ }
+ return 0;
+}
+
+static int start_kmmmigrated(void)
+{
+ int err = 0;
+
+ guard(mutex)(&kmmmigrated_mutex);
+
+ /* Someone already succeeded in starting daemon */
+ if (kmmmigrated_thread)
+ goto end;
+
+ kmmmigrated_thread = kthread_run(kmmmigrated, NULL, "kmmmigrated");
+ if (IS_ERR(kmmmigrated_thread)) {
+ pr_err("kmmmigrated: kthread_run(kmmmigrated) failed\n");
+ err = PTR_ERR(kmmmigrated_thread);
+ kmmmigrated_thread = NULL;
+ goto end;
+ } else {
+ pr_info("kmmmigrated: Successfully started kmmmigrated");
+ }
+
+ wake_up_interruptible(&kmmmigrated_wait);
+end:
+ return err;
+}
+
+static int stop_kmmmigrated(void)
+{
+ guard(mutex)(&kmmmigrated_mutex);
+ kthread_stop(kmmmigrated_thread);
+ return 0;
+}
+
static void init_list(void)
{
+ INIT_LIST_HEAD(&kmmscand_migrate_list.migrate_head);
INIT_LIST_HEAD(&kmmscand_scanctrl.scan_list);
+ spin_lock_init(&kmmscand_migrate_lock);
init_waitqueue_head(&kmmscand_wait);
+ init_waitqueue_head(&kmmmigrated_wait);
}
static int __init kmmscand_init(void)
@@ -568,8 +712,15 @@ static int __init kmmscand_init(void)
if (err)
goto err_kmmscand;
+ err = start_kmmmigrated();
+ if (err)
+ goto err_kmmmigrated;
+
return 0;
+err_kmmmigrated:
+ stop_kmmmigrated();
+
err_kmmscand:
stop_kmmscand();
kmmscand_destroy();
--
2.34.1
next prev parent reply other threads:[~2025-03-19 19:31 UTC|newest]
Thread overview: 30+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-03-19 19:30 [RFC PATCH V1 00/13] mm: slowtier page promotion based on PTE A bit Raghavendra K T
2025-03-19 19:30 ` [RFC PATCH V1 01/13] mm: Add kmmscand kernel daemon Raghavendra K T
2025-03-21 16:06 ` Jonathan Cameron
2025-03-24 15:09 ` Raghavendra K T
2025-03-19 19:30 ` [RFC PATCH V1 02/13] mm: Maintain mm_struct list in the system Raghavendra K T
2025-03-19 19:30 ` [RFC PATCH V1 03/13] mm: Scan the mm and create a migration list Raghavendra K T
2025-03-19 19:30 ` Raghavendra K T [this message]
2025-03-21 17:29 ` [RFC PATCH V1 04/13] mm: Create a separate kernel thread for migration Jonathan Cameron
2025-03-24 15:17 ` Raghavendra K T
2025-03-19 19:30 ` [RFC PATCH V1 05/13] mm/migration: Migrate accessed folios to toptier node Raghavendra K T
2025-03-19 19:30 ` [RFC PATCH V1 06/13] mm: Add throttling of mm scanning using scan_period Raghavendra K T
2025-03-19 19:30 ` [RFC PATCH V1 07/13] mm: Add throttling of mm scanning using scan_size Raghavendra K T
2025-03-19 19:30 ` [RFC PATCH V1 08/13] mm: Add initial scan delay Raghavendra K T
2025-03-19 19:30 ` [RFC PATCH V1 09/13] mm: Add heuristic to calculate target node Raghavendra K T
2025-03-21 17:42 ` Jonathan Cameron
2025-03-24 16:17 ` Raghavendra K T
2025-03-19 19:30 ` [RFC PATCH V1 10/13] sysfs: Add sysfs support to tune scanning Raghavendra K T
2025-03-19 19:30 ` [RFC PATCH V1 11/13] vmstat: Add vmstat counters Raghavendra K T
2025-03-19 19:30 ` [RFC PATCH V1 12/13] trace/kmmscand: Add tracing of scanning and migration Raghavendra K T
2025-03-19 19:30 ` [RFC PATCH V1 13/13] prctl: Introduce new prctl to control scanning Raghavendra K T
2025-03-19 23:00 ` [RFC PATCH V1 00/13] mm: slowtier page promotion based on PTE A bit Davidlohr Bueso
2025-03-20 8:51 ` Raghavendra K T
2025-03-20 19:11 ` Raghavendra K T
2025-03-21 20:35 ` Davidlohr Bueso
2025-03-25 6:36 ` Raghavendra K T
2025-03-20 21:50 ` Davidlohr Bueso
2025-03-21 6:48 ` Raghavendra K T
2025-03-21 15:52 ` Jonathan Cameron
[not found] ` <20250321105309.3521-1-hdanton@sina.com>
2025-03-23 18:14 ` [RFC PATCH V1 09/13] mm: Add heuristic to calculate target node Raghavendra K T
[not found] ` <20250324110543.3599-1-hdanton@sina.com>
2025-03-24 14:54 ` Raghavendra K T
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250319193028.29514-5-raghavendra.kt@amd.com \
--to=raghavendra.kt@amd.com \
--cc=AneeshKumar.KizhakeVeetil@arm.com \
--cc=Hasan.Maruf@amd.com \
--cc=Jonathan.Cameron@huawei.com \
--cc=Michael.Day@amd.com \
--cc=akpm@linux-foundation.org \
--cc=bharata@amd.com \
--cc=dave.hansen@intel.com \
--cc=dave@stgolabs.net \
--cc=david@redhat.com \
--cc=dongjoo.linux.dev@gmail.com \
--cc=feng.tang@intel.com \
--cc=gourry@gourry.net \
--cc=hannes@cmpxchg.org \
--cc=honggyu.kim@sk.com \
--cc=hughd@google.com \
--cc=jhubbard@nvidia.com \
--cc=jon.grimm@amd.com \
--cc=k.shutemov@gmail.com \
--cc=kbusch@meta.com \
--cc=kmanaouil.dev@gmail.com \
--cc=leesuyeon0506@gmail.com \
--cc=leillc@google.com \
--cc=liam.howlett@oracle.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mgorman@techsingularity.net \
--cc=mingo@redhat.com \
--cc=nadav.amit@gmail.com \
--cc=nphamcs@gmail.com \
--cc=peterz@infradead.org \
--cc=riel@surriel.com \
--cc=rientjes@google.com \
--cc=rppt@kernel.org \
--cc=santosh.shukla@amd.com \
--cc=shivankg@amd.com \
--cc=shy828301@gmail.com \
--cc=sj@kernel.org \
--cc=vbabka@suse.cz \
--cc=weixugc@google.com \
--cc=willy@infradead.org \
--cc=ying.huang@linux.alibaba.com \
--cc=ziy@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox