linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Raghavendra K T <raghavendra.kt@amd.com>
To: <raghavendra.kt@amd.com>
Cc: <AneeshKumar.KizhakeVeetil@arm.com>, <Hasan.Maruf@amd.com>,
	<Michael.Day@amd.com>, <akpm@linux-foundation.org>,
	<bharata@amd.com>, <dave.hansen@intel.com>, <david@redhat.com>,
	<dongjoo.linux.dev@gmail.com>, <feng.tang@intel.com>,
	<gourry@gourry.net>, <hannes@cmpxchg.org>, <honggyu.kim@sk.com>,
	<hughd@google.com>, <jhubbard@nvidia.com>, <jon.grimm@amd.com>,
	<k.shutemov@gmail.com>, <kbusch@meta.com>,
	<kmanaouil.dev@gmail.com>, <leesuyeon0506@gmail.com>,
	<leillc@google.com>, <liam.howlett@oracle.com>,
	<linux-kernel@vger.kernel.org>, <linux-mm@kvack.org>,
	<mgorman@techsingularity.net>, <mingo@redhat.com>,
	<nadav.amit@gmail.com>, <nphamcs@gmail.com>,
	<peterz@infradead.org>, <riel@surriel.com>, <rientjes@google.com>,
	<rppt@kernel.org>, <santosh.shukla@amd.com>, <shivankg@amd.com>,
	<shy828301@gmail.com>, <sj@kernel.org>, <vbabka@suse.cz>,
	<weixugc@google.com>, <willy@infradead.org>,
	<ying.huang@linux.alibaba.com>, <ziy@nvidia.com>,
	<Jonathan.Cameron@huawei.com>, <dave@stgolabs.net>
Subject: [RFC PATCH V1 04/13] mm: Create a separate kernel thread for migration
Date: Wed, 19 Mar 2025 19:30:19 +0000	[thread overview]
Message-ID: <20250319193028.29514-5-raghavendra.kt@amd.com> (raw)
In-Reply-To: <20250319193028.29514-1-raghavendra.kt@amd.com>

Having an independent migration thread helps in:
 - Alleviating the need for multiple scanning threads
 - Controlling batch migration (TBD)
 - Throttling migration (TBD)

Signed-off-by: Raghavendra K T <raghavendra.kt@amd.com>
---
 mm/kmmscand.c | 157 +++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 154 insertions(+), 3 deletions(-)

diff --git a/mm/kmmscand.c b/mm/kmmscand.c
index a76a58bf37b2..6e96cfab5b85 100644
--- a/mm/kmmscand.c
+++ b/mm/kmmscand.c
@@ -4,6 +4,7 @@
 #include <linux/sched.h>
 #include <linux/sched/mm.h>
 #include <linux/mmu_notifier.h>
+#include <linux/migrate.h>
 #include <linux/rmap.h>
 #include <linux/pagewalk.h>
 #include <linux/page_ext.h>
@@ -41,10 +42,26 @@ static unsigned long kmmscand_mms_to_scan __read_mostly = KMMSCAND_MMS_TO_SCAN;
 
 bool kmmscand_scan_enabled = true;
 static bool need_wakeup;
+static bool migrated_need_wakeup;
+
+/* How long to pause between two migration cycles */
+static unsigned int kmmmigrate_sleep_ms __read_mostly = 20;
+
+static struct task_struct *kmmmigrated_thread __read_mostly;
+static DEFINE_MUTEX(kmmmigrated_mutex);
+static DECLARE_WAIT_QUEUE_HEAD(kmmmigrated_wait);
+static unsigned long kmmmigrated_sleep_expire;
+
+/* mm of the migrating folio entry */
+static struct mm_struct *kmmscand_cur_migrate_mm;
+
+/* Migration list is manipulated underneath because of mm_exit */
+static bool  kmmscand_migration_list_dirty;
 
 static unsigned long kmmscand_sleep_expire;
 
 static DEFINE_SPINLOCK(kmmscand_mm_lock);
+static DEFINE_SPINLOCK(kmmscand_migrate_lock);
 static DECLARE_WAIT_QUEUE_HEAD(kmmscand_wait);
 
 #define KMMSCAND_SLOT_HASH_BITS 10
@@ -80,6 +97,14 @@ struct kmmscand_scanctrl {
 
 struct kmmscand_scanctrl kmmscand_scanctrl;
 
+struct kmmscand_migrate_list {
+	struct list_head migrate_head;
+};
+
+struct kmmscand_migrate_list kmmscand_migrate_list = {
+	.migrate_head = LIST_HEAD_INIT(kmmscand_migrate_list.migrate_head),
+};
+
 /* Per folio information used for migration */
 struct kmmscand_migrate_info {
 	struct list_head migrate_node;
@@ -101,6 +126,13 @@ static int kmmscand_has_work(void)
 	return !list_empty(&kmmscand_scan.mm_head);
 }
 
+static int kmmmigrated_has_work(void)
+{
+	if (!list_empty(&kmmscand_migrate_list.migrate_head))
+		return true;
+	return false;
+}
+
 static bool kmmscand_should_wakeup(void)
 {
 	bool wakeup =  kthread_should_stop() || need_wakeup ||
@@ -111,6 +143,16 @@ static bool kmmscand_should_wakeup(void)
 	return wakeup;
 }
 
+static bool kmmmigrated_should_wakeup(void)
+{
+	bool wakeup =  kthread_should_stop() || migrated_need_wakeup ||
+	       time_after_eq(jiffies, kmmmigrated_sleep_expire);
+	if (migrated_need_wakeup)
+		migrated_need_wakeup = false;
+
+	return wakeup;
+}
+
 static void kmmscand_wait_work(void)
 {
 	const unsigned long scan_sleep_jiffies =
@@ -126,6 +168,19 @@ static void kmmscand_wait_work(void)
 	return;
 }
 
+static void kmmmigrated_wait_work(void)
+{
+	const unsigned long migrate_sleep_jiffies =
+		msecs_to_jiffies(kmmmigrate_sleep_ms);
+
+	if (!migrate_sleep_jiffies)
+		return;
+
+	kmmmigrated_sleep_expire = jiffies + migrate_sleep_jiffies;
+	wait_event_timeout(kmmmigrated_wait,
+			kmmmigrated_should_wakeup(),
+			migrate_sleep_jiffies);
+}
 
 static inline bool is_valid_folio(struct folio *folio)
 {
@@ -238,7 +293,6 @@ static int hot_vma_idle_pte_entry(pte_t *pte,
 			folio_put(folio);
 			return 0;
 		}
-		/* XXX: Leaking memory. TBD: consume info */
 		info = kzalloc(sizeof(struct kmmscand_migrate_info), GFP_NOWAIT);
 		if (info && scanctrl) {
 
@@ -282,6 +336,28 @@ static inline int kmmscand_test_exit(struct mm_struct *mm)
 	return atomic_read(&mm->mm_users) == 0;
 }
 
+static void kmmscand_cleanup_migration_list(struct mm_struct *mm)
+{
+	struct kmmscand_migrate_info *info, *tmp;
+
+	spin_lock(&kmmscand_migrate_lock);
+	if (!list_empty(&kmmscand_migrate_list.migrate_head)) {
+		if (mm == READ_ONCE(kmmscand_cur_migrate_mm)) {
+			/* A folio in this mm is being migrated. wait */
+			WRITE_ONCE(kmmscand_migration_list_dirty, true);
+		}
+
+		list_for_each_entry_safe(info, tmp, &kmmscand_migrate_list.migrate_head,
+			migrate_node) {
+			if (info && (info->mm == mm)) {
+				info->mm = NULL;
+				WRITE_ONCE(kmmscand_migration_list_dirty, true);
+			}
+		}
+	}
+	spin_unlock(&kmmscand_migrate_lock);
+}
+
 static void kmmscand_collect_mm_slot(struct kmmscand_mm_slot *mm_slot)
 {
 	struct mm_slot *slot = &mm_slot->slot;
@@ -294,11 +370,17 @@ static void kmmscand_collect_mm_slot(struct kmmscand_mm_slot *mm_slot)
 		hash_del(&slot->hash);
 		list_del(&slot->mm_node);
 
+		kmmscand_cleanup_migration_list(mm);
+
 		mm_slot_free(kmmscand_slot_cache, mm_slot);
 		mmdrop(mm);
 	}
 }
 
+static void kmmscand_migrate_folio(void)
+{
+}
+
 static unsigned long kmmscand_scan_mm_slot(void)
 {
 	bool next_mm = false;
@@ -347,9 +429,17 @@ static unsigned long kmmscand_scan_mm_slot(void)
 
 		if (vma_scanned_size >= kmmscand_scan_size) {
 			next_mm = true;
-			/* TBD: Add scanned folios to migration list */
+			/* Add scanned folios to migration list */
+			spin_lock(&kmmscand_migrate_lock);
+			list_splice_tail_init(&kmmscand_scanctrl.scan_list,
+						&kmmscand_migrate_list.migrate_head);
+			spin_unlock(&kmmscand_migrate_lock);
 			break;
 		}
+		spin_lock(&kmmscand_migrate_lock);
+		list_splice_tail_init(&kmmscand_scanctrl.scan_list,
+					&kmmscand_migrate_list.migrate_head);
+		spin_unlock(&kmmscand_migrate_lock);
 	}
 
 	if (!vma)
@@ -478,7 +568,7 @@ void __kmmscand_exit(struct mm_struct *mm)
 {
 	struct kmmscand_mm_slot *mm_slot;
 	struct mm_slot *slot;
-	int free = 0;
+	int free = 0, serialize = 1;
 
 	spin_lock(&kmmscand_mm_lock);
 	slot = mm_slot_lookup(kmmscand_slots_hash, mm);
@@ -493,10 +583,15 @@ void __kmmscand_exit(struct mm_struct *mm)
 		free = 1;
 		/* TBD: Set the actual next slot */
 		kmmscand_scan.mm_slot = NULL;
+	} else if (mm_slot && kmmscand_scan.mm_slot == mm_slot && mm_slot->is_scanned) {
+		serialize = 0;
 	}
 
 	spin_unlock(&kmmscand_mm_lock);
 
+	if (serialize)
+		kmmscand_cleanup_migration_list(mm);
+
 	if (free) {
 		mm_slot_free(kmmscand_slot_cache, mm_slot);
 		mmdrop(mm);
@@ -546,10 +641,59 @@ static int stop_kmmscand(void)
 
 	return err;
 }
+static int kmmmigrated(void *arg)
+{
+	for (;;) {
+		WRITE_ONCE(migrated_need_wakeup, false);
+		if (unlikely(kthread_should_stop()))
+			break;
+		if (kmmmigrated_has_work())
+			kmmscand_migrate_folio();
+		msleep(20);
+		kmmmigrated_wait_work();
+	}
+	return 0;
+}
+
+static int start_kmmmigrated(void)
+{
+	int err = 0;
+
+	guard(mutex)(&kmmmigrated_mutex);
+
+	/* Someone already succeeded in starting daemon */
+	if (kmmmigrated_thread)
+		goto end;
+
+	kmmmigrated_thread = kthread_run(kmmmigrated, NULL, "kmmmigrated");
+	if (IS_ERR(kmmmigrated_thread)) {
+		pr_err("kmmmigrated: kthread_run(kmmmigrated)  failed\n");
+		err = PTR_ERR(kmmmigrated_thread);
+		kmmmigrated_thread = NULL;
+		goto end;
+	} else {
+		pr_info("kmmmigrated: Successfully started kmmmigrated");
+	}
+
+	wake_up_interruptible(&kmmmigrated_wait);
+end:
+	return err;
+}
+
+static int stop_kmmmigrated(void)
+{
+	guard(mutex)(&kmmmigrated_mutex);
+	kthread_stop(kmmmigrated_thread);
+	return 0;
+}
+
 static void init_list(void)
 {
+	INIT_LIST_HEAD(&kmmscand_migrate_list.migrate_head);
 	INIT_LIST_HEAD(&kmmscand_scanctrl.scan_list);
+	spin_lock_init(&kmmscand_migrate_lock);
 	init_waitqueue_head(&kmmscand_wait);
+	init_waitqueue_head(&kmmmigrated_wait);
 }
 
 static int __init kmmscand_init(void)
@@ -568,8 +712,15 @@ static int __init kmmscand_init(void)
 	if (err)
 		goto err_kmmscand;
 
+	err = start_kmmmigrated();
+	if (err)
+		goto err_kmmmigrated;
+
 	return 0;
 
+err_kmmmigrated:
+	stop_kmmmigrated();
+
 err_kmmscand:
 	stop_kmmscand();
 	kmmscand_destroy();
-- 
2.34.1



  parent reply	other threads:[~2025-03-19 19:31 UTC|newest]

Thread overview: 30+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-03-19 19:30 [RFC PATCH V1 00/13] mm: slowtier page promotion based on PTE A bit Raghavendra K T
2025-03-19 19:30 ` [RFC PATCH V1 01/13] mm: Add kmmscand kernel daemon Raghavendra K T
2025-03-21 16:06   ` Jonathan Cameron
2025-03-24 15:09     ` Raghavendra K T
2025-03-19 19:30 ` [RFC PATCH V1 02/13] mm: Maintain mm_struct list in the system Raghavendra K T
2025-03-19 19:30 ` [RFC PATCH V1 03/13] mm: Scan the mm and create a migration list Raghavendra K T
2025-03-19 19:30 ` Raghavendra K T [this message]
2025-03-21 17:29   ` [RFC PATCH V1 04/13] mm: Create a separate kernel thread for migration Jonathan Cameron
2025-03-24 15:17     ` Raghavendra K T
2025-03-19 19:30 ` [RFC PATCH V1 05/13] mm/migration: Migrate accessed folios to toptier node Raghavendra K T
2025-03-19 19:30 ` [RFC PATCH V1 06/13] mm: Add throttling of mm scanning using scan_period Raghavendra K T
2025-03-19 19:30 ` [RFC PATCH V1 07/13] mm: Add throttling of mm scanning using scan_size Raghavendra K T
2025-03-19 19:30 ` [RFC PATCH V1 08/13] mm: Add initial scan delay Raghavendra K T
2025-03-19 19:30 ` [RFC PATCH V1 09/13] mm: Add heuristic to calculate target node Raghavendra K T
2025-03-21 17:42   ` Jonathan Cameron
2025-03-24 16:17     ` Raghavendra K T
2025-03-19 19:30 ` [RFC PATCH V1 10/13] sysfs: Add sysfs support to tune scanning Raghavendra K T
2025-03-19 19:30 ` [RFC PATCH V1 11/13] vmstat: Add vmstat counters Raghavendra K T
2025-03-19 19:30 ` [RFC PATCH V1 12/13] trace/kmmscand: Add tracing of scanning and migration Raghavendra K T
2025-03-19 19:30 ` [RFC PATCH V1 13/13] prctl: Introduce new prctl to control scanning Raghavendra K T
2025-03-19 23:00 ` [RFC PATCH V1 00/13] mm: slowtier page promotion based on PTE A bit Davidlohr Bueso
2025-03-20  8:51   ` Raghavendra K T
2025-03-20 19:11     ` Raghavendra K T
2025-03-21 20:35       ` Davidlohr Bueso
2025-03-25  6:36         ` Raghavendra K T
2025-03-20 21:50     ` Davidlohr Bueso
2025-03-21  6:48       ` Raghavendra K T
2025-03-21 15:52 ` Jonathan Cameron
     [not found] ` <20250321105309.3521-1-hdanton@sina.com>
2025-03-23 18:14   ` [RFC PATCH V1 09/13] mm: Add heuristic to calculate target node Raghavendra K T
     [not found]   ` <20250324110543.3599-1-hdanton@sina.com>
2025-03-24 14:54     ` Raghavendra K T

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250319193028.29514-5-raghavendra.kt@amd.com \
    --to=raghavendra.kt@amd.com \
    --cc=AneeshKumar.KizhakeVeetil@arm.com \
    --cc=Hasan.Maruf@amd.com \
    --cc=Jonathan.Cameron@huawei.com \
    --cc=Michael.Day@amd.com \
    --cc=akpm@linux-foundation.org \
    --cc=bharata@amd.com \
    --cc=dave.hansen@intel.com \
    --cc=dave@stgolabs.net \
    --cc=david@redhat.com \
    --cc=dongjoo.linux.dev@gmail.com \
    --cc=feng.tang@intel.com \
    --cc=gourry@gourry.net \
    --cc=hannes@cmpxchg.org \
    --cc=honggyu.kim@sk.com \
    --cc=hughd@google.com \
    --cc=jhubbard@nvidia.com \
    --cc=jon.grimm@amd.com \
    --cc=k.shutemov@gmail.com \
    --cc=kbusch@meta.com \
    --cc=kmanaouil.dev@gmail.com \
    --cc=leesuyeon0506@gmail.com \
    --cc=leillc@google.com \
    --cc=liam.howlett@oracle.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mgorman@techsingularity.net \
    --cc=mingo@redhat.com \
    --cc=nadav.amit@gmail.com \
    --cc=nphamcs@gmail.com \
    --cc=peterz@infradead.org \
    --cc=riel@surriel.com \
    --cc=rientjes@google.com \
    --cc=rppt@kernel.org \
    --cc=santosh.shukla@amd.com \
    --cc=shivankg@amd.com \
    --cc=shy828301@gmail.com \
    --cc=sj@kernel.org \
    --cc=vbabka@suse.cz \
    --cc=weixugc@google.com \
    --cc=willy@infradead.org \
    --cc=ying.huang@linux.alibaba.com \
    --cc=ziy@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox