From: Raghavendra K T <raghavendra.kt@amd.com>
To: <raghavendra.kt@amd.com>
Cc: <AneeshKumar.KizhakeVeetil@arm.com>, <Hasan.Maruf@amd.com>,
<Michael.Day@amd.com>, <akpm@linux-foundation.org>,
<bharata@amd.com>, <dave.hansen@intel.com>, <david@redhat.com>,
<dongjoo.linux.dev@gmail.com>, <feng.tang@intel.com>,
<gourry@gourry.net>, <hannes@cmpxchg.org>, <honggyu.kim@sk.com>,
<hughd@google.com>, <jhubbard@nvidia.com>, <jon.grimm@amd.com>,
<k.shutemov@gmail.com>, <kbusch@meta.com>,
<kmanaouil.dev@gmail.com>, <leesuyeon0506@gmail.com>,
<leillc@google.com>, <liam.howlett@oracle.com>,
<linux-kernel@vger.kernel.org>, <linux-mm@kvack.org>,
<mgorman@techsingularity.net>, <mingo@redhat.com>,
<nadav.amit@gmail.com>, <nphamcs@gmail.com>,
<peterz@infradead.org>, <riel@surriel.com>, <rientjes@google.com>,
<rppt@kernel.org>, <santosh.shukla@amd.com>, <shivankg@amd.com>,
<shy828301@gmail.com>, <sj@kernel.org>, <vbabka@suse.cz>,
<weixugc@google.com>, <willy@infradead.org>,
<ying.huang@linux.alibaba.com>, <ziy@nvidia.com>,
<Jonathan.Cameron@huawei.com>, <dave@stgolabs.net>
Subject: [RFC PATCH V1 07/13] mm: Add throttling of mm scanning using scan_size
Date: Wed, 19 Mar 2025 19:30:22 +0000 [thread overview]
Message-ID: <20250319193028.29514-8-raghavendra.kt@amd.com> (raw)
In-Reply-To: <20250319193028.29514-1-raghavendra.kt@amd.com>
Before this patch, scanning is done on the entire virtual address space
of all the tasks. Now the scan size is shrunk or expanded based on the
useful pages found in the last scan.
This helps to quickly get out of unnecessary scanning, thus burning
less CPU.
Drawback: If a useful chunk is at the other end of the VMA space, it
will delay scanning and migration.
Shrink/expand algorithm for scan_size:
X : Number of useful pages in the last scan.
Y : Number of useful pages found in current scan.
Initial scan_size is 1GB
case 1: (X = 0, Y = 0)
Decrease scan_size by a factor of 2
case 2: (X = 0, Y > 0)
Aggressively change to MAX (4GB)
case 3: (X > 0, Y = 0 )
No change
case 4: (X > 0, Y > 0)
Increase scan_size by a factor of 2
Scan size is clamped between MIN (256MB) and MAX (4GB).
TBD: Tuning this based on real workload
Signed-off-by: Raghavendra K T <raghavendra.kt@amd.com>
---
mm/kmmscand.c | 29 +++++++++++++++++++++++++++++
1 file changed, 29 insertions(+)
diff --git a/mm/kmmscand.c b/mm/kmmscand.c
index cd2215f2e00e..a19b1f31271d 100644
--- a/mm/kmmscand.c
+++ b/mm/kmmscand.c
@@ -28,10 +28,15 @@
static struct task_struct *kmmscand_thread __read_mostly;
static DEFINE_MUTEX(kmmscand_mutex);
+
/*
* Total VMA size to cover during scan.
+ * Min: 256MB default: 1GB max: 4GB
*/
+#define KMMSCAND_SCAN_SIZE_MIN (256 * 1024 * 1024UL)
+#define KMMSCAND_SCAN_SIZE_MAX (4 * 1024 * 1024 * 1024UL)
#define KMMSCAND_SCAN_SIZE (1 * 1024 * 1024 * 1024UL)
+
static unsigned long kmmscand_scan_size __read_mostly = KMMSCAND_SCAN_SIZE;
/*
@@ -90,6 +95,8 @@ struct kmmscand_mm_slot {
unsigned long next_scan;
/* Tracks how many useful pages obtained for migration in the last scan */
unsigned long scan_delta;
+ /* Determines how much VMA address space to be covered in the scanning */
+ unsigned long scan_size;
long address;
bool is_scanned;
};
@@ -621,6 +628,8 @@ static void kmmscand_migrate_folio(void)
*/
#define KMMSCAND_IGNORE_SCAN_THR 256
+#define SCAN_SIZE_CHANGE_SHIFT 1
+
/* Maintains stability of scan_period by decaying last time accessed pages */
#define SCAN_DECAY_SHIFT 4
/*
@@ -636,14 +645,26 @@ static void kmmscand_migrate_folio(void)
* Increase scan_period by (2 << SCAN_PERIOD_CHANGE_SCALE).
* case 4: (X > 0, Y > 0)
* Decrease scan_period by SCAN_PERIOD_TUNE_PERCENT.
+ * Tuning scan_size:
+ * Initial scan_size is 1GB
+ * case 1: (X = 0, Y = 0)
+ * Decrease scan_size by a factor of (1 << SCAN_SIZE_CHANGE_SHIFT).
+ * case 2: (X = 0, Y > 0)
+ * scan_size = KMMSCAND_SCAN_SIZE_MAX
+ * case 3: (X > 0, Y = 0 )
+ * No change
+ * case 4: (X > 0, Y > 0)
+ * Increase scan_size by a factor of (1 << SCAN_SIZE_CHANGE_SHIFT).
*/
static inline void kmmscand_update_mmslot_info(struct kmmscand_mm_slot *mm_slot,
unsigned long total)
{
unsigned int scan_period;
unsigned long now;
+ unsigned long scan_size;
unsigned long old_scan_delta;
+ scan_size = mm_slot->scan_size;
scan_period = mm_slot->scan_period;
old_scan_delta = mm_slot->scan_delta;
@@ -664,20 +685,25 @@ static inline void kmmscand_update_mmslot_info(struct kmmscand_mm_slot *mm_slot,
if (!old_scan_delta && !total) {
scan_period = (100 + SCAN_PERIOD_TUNE_PERCENT) * scan_period;
scan_period /= 100;
+ scan_size = scan_size >> SCAN_SIZE_CHANGE_SHIFT;
} else if (old_scan_delta && total) {
scan_period = (100 - SCAN_PERIOD_TUNE_PERCENT) * scan_period;
scan_period /= 100;
+ scan_size = scan_size << SCAN_SIZE_CHANGE_SHIFT;
} else if (old_scan_delta && !total) {
scan_period = scan_period << SCAN_PERIOD_CHANGE_SCALE;
} else {
scan_period = scan_period >> SCAN_PERIOD_CHANGE_SCALE;
+ scan_size = KMMSCAND_SCAN_SIZE_MAX;
}
scan_period = clamp(scan_period, KMMSCAND_SCAN_PERIOD_MIN, KMMSCAND_SCAN_PERIOD_MAX);
+ scan_size = clamp(scan_size, KMMSCAND_SCAN_SIZE_MIN, KMMSCAND_SCAN_SIZE_MAX);
now = jiffies;
mm_slot->next_scan = now + msecs_to_jiffies(scan_period);
mm_slot->scan_period = scan_period;
+ mm_slot->scan_size = scan_size;
mm_slot->scan_delta = total;
}
@@ -689,6 +715,7 @@ static unsigned long kmmscand_scan_mm_slot(void)
unsigned int mm_slot_scan_period;
unsigned long now;
unsigned long mm_slot_next_scan;
+ unsigned long mm_slot_scan_size;
unsigned long vma_scanned_size = 0;
unsigned long address;
unsigned long total = 0;
@@ -717,6 +744,7 @@ static unsigned long kmmscand_scan_mm_slot(void)
mm_slot->is_scanned = true;
mm_slot_next_scan = mm_slot->next_scan;
mm_slot_scan_period = mm_slot->scan_period;
+ mm_slot_scan_size = mm_slot->scan_size;
spin_unlock(&kmmscand_mm_lock);
if (unlikely(!mmap_read_trylock(mm)))
@@ -864,6 +892,7 @@ void __kmmscand_enter(struct mm_struct *mm)
kmmscand_slot->address = 0;
kmmscand_slot->scan_period = kmmscand_mm_scan_period_ms;
+ kmmscand_slot->scan_size = kmmscand_scan_size;
kmmscand_slot->next_scan = 0;
kmmscand_slot->scan_delta = 0;
--
2.34.1
next prev parent reply other threads:[~2025-03-19 19:32 UTC|newest]
Thread overview: 30+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-03-19 19:30 [RFC PATCH V1 00/13] mm: slowtier page promotion based on PTE A bit Raghavendra K T
2025-03-19 19:30 ` [RFC PATCH V1 01/13] mm: Add kmmscand kernel daemon Raghavendra K T
2025-03-21 16:06 ` Jonathan Cameron
2025-03-24 15:09 ` Raghavendra K T
2025-03-19 19:30 ` [RFC PATCH V1 02/13] mm: Maintain mm_struct list in the system Raghavendra K T
2025-03-19 19:30 ` [RFC PATCH V1 03/13] mm: Scan the mm and create a migration list Raghavendra K T
2025-03-19 19:30 ` [RFC PATCH V1 04/13] mm: Create a separate kernel thread for migration Raghavendra K T
2025-03-21 17:29 ` Jonathan Cameron
2025-03-24 15:17 ` Raghavendra K T
2025-03-19 19:30 ` [RFC PATCH V1 05/13] mm/migration: Migrate accessed folios to toptier node Raghavendra K T
2025-03-19 19:30 ` [RFC PATCH V1 06/13] mm: Add throttling of mm scanning using scan_period Raghavendra K T
2025-03-19 19:30 ` Raghavendra K T [this message]
2025-03-19 19:30 ` [RFC PATCH V1 08/13] mm: Add initial scan delay Raghavendra K T
2025-03-19 19:30 ` [RFC PATCH V1 09/13] mm: Add heuristic to calculate target node Raghavendra K T
2025-03-21 17:42 ` Jonathan Cameron
2025-03-24 16:17 ` Raghavendra K T
2025-03-19 19:30 ` [RFC PATCH V1 10/13] sysfs: Add sysfs support to tune scanning Raghavendra K T
2025-03-19 19:30 ` [RFC PATCH V1 11/13] vmstat: Add vmstat counters Raghavendra K T
2025-03-19 19:30 ` [RFC PATCH V1 12/13] trace/kmmscand: Add tracing of scanning and migration Raghavendra K T
2025-03-19 19:30 ` [RFC PATCH V1 13/13] prctl: Introduce new prctl to control scanning Raghavendra K T
2025-03-19 23:00 ` [RFC PATCH V1 00/13] mm: slowtier page promotion based on PTE A bit Davidlohr Bueso
2025-03-20 8:51 ` Raghavendra K T
2025-03-20 19:11 ` Raghavendra K T
2025-03-21 20:35 ` Davidlohr Bueso
2025-03-25 6:36 ` Raghavendra K T
2025-03-20 21:50 ` Davidlohr Bueso
2025-03-21 6:48 ` Raghavendra K T
2025-03-21 15:52 ` Jonathan Cameron
[not found] ` <20250321105309.3521-1-hdanton@sina.com>
2025-03-23 18:14 ` [RFC PATCH V1 09/13] mm: Add heuristic to calculate target node Raghavendra K T
[not found] ` <20250324110543.3599-1-hdanton@sina.com>
2025-03-24 14:54 ` Raghavendra K T
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250319193028.29514-8-raghavendra.kt@amd.com \
--to=raghavendra.kt@amd.com \
--cc=AneeshKumar.KizhakeVeetil@arm.com \
--cc=Hasan.Maruf@amd.com \
--cc=Jonathan.Cameron@huawei.com \
--cc=Michael.Day@amd.com \
--cc=akpm@linux-foundation.org \
--cc=bharata@amd.com \
--cc=dave.hansen@intel.com \
--cc=dave@stgolabs.net \
--cc=david@redhat.com \
--cc=dongjoo.linux.dev@gmail.com \
--cc=feng.tang@intel.com \
--cc=gourry@gourry.net \
--cc=hannes@cmpxchg.org \
--cc=honggyu.kim@sk.com \
--cc=hughd@google.com \
--cc=jhubbard@nvidia.com \
--cc=jon.grimm@amd.com \
--cc=k.shutemov@gmail.com \
--cc=kbusch@meta.com \
--cc=kmanaouil.dev@gmail.com \
--cc=leesuyeon0506@gmail.com \
--cc=leillc@google.com \
--cc=liam.howlett@oracle.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mgorman@techsingularity.net \
--cc=mingo@redhat.com \
--cc=nadav.amit@gmail.com \
--cc=nphamcs@gmail.com \
--cc=peterz@infradead.org \
--cc=riel@surriel.com \
--cc=rientjes@google.com \
--cc=rppt@kernel.org \
--cc=santosh.shukla@amd.com \
--cc=shivankg@amd.com \
--cc=shy828301@gmail.com \
--cc=sj@kernel.org \
--cc=vbabka@suse.cz \
--cc=weixugc@google.com \
--cc=willy@infradead.org \
--cc=ying.huang@linux.alibaba.com \
--cc=ziy@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox