linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Kinsey Ho <kinseyho@google.com>
To: linux-mm@kvack.org, linux-kernel@vger.kernel.org
Cc: yuanchu@google.com, AneeshKumar.KizhakeVeetil@arm.com,
	Hasan.Maruf@amd.com,  Jonathan.Cameron@huawei.com,
	Michael.Day@amd.com, akpm@linux-foundation.org,
	 dave.hansen@intel.com, david@redhat.com, feng.tang@intel.com,
	 gourry@gourry.net, hannes@cmpxchg.org, honggyu.kim@sk.com,
	hughd@google.com,  jhubbard@nvidia.com, k.shutemov@gmail.com,
	kbusch@meta.com,  kmanaouil.dev@gmail.com,
	leesuyeon0506@gmail.com, leillc@google.com,
	 liam.howlett@oracle.com, mgorman@techsingularity.net,
	mingo@redhat.com,  nadav.amit@gmail.com, nphamcs@gmail.com,
	peterz@infradead.org,  raghavendra.kt@amd.com, riel@surriel.com,
	rientjes@google.com,  rppt@kernel.org, shivankg@amd.com,
	shy828301@gmail.com, sj@kernel.org,  vbabka@suse.cz,
	weixugc@google.com, willy@infradead.org,
	 ying.huang@linux.alibaba.com, ziy@nvidia.com, dave@stgolabs.net,
	 hyeonggon.yoo@sk.com, bharata@amd.com,
	Kinsey Ho <kinseyho@google.com>
Subject: [RFC PATCH v1 2/2] mm: klruscand: use mglru scanning for page promotion
Date: Mon, 24 Mar 2025 15:03:01 -0700	[thread overview]
Message-ID: <20250324220301.1273038-3-kinseyho@google.com> (raw)
In-Reply-To: <20250324220301.1273038-1-kinseyho@google.com>

Introduce a new kernel daemon, klruscand, that periodically invokes the
MGLRU page table walk. It leverages the new callbacks to gather access
information and forwards it to the kpromoted daemon for promotion
decisions.

This benefits from reusing the existing MGLRU page table walk
infrastructure, which is optimized with features such as hierarchical
scanning and bloom filters to reduce CPU overhead.

As an additional optimization to be added in the future, we can tune
the scan intervals for each memcg.

Signed-off-by: Kinsey Ho <kinseyho@google.com>
Signed-off-by: Yuanchu Xie <yuanchu@google.com>
---
 mm/Kconfig     |   8 ++++
 mm/Makefile    |   1 +
 mm/klruscand.c | 118 +++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 127 insertions(+)
 create mode 100644 mm/klruscand.c

diff --git a/mm/Kconfig b/mm/Kconfig
index ceaa462a0ce6..ed0fa8f2551e 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -1366,6 +1366,14 @@ config KPROMOTED
 	  Promote hot pages from lower tier to top tier by using the
 	  memory access information provided by various sources.
 
+config KLRUSCAND
+	bool "Kernel lower tier access scan daemon"
+	default y
+	depends on KPROMOTED && LRU_GEN_WALKS_MMU
+	help
+	  Scan for accesses from lower tiers by invoking MGLRU to perform
+	  page table walks.
+
 source "mm/damon/Kconfig"
 
 endmenu
diff --git a/mm/Makefile b/mm/Makefile
index bf4f5f18f1f9..eb7b76db3b33 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -148,3 +148,4 @@ obj-$(CONFIG_EXECMEM) += execmem.o
 obj-$(CONFIG_TMPFS_QUOTA) += shmem_quota.o
 obj-$(CONFIG_PT_RECLAIM) += pt_reclaim.o
 obj-$(CONFIG_KPROMOTED) += kpromoted.o
+obj-$(CONFIG_KLRUSCAND) += klruscand.o
diff --git a/mm/klruscand.c b/mm/klruscand.c
new file mode 100644
index 000000000000..a53d43c60155
--- /dev/null
+++ b/mm/klruscand.c
@@ -0,0 +1,118 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/memcontrol.h>
+#include <linux/kthread.h>
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/random.h>
+#include <linux/migrate.h>
+#include <linux/mm_inline.h>
+#include <linux/slab.h>
+#include <linux/sched/clock.h>
+#include <linux/memory-tiers.h>
+#include <linux/sched/mm.h>
+#include <linux/sched.h>
+#include <linux/kpromoted.h>
+
+#include "internal.h"
+
+#define KLRUSCAND_INTERVAL_MS 4000
+#define BATCH_SIZE (2 << 16)
+
+static struct task_struct *scan_thread;
+static pfn_t pfn_batch[BATCH_SIZE];
+static int batch_index;
+
+static void flush_cb(void)
+{
+	int i = 0;
+
+	for (; i < batch_index; i++) {
+		u64 pfn = pfn_batch[i].val;
+
+		kpromoted_record_access((unsigned long)pfn, NUMA_NO_NODE,
+					KPROMOTED_PGTABLE_SCAN, jiffies);
+
+		if (i % 16 == 0)
+			cond_resched();
+	}
+	batch_index = 0;
+}
+
+static int accessed_cb(pfn_t pfn)
+{
+	if (batch_index >= BATCH_SIZE)
+		return -EAGAIN;
+
+	pfn_batch[batch_index++] = pfn;
+	return 0;
+}
+
+static int klruscand_run(void *unused)
+{
+	struct lru_gen_mm_walk *walk;
+
+	walk = kzalloc(sizeof(*walk),
+		       __GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN);
+	if (!walk)
+		return -ENOMEM;
+
+	while (!kthread_should_stop()) {
+		unsigned long next_wake_time;
+		long sleep_time;
+		struct mem_cgroup *memcg;
+		int flags;
+		int nid;
+
+		next_wake_time = jiffies + msecs_to_jiffies(KLRUSCAND_INTERVAL_MS);
+
+		for_each_node_state(nid, N_MEMORY) {
+			pg_data_t *pgdat = NODE_DATA(nid);
+			struct reclaim_state rs = { 0 };
+
+			if (node_is_toptier(nid))
+				continue;
+
+			rs.mm_walk = walk;
+			set_task_reclaim_state(current, &rs);
+			flags = memalloc_noreclaim_save();
+
+			memcg = mem_cgroup_iter(NULL, NULL, NULL);
+			do {
+				struct lruvec *lruvec =
+					mem_cgroup_lruvec(memcg, pgdat);
+				unsigned long max_seq =
+					READ_ONCE((lruvec)->lrugen.max_seq);
+
+				lru_gen_scan_lruvec(lruvec, max_seq,
+						    accessed_cb, flush_cb);
+				cond_resched();
+			} while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)));
+
+			memalloc_noreclaim_restore(flags);
+			set_task_reclaim_state(current, NULL);
+			memset(walk, 0, sizeof(*walk));
+		}
+
+		sleep_time = next_wake_time - jiffies;
+		if (sleep_time > 0 && sleep_time != MAX_SCHEDULE_TIMEOUT)
+			schedule_timeout_idle(sleep_time);
+	}
+	kfree(walk);
+	return 0;
+}
+
+static int __init klruscand_init(void)
+{
+	struct task_struct *task;
+
+	task = kthread_run(klruscand_run, NULL, "klruscand");
+
+	if (IS_ERR(task)) {
+		pr_err("Failed to create klruscand kthread\n");
+		return PTR_ERR(task);
+	}
+
+	scan_thread = task;
+	return 0;
+}
+module_init(klruscand_init);
-- 
2.49.0.395.g12beb8f557-goog



  parent reply	other threads:[~2025-03-24 22:03 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-03-24 22:02 [RFC PATCH v1 0/2] mm: multi-gen LRU " Kinsey Ho
2025-03-24 22:03 ` [RFC PATCH v1 1/2] mm: mglru: generalize page table walk Kinsey Ho
2025-03-24 22:03 ` Kinsey Ho [this message]
2025-03-25 11:56 ` [RFC PATCH v1 0/2] mm: multi-gen LRU scanning for page promotion Bharata B Rao
2025-03-25 21:55   ` Yuanchu Xie

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250324220301.1273038-3-kinseyho@google.com \
    --to=kinseyho@google.com \
    --cc=AneeshKumar.KizhakeVeetil@arm.com \
    --cc=Hasan.Maruf@amd.com \
    --cc=Jonathan.Cameron@huawei.com \
    --cc=Michael.Day@amd.com \
    --cc=akpm@linux-foundation.org \
    --cc=bharata@amd.com \
    --cc=dave.hansen@intel.com \
    --cc=dave@stgolabs.net \
    --cc=david@redhat.com \
    --cc=feng.tang@intel.com \
    --cc=gourry@gourry.net \
    --cc=hannes@cmpxchg.org \
    --cc=honggyu.kim@sk.com \
    --cc=hughd@google.com \
    --cc=hyeonggon.yoo@sk.com \
    --cc=jhubbard@nvidia.com \
    --cc=k.shutemov@gmail.com \
    --cc=kbusch@meta.com \
    --cc=kmanaouil.dev@gmail.com \
    --cc=leesuyeon0506@gmail.com \
    --cc=leillc@google.com \
    --cc=liam.howlett@oracle.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mgorman@techsingularity.net \
    --cc=mingo@redhat.com \
    --cc=nadav.amit@gmail.com \
    --cc=nphamcs@gmail.com \
    --cc=peterz@infradead.org \
    --cc=raghavendra.kt@amd.com \
    --cc=riel@surriel.com \
    --cc=rientjes@google.com \
    --cc=rppt@kernel.org \
    --cc=shivankg@amd.com \
    --cc=shy828301@gmail.com \
    --cc=sj@kernel.org \
    --cc=vbabka@suse.cz \
    --cc=weixugc@google.com \
    --cc=willy@infradead.org \
    --cc=ying.huang@linux.alibaba.com \
    --cc=yuanchu@google.com \
    --cc=ziy@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox