From: Bharata B Rao <bharata@amd.com>
To: <linux-kernel@vger.kernel.org>, <linux-mm@kvack.org>
Cc: <Jonathan.Cameron@huawei.com>, <dave.hansen@intel.com>,
<gourry@gourry.net>, <mgorman@techsingularity.net>,
<mingo@redhat.com>, <peterz@infradead.org>,
<raghavendra.kt@amd.com>, <riel@surriel.com>,
<rientjes@google.com>, <sj@kernel.org>, <weixugc@google.com>,
<willy@infradead.org>, <ying.huang@linux.alibaba.com>,
<ziy@nvidia.com>, <dave@stgolabs.net>, <nifan.cxl@gmail.com>,
<xuezhengchu@huawei.com>, <yiannis@zptcorp.com>,
<akpm@linux-foundation.org>, <david@redhat.com>,
<byungchul@sk.com>, <kinseyho@google.com>,
<joshua.hahnjy@gmail.com>, <yuanchu@google.com>,
<balbirs@nvidia.com>, <alok.rathore@samsung.com>,
<shivankg@amd.com>, Bharata B Rao <bharata@amd.com>
Subject: [RFC PATCH v5 10/10] mm: pghot: Add folio_mark_accessed() as hotness source
Date: Thu, 29 Jan 2026 20:10:43 +0530 [thread overview]
Message-ID: <20260129144043.231636-11-bharata@amd.com> (raw)
In-Reply-To: <20260129144043.231636-1-bharata@amd.com>
Unmapped page cache pages that end up in lower tiers don't get
promoted easily. Earlier attempts were made to identify such pages
and promote them as part of NUMA Balancing [1]. The same idea is
taken forward here by using folio_mark_accessed() as a source of
hotness.
Accesses to lower-tier folios seen by folio_mark_accessed() are
reported to the pghot subsystem for hotness tracking and subsequent
promotion.
TODO: Find a better name for this hotness source, and better
understand/evaluate the overhead of collecting hotness information
from this path.
[1] https://lore.kernel.org/linux-mm/20250411221111.493193-1-gourry@gourry.net/
Signed-off-by: Bharata B Rao <bharata@amd.com>
---
Documentation/admin-guide/mm/pghot.txt | 7 ++++++-
include/linux/pghot.h | 5 +++++
include/linux/vm_event_item.h | 1 +
mm/pghot-tunables.c | 7 +++++++
mm/pghot.c | 6 ++++++
mm/swap.c | 8 ++++++++
mm/vmstat.c | 1 +
7 files changed, 34 insertions(+), 1 deletion(-)
diff --git a/Documentation/admin-guide/mm/pghot.txt b/Documentation/admin-guide/mm/pghot.txt
index b329e692ef89..c8eb61064247 100644
--- a/Documentation/admin-guide/mm/pghot.txt
+++ b/Documentation/admin-guide/mm/pghot.txt
@@ -23,9 +23,10 @@ Path: /sys/kernel/debug/pghot/
- 0: Hardware hints (value 0x1)
- 1: Page table scan (value 0x2)
- 2: Hint faults (value 0x4)
+ - 3: folio_mark_accessed (value 0x8)
- Default: 0 (disabled)
- Example:
- # echo 0x7 > /sys/kernel/debug/pghot/enabled_sources
+ # echo 0xf > /sys/kernel/debug/pghot/enabled_sources
Enables all sources.
2. **target_nid**
@@ -82,3 +83,7 @@ Path: /proc/vmstat
4. **pghot_recorded_hintfaults**
- Number of recorded accesses reported by NUMA Balancing based
hotness source.
+
+5. **pghot_recorded_fma**
+ - Number of recorded accesses reported by folio_mark_accessed()
+ hotness source.
diff --git a/include/linux/pghot.h b/include/linux/pghot.h
index 603791183102..8cf9dfb5365a 100644
--- a/include/linux/pghot.h
+++ b/include/linux/pghot.h
@@ -19,6 +19,7 @@ enum pghot_src {
PGHOT_HW_HINTS,
PGHOT_PGTABLE_SCAN,
PGHOT_HINT_FAULT,
+ PGHOT_FMA,
};
#ifdef CONFIG_PGHOT
@@ -36,6 +37,7 @@ void pghot_debug_init(void);
DECLARE_STATIC_KEY_FALSE(pghot_src_hwhints);
DECLARE_STATIC_KEY_FALSE(pghot_src_pgtscans);
DECLARE_STATIC_KEY_FALSE(pghot_src_hintfaults);
+DECLARE_STATIC_KEY_FALSE(pghot_src_fma);
/*
* Bit positions to enable individual sources in pghot/records_enabled
@@ -45,6 +47,7 @@ enum pghot_src_enabled {
PGHOT_HWHINTS_BIT = 0,
PGHOT_PGTSCAN_BIT,
PGHOT_HINTFAULT_BIT,
+ PGHOT_FMA_BIT,
PGHOT_MAX_BIT
};
@@ -52,6 +55,8 @@ enum pghot_src_enabled {
#define PGHOT_PGTSCAN_ENABLED BIT(PGHOT_PGTSCAN_BIT)
#define PGHOT_HINTFAULT_ENABLED BIT(PGHOT_HINTFAULT_BIT)
#define PGHOT_SRC_ENABLED_MASK GENMASK(PGHOT_MAX_BIT - 1, 0)
+#define PGHOT_FMA_ENABLED BIT(PGHOT_FMA_BIT)
+#define PGHOT_SRC_ENABLED_MASK GENMASK(PGHOT_MAX_BIT - 1, 0)
#define PGHOT_DEFAULT_FREQ_THRESHOLD 2
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index 67efbca9051c..ac1f28646b9c 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -193,6 +193,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
PGHOT_RECORD_HWHINTS,
PGHOT_RECORD_PGTSCANS,
PGHOT_RECORD_HINTFAULTS,
+ PGHOT_RECORD_FMA,
#ifdef CONFIG_HWMEM_PROFILER
HWHINT_NR_EVENTS,
HWHINT_KERNEL,
diff --git a/mm/pghot-tunables.c b/mm/pghot-tunables.c
index 79afbcb1e4f0..11c7f742a1be 100644
--- a/mm/pghot-tunables.c
+++ b/mm/pghot-tunables.c
@@ -124,6 +124,13 @@ static void pghot_src_enabled_update(unsigned int enabled)
else
static_branch_disable(&pghot_src_hintfaults);
}
+
+ if (changed & PGHOT_FMA_ENABLED) {
+ if (enabled & PGHOT_FMA_ENABLED)
+ static_branch_enable(&pghot_src_fma);
+ else
+ static_branch_disable(&pghot_src_fma);
+ }
}
static ssize_t pghot_src_enabled_write(struct file *filp, const char __user *ubuf,
diff --git a/mm/pghot.c b/mm/pghot.c
index 6fc76c1eaff8..537f4af816ff 100644
--- a/mm/pghot.c
+++ b/mm/pghot.c
@@ -43,6 +43,7 @@ static unsigned int sysctl_pghot_promote_rate_limit = 65536;
DEFINE_STATIC_KEY_FALSE(pghot_src_hwhints);
DEFINE_STATIC_KEY_FALSE(pghot_src_pgtscans);
DEFINE_STATIC_KEY_FALSE(pghot_src_hintfaults);
+DEFINE_STATIC_KEY_FALSE(pghot_src_fma);
#ifdef CONFIG_SYSCTL
static const struct ctl_table pghot_sysctls[] = {
@@ -113,6 +114,11 @@ int pghot_record_access(unsigned long pfn, int nid, int src, unsigned long now)
return -EINVAL;
count_vm_event(PGHOT_RECORD_HINTFAULTS);
break;
+ case PGHOT_FMA:
+ if (!static_branch_likely(&pghot_src_fma))
+ return -EINVAL;
+ count_vm_event(PGHOT_RECORD_FMA);
+ break;
default:
return -EINVAL;
}
diff --git a/mm/swap.c b/mm/swap.c
index 2260dcd2775e..31a654b19844 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -37,6 +37,8 @@
#include <linux/page_idle.h>
#include <linux/local_lock.h>
#include <linux/buffer_head.h>
+#include <linux/pghot.h>
+#include <linux/memory-tiers.h>
#include "internal.h"
@@ -454,8 +456,14 @@ static bool lru_gen_clear_refs(struct folio *folio)
*/
void folio_mark_accessed(struct folio *folio)
{
+ unsigned long pfn = folio_pfn(folio);
+
if (folio_test_dropbehind(folio))
return;
+
+ if (!node_is_toptier(pfn_to_nid(pfn)))
+ pghot_record_access(pfn, NUMA_NO_NODE, PGHOT_FMA, jiffies);
+
if (lru_gen_enabled()) {
lru_gen_inc_refs(folio);
return;
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 62c47f44edf0..c4d90baf440b 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1506,6 +1506,7 @@ const char * const vmstat_text[] = {
[I(PGHOT_RECORD_HWHINTS)] = "pghot_recorded_hwhints",
[I(PGHOT_RECORD_PGTSCANS)] = "pghot_recorded_pgtscans",
[I(PGHOT_RECORD_HINTFAULTS)] = "pghot_recorded_hintfaults",
+ [I(PGHOT_RECORD_FMA)] = "pghot_recorded_fma",
#ifdef CONFIG_HWMEM_PROFILER
[I(HWHINT_NR_EVENTS)] = "hwhint_nr_events",
[I(HWHINT_KERNEL)] = "hwhint_kernel",
--
2.34.1
next prev parent reply other threads:[~2026-01-29 14:46 UTC|newest]
Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-01-29 14:40 [RFC PATCH v5 00/10] mm: Hot page tracking and promotion infrastructure Bharata B Rao
2026-01-29 14:40 ` [RFC PATCH v5 01/10] mm: migrate: Allow misplaced migration without VMA Bharata B Rao
2026-01-29 14:40 ` [RFC PATCH v5 02/10] migrate: Add migrate_misplaced_folios_batch() Bharata B Rao
2026-01-29 14:40 ` [RFC PATCH v5 03/10] mm: Hot page tracking and promotion Bharata B Rao
2026-02-11 15:40 ` Bharata B Rao
2026-02-11 16:08 ` Gregory Price
2026-02-12 2:03 ` Bharata B Rao
2026-01-29 14:40 ` [RFC PATCH v5 04/10] mm: pghot: Precision mode for pghot Bharata B Rao
2026-01-29 14:40 ` [RFC PATCH v5 05/10] mm: sched: move NUMA balancing tiering promotion to pghot Bharata B Rao
2026-01-29 14:40 ` [RFC PATCH v5 06/10] x86: ibs: In-kernel IBS driver for memory access profiling Bharata B Rao
2026-01-29 14:40 ` [RFC PATCH v5 07/10] x86: ibs: Enable IBS profiling for memory accesses Bharata B Rao
2026-01-29 14:40 ` [RFC PATCH v5 08/10] mm: mglru: generalize page table walk Bharata B Rao
2026-01-29 14:40 ` [RFC PATCH v5 09/10] mm: klruscand: use mglru scanning for page promotion Bharata B Rao
2026-01-29 14:40 ` Bharata B Rao [this message]
2026-02-09 3:25 ` [RFC PATCH v5 00/10] mm: Hot page tracking and promotion infrastructure Bharata B Rao
2026-02-09 3:30 ` Bharata B Rao
2026-02-11 15:30 ` Bharata B Rao
2026-02-11 16:04 ` Gregory Price
2026-02-12 2:16 ` Bharata B Rao
2026-02-11 16:06 ` Gregory Price
2026-02-12 16:15 ` Bharata B Rao
2026-02-13 14:56 ` Gregory Price
2026-02-16 3:00 ` Bharata B Rao
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260129144043.231636-11-bharata@amd.com \
--to=bharata@amd.com \
--cc=Jonathan.Cameron@huawei.com \
--cc=akpm@linux-foundation.org \
--cc=alok.rathore@samsung.com \
--cc=balbirs@nvidia.com \
--cc=byungchul@sk.com \
--cc=dave.hansen@intel.com \
--cc=dave@stgolabs.net \
--cc=david@redhat.com \
--cc=gourry@gourry.net \
--cc=joshua.hahnjy@gmail.com \
--cc=kinseyho@google.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mgorman@techsingularity.net \
--cc=mingo@redhat.com \
--cc=nifan.cxl@gmail.com \
--cc=peterz@infradead.org \
--cc=raghavendra.kt@amd.com \
--cc=riel@surriel.com \
--cc=rientjes@google.com \
--cc=shivankg@amd.com \
--cc=sj@kernel.org \
--cc=weixugc@google.com \
--cc=willy@infradead.org \
--cc=xuezhengchu@huawei.com \
--cc=yiannis@zptcorp.com \
--cc=ying.huang@linux.alibaba.com \
--cc=yuanchu@google.com \
--cc=ziy@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox