linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Bharata B Rao <bharata@amd.com>
To: <linux-kernel@vger.kernel.org>, <linux-mm@kvack.org>
Cc: <Jonathan.Cameron@huawei.com>, <dave.hansen@intel.com>,
	<gourry@gourry.net>, <mgorman@techsingularity.net>,
	<mingo@redhat.com>, <peterz@infradead.org>,
	<raghavendra.kt@amd.com>, <riel@surriel.com>,
	<rientjes@google.com>, <sj@kernel.org>, <weixugc@google.com>,
	<willy@infradead.org>, <ying.huang@linux.alibaba.com>,
	<ziy@nvidia.com>, <dave@stgolabs.net>, <nifan.cxl@gmail.com>,
	<xuezhengchu@huawei.com>, <yiannis@zptcorp.com>,
	<akpm@linux-foundation.org>, <david@redhat.com>,
	<byungchul@sk.com>, <kinseyho@google.com>,
	<joshua.hahnjy@gmail.com>, <yuanchu@google.com>,
	<balbirs@nvidia.com>, <alok.rathore@samsung.com>,
	<shivankg@amd.com>, Bharata B Rao <bharata@amd.com>
Subject: [RFC PATCH v4 5/9] x86: ibs: Enable IBS profiling for memory accesses
Date: Sat, 6 Dec 2025 15:44:19 +0530	[thread overview]
Message-ID: <20251206101423.5004-6-bharata@amd.com> (raw)
In-Reply-To: <20251206101423.5004-1-bharata@amd.com>

Enable IBS memory access data collection for user memory
accesses by programming the required MSRs. The profiling
is turned ON only for user mode execution and turned OFF
for kernel mode execution. Profiling is explicitly disabled
for NMI handler too.

TODOs:

- IBS sampling rate is kept fixed for now.
- Arch/vendor separation/isolation of the code needs relook.

Signed-off-by: Bharata B Rao <bharata@amd.com>
---
 arch/x86/include/asm/entry-common.h |  3 +++
 arch/x86/include/asm/hardirq.h      |  2 ++
 arch/x86/mm/ibs.c                   | 32 +++++++++++++++++++++++++++++
 include/linux/pghot.h               |  4 ++++
 4 files changed, 41 insertions(+)

diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h
index ce3eb6d5fdf9..0f381a63669e 100644
--- a/arch/x86/include/asm/entry-common.h
+++ b/arch/x86/include/asm/entry-common.h
@@ -4,6 +4,7 @@
 
 #include <linux/randomize_kstack.h>
 #include <linux/user-return-notifier.h>
+#include <linux/pghot.h>
 
 #include <asm/nospec-branch.h>
 #include <asm/io_bitmap.h>
@@ -13,6 +14,7 @@
 /* Check that the stack and regs on entry from user mode are sane. */
 static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs)
 {
+	hwmem_access_profiling_stop();
 	if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) {
 		/*
 		 * Make sure that the entry code gave us a sensible EFLAGS
@@ -106,6 +108,7 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
 static __always_inline void arch_exit_to_user_mode(void)
 {
 	amd_clear_divider();
+	hwmem_access_profiling_start();
 }
 #define arch_exit_to_user_mode arch_exit_to_user_mode
 
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index f00c09ffe6a9..281dd76f5efb 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -91,4 +91,6 @@ static __always_inline bool kvm_get_cpu_l1tf_flush_l1d(void)
 static __always_inline void kvm_set_cpu_l1tf_flush_l1d(void) { }
 #endif /* IS_ENABLED(CONFIG_KVM_INTEL) */
 
+#define arch_nmi_enter()	hwmem_access_profiling_stop()
+#define arch_nmi_exit()		hwmem_access_profiling_start()
 #endif /* _ASM_X86_HARDIRQ_H */
diff --git a/arch/x86/mm/ibs.c b/arch/x86/mm/ibs.c
index 946f3a82ca11..cf21052118c9 100644
--- a/arch/x86/mm/ibs.c
+++ b/arch/x86/mm/ibs.c
@@ -16,6 +16,7 @@ static u64 ibs_config __read_mostly;
 static u32 ibs_caps;
 
 #define IBS_NR_SAMPLES	150
+#define IBS_SAMPLE_PERIOD      10000
 
 /*
  * Basic access info captured for each memory access.
@@ -43,6 +44,36 @@ struct ibs_sample_pcpu __percpu *ibs_s;
 static struct work_struct ibs_work;
 static struct irq_work ibs_irq_work;
 
+void hwmem_access_profiling_stop(void)
+{
+	u64 ops_ctl;
+
+	if (!hwmem_access_profiling)
+		return;
+
+	rdmsrl(MSR_AMD64_IBSOPCTL, ops_ctl);
+	wrmsrl(MSR_AMD64_IBSOPCTL, ops_ctl & ~IBS_OP_ENABLE);
+}
+
+void hwmem_access_profiling_start(void)
+{
+	u64 config = 0;
+	unsigned int period = IBS_SAMPLE_PERIOD;
+
+	if (!hwmem_access_profiling)
+		return;
+
+	/* Disable IBS for kernel thread */
+	if (!current->mm)
+		goto out;
+
+	config = (period >> 4) & IBS_OP_MAX_CNT;
+	config |= (period & IBS_OP_MAX_CNT_EXT_MASK);
+	config |= ibs_config;
+out:
+	wrmsrl(MSR_AMD64_IBSOPCTL, config);
+}
+
 bool hwmem_access_profiler_inuse(void)
 {
 	return hwmem_access_profiling;
@@ -309,6 +340,7 @@ static int __init ibs_access_profiling_init(void)
 			  x86_amd_ibs_access_profile_startup,
 			  x86_amd_ibs_access_profile_teardown);
 
+	hwmem_access_profiling = true;
 	pr_info("IBS setup for memory access profiling\n");
 	return 0;
 }
diff --git a/include/linux/pghot.h b/include/linux/pghot.h
index 6410cb131d3d..00f450f79c86 100644
--- a/include/linux/pghot.h
+++ b/include/linux/pghot.h
@@ -4,8 +4,12 @@
 
 #ifdef CONFIG_HWMEM_PROFILER
 bool hwmem_access_profiler_inuse(void);
+void hwmem_access_profiling_start(void);
+void hwmem_access_profiling_stop(void);
 #else
 static inline bool hwmem_access_profiler_inuse(void) { return false; }
+static inline void hwmem_access_profiling_start(void) {}
+static inline void hwmem_access_profiling_stop(void) {}
 #endif
 
 /* Page hotness temperature sources */
-- 
2.34.1



  parent reply	other threads:[~2025-12-06 10:17 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-12-06 10:14 [RFC PATCH v4 0/9] mm: Hot page tracking and promotion infrastructure Bharata B Rao
2025-12-06 10:14 ` [RFC PATCH v4 1/9] mm: migrate: Allow misplaced migration without VMA too Bharata B Rao
2025-12-06 10:14 ` [RFC PATCH v4 2/9] migrate: implement migrate_misplaced_folios_batch Bharata B Rao
2025-12-06 10:14 ` [RFC PATCH v4 3/9] mm: Hot page tracking and promotion Bharata B Rao
2025-12-06 10:14 ` [RFC PATCH v4 4/9] x86: ibs: In-kernel IBS driver for memory access profiling Bharata B Rao
2025-12-06 10:14 ` Bharata B Rao [this message]
2025-12-06 10:14 ` [RFC PATCH v4 6/9] mm: mglru: generalize page table walk Bharata B Rao
2025-12-06 10:14 ` [RFC PATCH v4 7/9] mm: klruscand: use mglru scanning for page promotion Bharata B Rao
2025-12-06 10:14 ` [RFC PATCH v4 8/9] mm: sched: Move hot page promotion from NUMAB=2 to pghot tracking Bharata B Rao
2025-12-06 10:14 ` [RFC PATCH v4 9/9] mm: pghot: Add folio_mark_accessed() as hotness source Bharata B Rao

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20251206101423.5004-6-bharata@amd.com \
    --to=bharata@amd.com \
    --cc=Jonathan.Cameron@huawei.com \
    --cc=akpm@linux-foundation.org \
    --cc=alok.rathore@samsung.com \
    --cc=balbirs@nvidia.com \
    --cc=byungchul@sk.com \
    --cc=dave.hansen@intel.com \
    --cc=dave@stgolabs.net \
    --cc=david@redhat.com \
    --cc=gourry@gourry.net \
    --cc=joshua.hahnjy@gmail.com \
    --cc=kinseyho@google.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mgorman@techsingularity.net \
    --cc=mingo@redhat.com \
    --cc=nifan.cxl@gmail.com \
    --cc=peterz@infradead.org \
    --cc=raghavendra.kt@amd.com \
    --cc=riel@surriel.com \
    --cc=rientjes@google.com \
    --cc=shivankg@amd.com \
    --cc=sj@kernel.org \
    --cc=weixugc@google.com \
    --cc=willy@infradead.org \
    --cc=xuezhengchu@huawei.com \
    --cc=yiannis@zptcorp.com \
    --cc=ying.huang@linux.alibaba.com \
    --cc=yuanchu@google.com \
    --cc=ziy@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox