From: Baolin Wang <baolin.wang@linux.alibaba.com>
To: akpm@linux-foundation.org, ying.huang@intel.com,
dave.hansen@linux.intel.com
Cc: ziy@nvidia.com, shy828301@gmail.com,
baolin.wang@linux.alibaba.com, zhongjiang-ali@linux.alibaba.com,
xlpang@linux.alibaba.com, linux-mm@kvack.org,
linux-kernel@vger.kernel.org
Subject: [RFC PATCH 2/4] mm: Add a debug interface to control the range of speculative numa fault
Date: Sun, 12 Dec 2021 19:31:58 +0800 [thread overview]
Message-ID: <913a8a5282d265dc771309ca552c9c62c247c2b0.1639306956.git.baolin.wang@linux.alibaba.com> (raw)
In-Reply-To: <cover.1639306956.git.baolin.wang@linux.alibaba.com>
In-Reply-To: <cover.1639306956.git.baolin.wang@linux.alibaba.com>
Add a debug interface to control the range of speculative numa fault,
which can be used to tune performance or even to disable the speculative
numa fault window for some workloads.
Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
---
mm/memory.c | 46 +++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 43 insertions(+), 3 deletions(-)
diff --git a/mm/memory.c b/mm/memory.c
index 2c9ed63e4e23..a0f4a2a008cc 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4052,7 +4052,29 @@ vm_fault_t finish_fault(struct vm_fault *vmf)
static unsigned long fault_around_bytes __read_mostly =
rounddown_pow_of_two(65536);
+static unsigned long numa_around_bytes __read_mostly;
+
#ifdef CONFIG_DEBUG_FS
+static int numa_around_bytes_get(void *data, u64 *val)
+{
+ *val = numa_around_bytes;
+ return 0;
+}
+
+static int numa_around_bytes_set(void *data, u64 val)
+{
+ if (val / PAGE_SIZE > PTRS_PER_PTE)
+ return -EINVAL;
+ if (val > PAGE_SIZE)
+ numa_around_bytes = rounddown_pow_of_two(val);
+ else
+ numa_around_bytes = 0; /* rounddown_pow_of_two(0) is undefined */
+ return 0;
+}
+DEFINE_DEBUGFS_ATTRIBUTE(numa_around_bytes_fops,
+ numa_around_bytes_get,
+ numa_around_bytes_set, "%llu\n");
+
static int fault_around_bytes_get(void *data, u64 *val)
{
*val = fault_around_bytes;
@@ -4080,6 +4102,8 @@ static int __init fault_around_debugfs(void)
{
debugfs_create_file_unsafe("fault_around_bytes", 0644, NULL, NULL,
&fault_around_bytes_fops);
+ debugfs_create_file_unsafe("numa_around_bytes", 0644, NULL, NULL,
+ &numa_around_bytes_fops);
return 0;
}
late_initcall(fault_around_debugfs);
@@ -4348,10 +4372,13 @@ static bool try_next_numa_page(struct vm_fault *vmf, unsigned int win_pages,
((win) & NUMA_FAULT_WINDOW_SIZE_MASK))
static inline unsigned int numa_fault_max_pages(struct vm_area_struct *vma,
- unsigned long fault_address)
+ unsigned long fault_address,
+ unsigned long numa_around_size)
{
+ unsigned long numa_around_addr =
+ (fault_address + numa_around_size) & PAGE_MASK;
unsigned long pmd_end_addr = (fault_address & PMD_MASK) + PMD_SIZE;
- unsigned long max_fault_addr = min_t(unsigned long, pmd_end_addr,
+ unsigned long max_fault_addr = min3(numa_around_addr, pmd_end_addr,
vma->vm_end);
return (max_fault_addr - fault_address - 1) >> PAGE_SHIFT;
@@ -4360,12 +4387,24 @@ static inline unsigned int numa_fault_max_pages(struct vm_area_struct *vma,
static unsigned int adjust_numa_fault_window(struct vm_area_struct *vma,
unsigned long fault_address)
{
+ unsigned long numa_around_size = READ_ONCE(numa_around_bytes);
unsigned long numafault_ahead = GET_NUMA_FAULT_INFO(vma);
unsigned long prev_start = NUMA_FAULT_WINDOW_START(numafault_ahead);
unsigned int prev_pages = NUMA_FAULT_WINDOW_SIZE(numafault_ahead);
unsigned long win_start;
unsigned int win_pages, max_fault_pages;
+ /*
+ * Shut down the proactive numa fault if the numa_around_bytes
+ * is set to 0.
+ */
+ if (!numa_around_size) {
+ if (numafault_ahead)
+ atomic_long_set(&vma->numafault_ahead_info,
+ NUMA_FAULT_INFO(0, 0));
+ return 0;
+ }
+
win_start = fault_address + PAGE_SIZE;
/*
@@ -4437,7 +4476,8 @@ static unsigned int adjust_numa_fault_window(struct vm_area_struct *vma,
* Make sure the size of ahead numa fault address is less than the
* size of current VMA or PMD.
*/
- max_fault_pages = numa_fault_max_pages(vma, fault_address);
+ max_fault_pages = numa_fault_max_pages(vma, fault_address,
+ numa_around_size);
if (win_pages > max_fault_pages)
win_pages = max_fault_pages;
--
2.27.0
next prev parent reply other threads:[~2021-12-12 11:33 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-12-12 11:31 [RFC PATCH 0/4] Add speculative numa fault support Baolin Wang
2021-12-12 11:31 ` [RFC PATCH 1/4] mm: " Baolin Wang
2021-12-12 11:31 ` Baolin Wang [this message]
2021-12-12 11:31 ` [RFC PATCH 3/4] mm: Add speculative numa fault stats Baolin Wang
2021-12-12 11:32 ` [RFC PATCH 4/4] mm: Update the speculative pages' accessing time Baolin Wang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=913a8a5282d265dc771309ca552c9c62c247c2b0.1639306956.git.baolin.wang@linux.alibaba.com \
--to=baolin.wang@linux.alibaba.com \
--cc=akpm@linux-foundation.org \
--cc=dave.hansen@linux.intel.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=shy828301@gmail.com \
--cc=xlpang@linux.alibaba.com \
--cc=ying.huang@intel.com \
--cc=zhongjiang-ali@linux.alibaba.com \
--cc=ziy@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox