linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Baolin Wang <baolin.wang@linux.alibaba.com>
To: akpm@linux-foundation.org, ying.huang@intel.com,
	dave.hansen@linux.intel.com
Cc: ziy@nvidia.com, shy828301@gmail.com,
	baolin.wang@linux.alibaba.com, zhongjiang-ali@linux.alibaba.com,
	xlpang@linux.alibaba.com, linux-mm@kvack.org,
	linux-kernel@vger.kernel.org
Subject: [RFC PATCH 2/4] mm: Add a debug interface to control the range of speculative numa fault
Date: Sun, 12 Dec 2021 19:31:58 +0800	[thread overview]
Message-ID: <913a8a5282d265dc771309ca552c9c62c247c2b0.1639306956.git.baolin.wang@linux.alibaba.com> (raw)
In-Reply-To: <cover.1639306956.git.baolin.wang@linux.alibaba.com>
In-Reply-To: <cover.1639306956.git.baolin.wang@linux.alibaba.com>

Add a debug interface to control the range of speculative numa fault,
which can be used to tune performance or even to close the speculative
numa fault window for some workloads.

Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
---
 mm/memory.c | 46 +++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 43 insertions(+), 3 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index 2c9ed63e4e23..a0f4a2a008cc 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4052,7 +4052,29 @@ vm_fault_t finish_fault(struct vm_fault *vmf)
 static unsigned long fault_around_bytes __read_mostly =
 	rounddown_pow_of_two(65536);
 
+static unsigned long numa_around_bytes __read_mostly;
+
 #ifdef CONFIG_DEBUG_FS
+static int numa_around_bytes_get(void *data, u64 *val)
+{
+	*val = numa_around_bytes;
+	return 0;
+}
+
+static int numa_around_bytes_set(void *data, u64 val)
+{
+	if (val / PAGE_SIZE > PTRS_PER_PTE)
+		return -EINVAL;
+	if (val > PAGE_SIZE)
+		numa_around_bytes = rounddown_pow_of_two(val);
+	else
+		numa_around_bytes = 0; /* rounddown_pow_of_two(0) is undefined */
+	return 0;
+}
+DEFINE_DEBUGFS_ATTRIBUTE(numa_around_bytes_fops,
+			 numa_around_bytes_get,
+			 numa_around_bytes_set, "%llu\n");
+
 static int fault_around_bytes_get(void *data, u64 *val)
 {
 	*val = fault_around_bytes;
@@ -4080,6 +4102,8 @@ static int __init fault_around_debugfs(void)
 {
 	debugfs_create_file_unsafe("fault_around_bytes", 0644, NULL, NULL,
 				   &fault_around_bytes_fops);
+	debugfs_create_file_unsafe("numa_around_bytes", 0644, NULL, NULL,
+				   &numa_around_bytes_fops);
 	return 0;
 }
 late_initcall(fault_around_debugfs);
@@ -4348,10 +4372,13 @@ static bool try_next_numa_page(struct vm_fault *vmf, unsigned int win_pages,
 	((win) & NUMA_FAULT_WINDOW_SIZE_MASK))
 
 static inline unsigned int numa_fault_max_pages(struct vm_area_struct *vma,
-						unsigned long fault_address)
+						unsigned long fault_address,
+						unsigned long numa_around_size)
 {
+	unsigned long numa_around_addr =
+		(fault_address + numa_around_size) & PAGE_MASK;
 	unsigned long pmd_end_addr = (fault_address & PMD_MASK) + PMD_SIZE;
-	unsigned long max_fault_addr = min_t(unsigned long, pmd_end_addr,
+	unsigned long max_fault_addr = min3(numa_around_addr, pmd_end_addr,
 					    vma->vm_end);
 
 	return (max_fault_addr - fault_address - 1) >> PAGE_SHIFT;
@@ -4360,12 +4387,24 @@ static inline unsigned int numa_fault_max_pages(struct vm_area_struct *vma,
 static unsigned int adjust_numa_fault_window(struct vm_area_struct *vma,
 					     unsigned long fault_address)
 {
+	unsigned long numa_around_size = READ_ONCE(numa_around_bytes);
 	unsigned long numafault_ahead = GET_NUMA_FAULT_INFO(vma);
         unsigned long prev_start = NUMA_FAULT_WINDOW_START(numafault_ahead);
         unsigned int prev_pages = NUMA_FAULT_WINDOW_SIZE(numafault_ahead);
 	unsigned long win_start;
 	unsigned int win_pages, max_fault_pages;
 
+	/*
+	 * Shut down the proactive numa fault if the numa_around_bytes
+	 * is set to 0.
+	 */
+	if (!numa_around_size) {
+		if (numafault_ahead)
+			atomic_long_set(&vma->numafault_ahead_info,
+					NUMA_FAULT_INFO(0, 0));
+		return 0;
+	}
+
 	win_start = fault_address + PAGE_SIZE;
 
 	/*
@@ -4437,7 +4476,8 @@ static unsigned int adjust_numa_fault_window(struct vm_area_struct *vma,
 	 * Make sure the size of ahead numa fault address is less than the
 	 * size of current VMA or PMD.
 	 */
-	max_fault_pages = numa_fault_max_pages(vma, fault_address);
+	max_fault_pages = numa_fault_max_pages(vma, fault_address,
+					       numa_around_size);
 	if (win_pages > max_fault_pages)
 		win_pages = max_fault_pages;
 
-- 
2.27.0



  parent reply	other threads:[~2021-12-12 11:33 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-12-12 11:31 [RFC PATCH 0/4] Add speculative numa fault support Baolin Wang
2021-12-12 11:31 ` [RFC PATCH 1/4] mm: " Baolin Wang
2021-12-12 11:31 ` Baolin Wang [this message]
2021-12-12 11:31 ` [RFC PATCH 3/4] mm: Add speculative numa fault stats Baolin Wang
2021-12-12 11:32 ` [RFC PATCH 4/4] mm: Update the speculative pages' accessing time Baolin Wang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=913a8a5282d265dc771309ca552c9c62c247c2b0.1639306956.git.baolin.wang@linux.alibaba.com \
    --to=baolin.wang@linux.alibaba.com \
    --cc=akpm@linux-foundation.org \
    --cc=dave.hansen@linux.intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=shy828301@gmail.com \
    --cc=xlpang@linux.alibaba.com \
    --cc=ying.huang@intel.com \
    --cc=zhongjiang-ali@linux.alibaba.com \
    --cc=ziy@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox