linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Raghavendra K T <raghavendra.kt@amd.com>
To: <linux-kernel@vger.kernel.org>, <linux-mm@kvack.org>
Cc: Ingo Molnar <mingo@redhat.com>,
	Peter Zijlstra <peterz@infradead.org>,
	"Mel Gorman" <mgorman@suse.de>,
	Andrew Morton <akpm@linux-foundation.org>,
	"David Hildenbrand" <david@redhat.com>, <rppt@kernel.org>,
	Juri Lelli <juri.lelli@redhat.com>,
	Vincent Guittot <vincent.guittot@linaro.org>,
	Bharata B Rao <bharata@amd.com>, Johannes Weiner <jweiner@fb.com>,
	"kernel test robot" <oliver.sang@intel.com>,
	Raghavendra K T <raghavendra.kt@amd.com>
Subject: [RFC PATCH 2 1/1] sched/numa: Increase the VMA accessing PID bits
Date: Fri, 22 Mar 2024 19:11:13 +0530	[thread overview]
Message-ID: <88d16815ef4cc2b6c08b4bb713b25421b5589bc7.1710829750.git.raghavendra.kt@amd.com> (raw)
Message-ID: <20240322134113.80xkgVNgD9pU-6PQ9t1QOsExMIOuq2H8XhZ3uE88XEo@z> (raw)
In-Reply-To: <cover.1710829750.git.raghavendra.kt@amd.com>

Currently we use 64 bits to track tasks accessing VMA.

This increases the probability of false-positive cases and thus
potentially causes unnecessary scanning of a VMA even though the
task had not accessed that VMA. Increase it to 128 bits.

Suggested-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Raghavendra K T <raghavendra.kt@amd.com>
---
 include/linux/mm.h       | 29 ++++++++++++++++++++++++++---
 include/linux/mm_types.h |  7 ++++++-
 kernel/sched/fair.c      | 21 ++++++++++++++++-----
 3 files changed, 48 insertions(+), 9 deletions(-)

Is there a better approach than using an array of 2 long variables
to hold the 128 bits?

diff --git a/include/linux/mm.h b/include/linux/mm.h
index f5a97dec5169..d8ff7233cf9b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1741,13 +1741,26 @@ static inline int folio_xchg_access_time(struct folio *folio, int time)
 	return last_time << PAGE_ACCESS_TIME_BUCKETS;
 }
 
+static inline int pid_array_idx(int pid_bit)
+{
+	return (pid_bit / BITS_PER_LONG);
+}
+
+static inline int pid_bit_idx(int pid_bit)
+{
+	return (pid_bit % BITS_PER_LONG);
+}
+
 static inline void vma_set_access_pid_bit(struct vm_area_struct *vma)
 {
 	unsigned int pid_bit;
 
-	pid_bit = hash_32(current->pid, ilog2(BITS_PER_LONG));
-	if (vma->numab_state && !test_bit(pid_bit, &vma->numab_state->pids_active[1])) {
-		__set_bit(pid_bit, &vma->numab_state->pids_active[1]);
+	pid_bit = hash_32(current->pid, ilog2(BITS_PER_LONG * NR_PID_ARRAY));
+
+	if (vma->numab_state && !test_bit(pid_bit_idx(pid_bit),
+				&vma->numab_state->pids_active[1][pid_array_idx(pid_bit)])) {
+		__set_bit(pid_bit_idx(pid_bit),
+				&vma->numab_state->pids_active[1][pid_array_idx(pid_bit)]);
 	}
 }
 #else /* !CONFIG_NUMA_BALANCING */
@@ -1800,6 +1813,16 @@ static inline bool cpupid_match_pid(struct task_struct *task, int cpupid)
 	return false;
 }
 
+static inline int pid_array_idx(int pid_bit)
+{
+	return 0;
+}
+
+static inline int pid_bit_idx(int pid_bit)
+{
+	return 0;
+}
+
 static inline void vma_set_access_pid_bit(struct vm_area_struct *vma)
 {
 }
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 8b611e13153e..34bb8e1f0e1c 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -574,6 +574,11 @@ struct vma_lock {
 	struct rw_semaphore lock;
 };
 
+#define NR_PID_ARRAY	2
+#define NR_TRACKED_PIDS	(BITS_PER_LONG * NR_PID_ARRAY)
+
+#define NR_ACCESS_PID_HIST     2
+
 struct vma_numab_state {
 	/*
 	 * Initialised as time in 'jiffies' after which VMA
@@ -598,7 +603,7 @@ struct vma_numab_state {
 	 * Window moves after next_pid_reset has expired approximately
 	 * every VMA_PID_RESET_PERIOD jiffies:
 	 */
-	unsigned long pids_active[2];
+	unsigned long pids_active[NR_ACCESS_PID_HIST][NR_PID_ARRAY];
 
 	/* MM scan sequence ID when scan first started after VMA creation */
 	int start_scan_seq;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 6a16129f9a5c..63086ca00430 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3159,7 +3159,8 @@ static void reset_ptenuma_scan(struct task_struct *p)
 
 static bool vma_is_accessed(struct mm_struct *mm, struct vm_area_struct *vma)
 {
-	unsigned long pids;
+	int pid_bit, pid_aidx, i;
+	unsigned long pids = 0;
 	/*
 	 * Allow unconditional access first two times, so that all the (pages)
 	 * of VMAs get prot_none fault introduced irrespective of accesses.
@@ -3169,8 +3170,13 @@ static bool vma_is_accessed(struct mm_struct *mm, struct vm_area_struct *vma)
 	if ((READ_ONCE(current->mm->numa_scan_seq) - vma->numab_state->start_scan_seq) < 2)
 		return true;
 
-	pids = vma->numab_state->pids_active[0] | vma->numab_state->pids_active[1];
-	if (test_bit(hash_32(current->pid, ilog2(BITS_PER_LONG)), &pids))
+	pid_bit = hash_32(current->pid, ilog2(BITS_PER_LONG * NR_PID_ARRAY));
+	pid_aidx = pid_array_idx(pid_bit);
+
+	for (i = 0; i < NR_ACCESS_PID_HIST; i++)
+		pids |= vma->numab_state->pids_active[i][pid_aidx];
+
+	if (test_bit(pid_bit_idx(pid_bit), &pids))
 		return true;
 
 	/*
@@ -3204,6 +3210,7 @@ static void task_numa_work(struct callback_head *work)
 	struct vma_iterator vmi;
 	bool vma_pids_skipped;
 	bool vma_pids_forced = false;
+	int i;
 
 	SCHED_WARN_ON(p != container_of(work, struct task_struct, numa_work));
 
@@ -3341,8 +3348,12 @@ static void task_numa_work(struct callback_head *work)
 				time_after(jiffies, vma->numab_state->pids_active_reset)) {
 			vma->numab_state->pids_active_reset = vma->numab_state->pids_active_reset +
 				msecs_to_jiffies(VMA_PID_RESET_PERIOD);
-			vma->numab_state->pids_active[0] = READ_ONCE(vma->numab_state->pids_active[1]);
-			vma->numab_state->pids_active[1] = 0;
+
+			for (i = 0; i < NR_PID_ARRAY; i++) {
+				vma->numab_state->pids_active[0][i] =
+					READ_ONCE(vma->numab_state->pids_active[1][i]);
+				vma->numab_state->pids_active[1][i] = 0;
+			}
 		}
 
 		/* Do not rescan VMAs twice within the same sequence. */
-- 
2.34.1



  reply	other threads:[~2024-03-22 13:42 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-03-22 13:41 [RFC PATCH] A Summary of VMA scanning improvements explored Raghavendra K T
2024-03-22 13:41 ` Raghavendra K T [this message]
2024-03-22 13:41   ` [RFC PATCH 2 1/1] sched/numa: Increase the VMA accessing PID bits Raghavendra K T
2024-03-22 13:41   ` [RFC PATCH 3 1/1] sched/numa: Convert 256MB VMA scan limit notion Raghavendra K T
2024-06-25 14:20   ` [RFC PATCH 1 1/1] sched/numa: Hot VMA and shared VMA optimization Chen Yu
2024-06-26  7:42     ` Raghavendra K T

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=88d16815ef4cc2b6c08b4bb713b25421b5589bc7.1710829750.git.raghavendra.kt@amd.com \
    --to=raghavendra.kt@amd.com \
    --cc=akpm@linux-foundation.org \
    --cc=bharata@amd.com \
    --cc=david@redhat.com \
    --cc=juri.lelli@redhat.com \
    --cc=jweiner@fb.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mgorman@suse.de \
    --cc=mingo@redhat.com \
    --cc=oliver.sang@intel.com \
    --cc=peterz@infradead.org \
    --cc=rppt@kernel.org \
    --cc=vincent.guittot@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox