From: Yafang Shao <laoar.shao@gmail.com>
To: roman.gushchin@linux.dev, inwardvessel@gmail.com,
shakeel.butt@linux.dev, akpm@linux-foundation.org,
ast@kernel.org, daniel@iogearbox.net, andrii@kernel.org,
mkoutny@suse.com, yu.c.chen@intel.com, zhao1.liu@intel.com
Cc: bpf@vger.kernel.org, linux-mm@kvack.org,
Yafang Shao <laoar.shao@gmail.com>
Subject: [RFC PATCH bpf-next 1/3] sched: add helpers for numa balancing
Date: Tue, 13 Jan 2026 20:12:36 +0800 [thread overview]
Message-ID: <20260113121238.11300-2-laoar.shao@gmail.com> (raw)
In-Reply-To: <20260113121238.11300-1-laoar.shao@gmail.com>
Three new helpers — task_numab_enabled(), task_numab_mode_normal(), and
task_numab_mode_tiering() — are introduced for use in later patches.
Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
---
include/linux/sched/numa_balancing.h | 27 +++++++++++++++++++++++++++
kernel/sched/fair.c | 15 +++++++--------
kernel/sched/sched.h | 1 -
mm/memory-tiers.c | 3 ++-
mm/mempolicy.c | 3 +--
mm/migrate.c | 7 ++++---
mm/vmscan.c | 7 +++----
7 files changed, 44 insertions(+), 19 deletions(-)
diff --git a/include/linux/sched/numa_balancing.h b/include/linux/sched/numa_balancing.h
index 52b22c5c396d..792b6665f476 100644
--- a/include/linux/sched/numa_balancing.h
+++ b/include/linux/sched/numa_balancing.h
@@ -8,6 +8,7 @@
*/
#include <linux/sched.h>
+#include <linux/sched/sysctl.h>
#define TNF_MIGRATED 0x01
#define TNF_NO_GROUP 0x02
@@ -32,6 +33,28 @@ extern void set_numabalancing_state(bool enabled);
extern void task_numa_free(struct task_struct *p, bool final);
bool should_numa_migrate_memory(struct task_struct *p, struct folio *folio,
int src_nid, int dst_cpu);
+
+extern struct static_key_false sched_numa_balancing;
+static inline bool task_numab_enabled(struct task_struct *p)
+{
+ if (static_branch_unlikely(&sched_numa_balancing))
+ return true;
+ return false;
+}
+
+static inline bool task_numab_mode_normal(void)
+{
+ if (sysctl_numa_balancing_mode & NUMA_BALANCING_NORMAL)
+ return true;
+ return false;
+}
+
+static inline bool task_numab_mode_tiering(void)
+{
+ if (sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING)
+ return true;
+ return false;
+}
#else
static inline void task_numa_fault(int last_node, int node, int pages,
int flags)
@@ -52,6 +75,10 @@ static inline bool should_numa_migrate_memory(struct task_struct *p,
{
return true;
}
+static inline bool task_numab_enabled(struct task_struct *p)
+{
+ return false;
+}
#endif
#endif /* _LINUX_SCHED_NUMA_BALANCING_H */
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index da46c3164537..4f6583ef83b2 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1932,8 +1932,8 @@ bool should_numa_migrate_memory(struct task_struct *p, struct folio *folio,
this_cpupid = cpu_pid_to_cpupid(dst_cpu, current->pid);
last_cpupid = folio_xchg_last_cpupid(folio, this_cpupid);
- if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING) &&
- !node_is_toptier(src_nid) && !cpupid_valid(last_cpupid))
+ if (!(task_numab_mode_tiering()) && !node_is_toptier(src_nid) &&
+ !cpupid_valid(last_cpupid))
return false;
/*
@@ -3140,7 +3140,7 @@ void task_numa_fault(int last_cpupid, int mem_node, int pages, int flags)
struct numa_group *ng;
int priv;
- if (!static_branch_likely(&sched_numa_balancing))
+ if (!task_numab_enabled(p))
return;
/* for example, ksmd faulting in a user's mm */
@@ -3151,8 +3151,7 @@ void task_numa_fault(int last_cpupid, int mem_node, int pages, int flags)
* NUMA faults statistics are unnecessary for the slow memory
* node for memory tiering mode.
*/
- if (!node_is_toptier(mem_node) &&
- (sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING ||
+ if (!node_is_toptier(mem_node) && (task_numab_mode_tiering() ||
!cpupid_valid(last_cpupid)))
return;
@@ -3611,7 +3610,7 @@ static void update_scan_period(struct task_struct *p, int new_cpu)
int src_nid = cpu_to_node(task_cpu(p));
int dst_nid = cpu_to_node(new_cpu);
- if (!static_branch_likely(&sched_numa_balancing))
+ if (!task_numab_enabled(p))
return;
if (!p->mm || !p->numa_faults || (p->flags & PF_EXITING))
@@ -9353,7 +9352,7 @@ static long migrate_degrades_locality(struct task_struct *p, struct lb_env *env)
unsigned long src_weight, dst_weight;
int src_nid, dst_nid, dist;
- if (!static_branch_likely(&sched_numa_balancing))
+ if (!task_numab_enabled(p))
return 0;
if (!p->numa_faults || !(env->sd->flags & SD_NUMA))
@@ -13374,7 +13373,7 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
entity_tick(cfs_rq, se, queued);
}
- if (static_branch_unlikely(&sched_numa_balancing))
+ if (task_numab_enabled(curr))
task_tick_numa(rq, curr);
update_misfit_status(curr, rq);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index d30cca6870f5..1247e4b0c2b0 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2269,7 +2269,6 @@ extern struct static_key sched_feat_keys[__SCHED_FEAT_NR];
#endif /* !CONFIG_JUMP_LABEL */
-extern struct static_key_false sched_numa_balancing;
extern struct static_key_false sched_schedstats;
static inline u64 global_rt_period(void)
diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c
index 864811fff409..cb14d557a995 100644
--- a/mm/memory-tiers.c
+++ b/mm/memory-tiers.c
@@ -7,6 +7,7 @@
#include <linux/memory-tiers.h>
#include <linux/notifier.h>
#include <linux/sched/sysctl.h>
+#include <linux/sched/numa_balancing.h>
#include "internal.h"
@@ -64,7 +65,7 @@ static const struct bus_type memory_tier_subsys = {
*/
bool folio_use_access_time(struct folio *folio)
{
- return (sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING) &&
+ return (task_numab_mode_tiering()) &&
!node_is_toptier(folio_nid(folio));
}
#endif
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 68a98ba57882..589bf37bc4ee 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -863,8 +863,7 @@ bool folio_can_map_prot_numa(struct folio *folio, struct vm_area_struct *vma,
* Skip scanning top tier node if normal numa
* balancing is disabled
*/
- if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_NORMAL) &&
- node_is_toptier(nid))
+ if (!task_numab_mode_normal() && node_is_toptier(nid))
return false;
if (folio_use_access_time(folio))
diff --git a/mm/migrate.c b/mm/migrate.c
index 5169f9717f60..aa540f4d4cc8 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -41,6 +41,7 @@
#include <linux/ptrace.h>
#include <linux/memory.h>
#include <linux/sched/sysctl.h>
+#include <linux/sched/numa_balancing.h>
#include <linux/memory-tiers.h>
#include <linux/pagewalk.h>
@@ -802,7 +803,7 @@ void folio_migrate_flags(struct folio *newfolio, struct folio *folio)
* memory node, reset cpupid, because that is used to record
* page access time in slow memory node.
*/
- if (sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING) {
+ if (task_numab_mode_tiering()) {
bool f_toptier = node_is_toptier(folio_nid(folio));
bool t_toptier = node_is_toptier(folio_nid(newfolio));
@@ -2685,7 +2686,7 @@ int migrate_misplaced_folio_prepare(struct folio *folio,
if (!migrate_balanced_pgdat(pgdat, nr_pages)) {
int z;
- if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING))
+ if (!task_numab_mode_tiering())
return -EAGAIN;
for (z = pgdat->nr_zones - 1; z >= 0; z--) {
if (managed_zone(pgdat->node_zones + z))
@@ -2737,7 +2738,7 @@ int migrate_misplaced_folio(struct folio *folio, int node)
if (nr_succeeded) {
count_vm_numa_events(NUMA_PAGE_MIGRATE, nr_succeeded);
count_memcg_events(memcg, NUMA_PAGE_MIGRATE, nr_succeeded);
- if ((sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING)
+ if (task_numab_mode_tiering()
&& !node_is_toptier(folio_nid(folio))
&& node_is_toptier(node))
mod_lruvec_state(lruvec, PGPROMOTE_SUCCESS, nr_succeeded);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 670fe9fae5ba..7ee5695326e3 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -65,6 +65,7 @@
#include <linux/swapops.h>
#include <linux/balloon_compaction.h>
#include <linux/sched/sysctl.h>
+#include <linux/sched/numa_balancing.h>
#include "internal.h"
#include "swap.h"
@@ -4843,9 +4844,7 @@ static bool should_abort_scan(struct lruvec *lruvec, struct scan_control *sc)
if (!current_is_kswapd() || sc->order)
return false;
- mark = sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING ?
- WMARK_PROMO : WMARK_HIGH;
-
+ mark = task_numab_mode_tiering() ? WMARK_PROMO : WMARK_HIGH;
for (i = 0; i <= sc->reclaim_idx; i++) {
struct zone *zone = lruvec_pgdat(lruvec)->node_zones + i;
unsigned long size = wmark_pages(zone, mark) + MIN_LRU_BATCH;
@@ -6774,7 +6773,7 @@ static bool pgdat_balanced(pg_data_t *pgdat, int order, int highest_zoneidx)
enum zone_stat_item item;
unsigned long free_pages;
- if (sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING)
+ if (task_numab_mode_tiering())
mark = promo_wmark_pages(zone);
else
mark = high_wmark_pages(zone);
--
2.43.5
next prev parent reply other threads:[~2026-01-13 12:13 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-01-13 12:12 [RFC PATCH bpf-next 0/3] BPF-based NUMA balancing Yafang Shao
2026-01-13 12:12 ` Yafang Shao [this message]
2026-01-13 12:42 ` [RFC PATCH bpf-next 1/3] sched: add helpers for numa balancing bot+bpf-ci
2026-01-13 12:48 ` Yafang Shao
2026-01-13 12:12 ` [RFC PATCH bpf-next 2/3] mm: add support for bpf based " Yafang Shao
2026-01-13 12:29 ` bot+bpf-ci
2026-01-13 12:46 ` Yafang Shao
2026-01-13 12:12 ` [RFC PATCH bpf-next 3/3] mm: set numa balancing hot threshold with bpf Yafang Shao
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260113121238.11300-2-laoar.shao@gmail.com \
--to=laoar.shao@gmail.com \
--cc=akpm@linux-foundation.org \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=inwardvessel@gmail.com \
--cc=linux-mm@kvack.org \
--cc=mkoutny@suse.com \
--cc=roman.gushchin@linux.dev \
--cc=shakeel.butt@linux.dev \
--cc=yu.c.chen@intel.com \
--cc=zhao1.liu@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox