linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Qiliang Yuan <realwujing@gmail.com>
To: "Ingo Molnar" <mingo@redhat.com>,
	"Peter Zijlstra" <peterz@infradead.org>,
	"Juri Lelli" <juri.lelli@redhat.com>,
	"Vincent Guittot" <vincent.guittot@linaro.org>,
	"Dietmar Eggemann" <dietmar.eggemann@arm.com>,
	"Steven Rostedt" <rostedt@goodmis.org>,
	"Ben Segall" <bsegall@google.com>, "Mel Gorman" <mgorman@suse.de>,
	"Valentin Schneider" <vschneid@redhat.com>,
	"Paul E. McKenney" <paulmck@kernel.org>,
	"Frederic Weisbecker" <frederic@kernel.org>,
	"Neeraj Upadhyay" <neeraj.upadhyay@kernel.org>,
	"Joel Fernandes" <joelagnelf@nvidia.com>,
	"Josh Triplett" <josh@joshtriplett.org>,
	"Boqun Feng" <boqun@kernel.org>,
	"Uladzislau Rezki" <urezki@gmail.com>,
	"Mathieu Desnoyers" <mathieu.desnoyers@efficios.com>,
	"Lai Jiangshan" <jiangshanlai@gmail.com>,
	Zqiang <qiang.zhang@linux.dev>,
	"Anna-Maria Behnsen" <anna-maria@linutronix.de>,
	"Ingo Molnar" <mingo@kernel.org>,
	"Thomas Gleixner" <tglx@kernel.org>, "Tejun Heo" <tj@kernel.org>,
	"Andrew Morton" <akpm@linux-foundation.org>,
	"Vlastimil Babka" <vbabka@kernel.org>,
	"Suren Baghdasaryan" <surenb@google.com>,
	"Michal Hocko" <mhocko@suse.com>,
	"Brendan Jackman" <jackmanb@google.com>,
	"Johannes Weiner" <hannes@cmpxchg.org>, "Zi Yan" <ziy@nvidia.com>,
	"Waiman Long" <longman@redhat.com>,
	"Chen Ridong" <chenridong@huaweicloud.com>,
	"Michal Koutný" <mkoutny@suse.com>,
	"Jonathan Corbet" <corbet@lwn.net>,
	"Shuah Khan" <skhan@linuxfoundation.org>,
	"Shuah Khan" <shuah@kernel.org>
Cc: linux-kernel@vger.kernel.org, rcu@vger.kernel.org,
	linux-mm@kvack.org,  cgroups@vger.kernel.org,
	linux-doc@vger.kernel.org,  linux-kselftest@vger.kernel.org,
	Qiliang Yuan <realwujing@gmail.com>
Subject: [PATCH v2 10/12] cgroup/cpuset: Implement SMT-aware grouping and safety guards
Date: Mon, 13 Apr 2026 15:43:16 +0800	[thread overview]
Message-ID: <20260413-wujing-dhm-v2-10-06df21caba5d@gmail.com> (raw)
In-Reply-To: <20260413-wujing-dhm-v2-0-06df21caba5d@gmail.com>

Dynamic Housekeeping Management allows runtime configuration of kernel overhead
isolation boundaries. However, configuring cpumasks that separate SMT siblings
(e.g., placing one hardware thread in the housekeeping mask and leaving
the other isolated) can lead to severe performance degradation due to
shared L1 caches and pipeline resources.

This patch introduces `cpuset.housekeeping.smt_aware`, a robust safety guard
to prevent user-space from splitting SMT sibling pairs across isolation boundaries.

When `cpuset.housekeeping.smt_aware` is enabled (1):
- Any write to `cpuset.housekeeping.cpus` must include all SMT siblings
  of each CPU present in the new mask (verified via `topology_sibling_cpumask`).
- If an invalid mask is supplied, the write operation is aborted with `-EINVAL`.

This ensures the kernel's housekeeping constraints are met while maintaining
maximum hardware thread efficiency.

Signed-off-by: Qiliang Yuan <realwujing@gmail.com>
---
 kernel/cgroup/cpuset-internal.h |  1 +
 kernel/cgroup/cpuset.c          | 37 +++++++++++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+)

diff --git a/kernel/cgroup/cpuset-internal.h b/kernel/cgroup/cpuset-internal.h
index 3ab437f54ecdf..162594eaf8467 100644
--- a/kernel/cgroup/cpuset-internal.h
+++ b/kernel/cgroup/cpuset-internal.h
@@ -61,6 +61,7 @@ typedef enum {
 	FILE_EFFECTIVE_XCPULIST,
 	FILE_ISOLATED_CPULIST,
 	FILE_HOUSEKEEPING_CPULIST,
+	FILE_HOUSEKEEPING_SMT_AWARE,
 	FILE_CPU_EXCLUSIVE,
 	FILE_MEM_EXCLUSIVE,
 	FILE_MEM_HARDWALL,
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 5df19dc9bfa89..4272bb298ec3d 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -37,6 +37,7 @@
 #include <linux/wait.h>
 #include <linux/workqueue.h>
 #include <linux/task_work.h>
+#include <linux/topology.h>
 
 DEFINE_STATIC_KEY_FALSE(cpusets_pre_enable_key);
 DEFINE_STATIC_KEY_FALSE(cpusets_enabled_key);
@@ -156,6 +157,9 @@ static bool		update_housekeeping;	/* RWCS */
  */
 static cpumask_var_t	isolated_hk_cpus;	/* T */
 
+/* DHM: Enable SMT-aware boundary checks */
+static bool cpuset_housekeeping_smt_aware = false;
+
 /*
  * A flag to force sched domain rebuild at the end of an operation.
  * It can be set in
@@ -3218,6 +3222,16 @@ static ssize_t cpuset_write_housekeeping_cpus(struct kernfs_open_file *of,
 	if (retval)
 		goto out_free;
 
+	if (cpuset_housekeeping_smt_aware) {
+		int cpu;
+		for_each_cpu(cpu, new_mask) {
+			if (!cpumask_subset(topology_sibling_cpumask(cpu), new_mask)) {
+				retval = -EINVAL;
+				goto out_free;
+			}
+		}
+	}
+
 	retval = housekeeping_update_all_types(new_mask);
 
 out_free:
@@ -3225,6 +3239,18 @@ static ssize_t cpuset_write_housekeeping_cpus(struct kernfs_open_file *of,
 	return retval ?: nbytes;
 }
 
+static ssize_t cpuset_write_housekeeping_smt_aware(struct kernfs_open_file *of,
+						   char *buf, size_t nbytes, loff_t off)
+{
+	bool val;
+
+	if (kstrtobool(buf, &val))
+		return -EINVAL;
+
+	cpuset_housekeeping_smt_aware = val;
+	return nbytes;
+}
+
 /*
  * Common handling for a write to a "cpus" or "mems" file.
  */
@@ -3317,6 +3343,9 @@ int cpuset_common_seq_show(struct seq_file *sf, void *v)
 	case FILE_HOUSEKEEPING_CPULIST:
 		seq_printf(sf, "%*pbl\n", cpumask_pr_args(housekeeping_cpumask(HK_TYPE_DOMAIN)));
 		break;
+	case FILE_HOUSEKEEPING_SMT_AWARE:
+		seq_printf(sf, "%d\n", cpuset_housekeeping_smt_aware);
+		break;
 	default:
 		ret = -EINVAL;
 	}
@@ -3464,6 +3493,14 @@ static struct cftype dfl_files[] = {
 		.flags = CFTYPE_ONLY_ON_ROOT,
 	},
 
+	{
+		.name = "housekeeping.smt_aware",
+		.seq_show = cpuset_common_seq_show,
+		.write = cpuset_write_housekeeping_smt_aware,
+		.private = FILE_HOUSEKEEPING_SMT_AWARE,
+		.flags = CFTYPE_ONLY_ON_ROOT,
+	},
+
 	{ }	/* terminate */
 };
 

-- 
2.43.0



  parent reply	other threads:[~2026-04-13  7:45 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-04-13  7:43 [PATCH v2 00/12] Dynamic Housekeeping Management (DHM) via CPUSets Qiliang Yuan
2026-04-13  7:43 ` [PATCH v2 02/12] sched/isolation: Introduce housekeeping notifier infrastructure Qiliang Yuan
2026-04-13  7:43 ` [PATCH v2 03/12] rcu: Support runtime NOCB initialization and dynamic offloading Qiliang Yuan
2026-04-13  7:43 ` [PATCH v2 04/12] tick/nohz: Transition to dynamic full dynticks state management Qiliang Yuan
2026-04-13  7:43 ` [PATCH v2 05/12] genirq: Support dynamic migration for managed interrupts Qiliang Yuan
2026-04-13  7:43 ` [PATCH v2 06/12] watchdog: Allow runtime toggle of lockup detector affinity Qiliang Yuan
2026-04-13  7:43 ` [PATCH v2 07/12] sched/core: Dynamically update scheduler domain housekeeping mask Qiliang Yuan
2026-04-13  7:43 ` [PATCH v2 08/12] workqueue, mm: Support dynamic housekeeping mask updates Qiliang Yuan
2026-04-13  7:43 ` [PATCH v2 09/12] cgroup/cpuset: Introduce CPUSet-driven dynamic housekeeping (DHM) Qiliang Yuan
2026-04-13  7:43 ` Qiliang Yuan [this message]
2026-04-13  7:43 ` [PATCH v2 11/12] Documentation: cgroup-v2: Document " Qiliang Yuan
2026-04-13  7:43 ` [PATCH v2 12/12] selftests: cgroup: Add functional tests for dynamic housekeeping Qiliang Yuan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260413-wujing-dhm-v2-10-06df21caba5d@gmail.com \
    --to=realwujing@gmail.com \
    --cc=akpm@linux-foundation.org \
    --cc=anna-maria@linutronix.de \
    --cc=boqun@kernel.org \
    --cc=bsegall@google.com \
    --cc=cgroups@vger.kernel.org \
    --cc=chenridong@huaweicloud.com \
    --cc=corbet@lwn.net \
    --cc=dietmar.eggemann@arm.com \
    --cc=frederic@kernel.org \
    --cc=hannes@cmpxchg.org \
    --cc=jackmanb@google.com \
    --cc=jiangshanlai@gmail.com \
    --cc=joelagnelf@nvidia.com \
    --cc=josh@joshtriplett.org \
    --cc=juri.lelli@redhat.com \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-kselftest@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=longman@redhat.com \
    --cc=mathieu.desnoyers@efficios.com \
    --cc=mgorman@suse.de \
    --cc=mhocko@suse.com \
    --cc=mingo@kernel.org \
    --cc=mingo@redhat.com \
    --cc=mkoutny@suse.com \
    --cc=neeraj.upadhyay@kernel.org \
    --cc=paulmck@kernel.org \
    --cc=peterz@infradead.org \
    --cc=qiang.zhang@linux.dev \
    --cc=rcu@vger.kernel.org \
    --cc=rostedt@goodmis.org \
    --cc=shuah@kernel.org \
    --cc=skhan@linuxfoundation.org \
    --cc=surenb@google.com \
    --cc=tglx@kernel.org \
    --cc=tj@kernel.org \
    --cc=urezki@gmail.com \
    --cc=vbabka@kernel.org \
    --cc=vincent.guittot@linaro.org \
    --cc=vschneid@redhat.com \
    --cc=ziy@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox