From: inwardvessel <inwardvessel@gmail.com>
To: tj@kernel.org, shakeel.butt@linux.dev, yosryahmed@google.com,
mhocko@kernel.org, hannes@cmpxchg.org, akpm@linux-foundation.org
Cc: linux-mm@kvack.org, cgroups@vger.kernel.org, kernel-team@meta.com
Subject: [PATCH 3/4 v2] cgroup: separate rstat locks for subsystems
Date: Thu, 27 Feb 2025 13:55:42 -0800
Message-ID: <20250227215543.49928-4-inwardvessel@gmail.com>
In-Reply-To: <20250227215543.49928-1-inwardvessel@gmail.com>

From: JP Kobryn <inwardvessel@gmail.com>

Let the existing locks be dedicated to the base stats and rename them as
such. Also add new rstat locks for each enabled subsystem. When handling
cgroup subsystem states, distinguish between formal subsystems (memory,
io, etc.) and the base stats subsystem state (represented by
cgroup::self) to decide which locks to take. This change prevents
contention between subsystems when updating/flushing stats.
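
Lock selection in both the flush and updated paths then follows the same
shape; a condensed sketch of the hunks below (not the exact code):

	/* subsystem-level lock used when flushing */
	if (is_base_css(css))	/* base stats, i.e. cgroup::self */
		lock = &cgroup_rstat_base_lock;
	else			/* formal subsystem (memory, io, ...) */
		lock = &cgroup_rstat_subsys_lock[css->ss->id];

	/* per-cpu lock used when updating */
	if (is_base_css(css))
		cpu_lock = per_cpu_ptr(&cgroup_rstat_base_cpu_lock, cpu);
	else
		cpu_lock = per_cpu_ptr(cgroup_rstat_subsys_cpu_lock, cpu) +
			   css->ss->id;
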
Signed-off-by: JP Kobryn <inwardvessel@gmail.com>
---
kernel/cgroup/rstat.c | 93 +++++++++++++++++++++++++++++++++----------
1 file changed, 72 insertions(+), 21 deletions(-)
diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
index 88908ef9212d..b3eaefc1fd07 100644
--- a/kernel/cgroup/rstat.c
+++ b/kernel/cgroup/rstat.c
@@ -9,8 +9,12 @@
#include <trace/events/cgroup.h>
-static DEFINE_SPINLOCK(cgroup_rstat_lock);
-static DEFINE_PER_CPU(raw_spinlock_t, cgroup_rstat_cpu_lock);
+static DEFINE_SPINLOCK(cgroup_rstat_base_lock);
+static DEFINE_PER_CPU(raw_spinlock_t, cgroup_rstat_base_cpu_lock);
+
+static spinlock_t cgroup_rstat_subsys_lock[CGROUP_SUBSYS_COUNT];
+static DEFINE_PER_CPU(raw_spinlock_t,
+ cgroup_rstat_subsys_cpu_lock[CGROUP_SUBSYS_COUNT]);
static void cgroup_base_stat_flush(struct cgroup *cgrp, int cpu);
@@ -20,8 +24,13 @@ static struct cgroup_rstat_cpu *cgroup_rstat_cpu(
return per_cpu_ptr(css->rstat_cpu, cpu);
}
+static inline bool is_base_css(struct cgroup_subsys_state *css)
+{
+ return css->ss == NULL;
+}
+
/*
- * Helper functions for rstat per CPU lock (cgroup_rstat_cpu_lock).
+ * Helper functions for rstat per CPU locks.
*
* This makes it easier to diagnose locking issues and contention in
* production environments. The parameter @fast_path determines the
@@ -36,12 +45,12 @@ unsigned long _cgroup_rstat_cpu_lock(raw_spinlock_t *cpu_lock, int cpu,
bool contended;
/*
- * The _irqsave() is needed because cgroup_rstat_lock is
- * spinlock_t which is a sleeping lock on PREEMPT_RT. Acquiring
- * this lock with the _irq() suffix only disables interrupts on
- * a non-PREEMPT_RT kernel. The raw_spinlock_t below disables
- * interrupts on both configurations. The _irqsave() ensures
- * that interrupts are always disabled and later restored.
+ * The _irqsave() is needed because the locks used for flushing are
+ * spinlock_t which is a sleeping lock on PREEMPT_RT. Acquiring this lock
+ * with the _irq() suffix only disables interrupts on a non-PREEMPT_RT
+ * kernel. The raw_spinlock_t below disables interrupts on both
+ * configurations. The _irqsave() ensures that interrupts are always
+ * disabled and later restored.
*/
contended = !raw_spin_trylock_irqsave(cpu_lock, flags);
if (contended) {
@@ -87,7 +96,7 @@ __bpf_kfunc void cgroup_rstat_updated(
struct cgroup_subsys_state *css, int cpu)
{
struct cgroup *cgrp = css->cgroup;
- raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_rstat_cpu_lock, cpu);
+ raw_spinlock_t *cpu_lock;
unsigned long flags;
/*
@@ -101,6 +110,12 @@ __bpf_kfunc void cgroup_rstat_updated(
if (data_race(cgroup_rstat_cpu(css, cpu)->updated_next))
return;
+ if (is_base_css(css))
+ cpu_lock = per_cpu_ptr(&cgroup_rstat_base_cpu_lock, cpu);
+ else
+ cpu_lock = per_cpu_ptr(cgroup_rstat_subsys_cpu_lock, cpu) +
+ css->ss->id;
+
flags = _cgroup_rstat_cpu_lock(cpu_lock, cpu, cgrp, true);
/* put @css and all ancestors on the corresponding updated lists */
@@ -208,11 +223,17 @@ static struct cgroup_subsys_state *cgroup_rstat_updated_list(
struct cgroup_subsys_state *root, int cpu)
{
struct cgroup *cgrp = root->cgroup;
- raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_rstat_cpu_lock, cpu);
struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(root, cpu);
struct cgroup_subsys_state *head = NULL, *parent, *child;
+ raw_spinlock_t *cpu_lock;
unsigned long flags;
+ if (is_base_css(root))
+ cpu_lock = per_cpu_ptr(&cgroup_rstat_base_cpu_lock, cpu);
+ else
+ cpu_lock = per_cpu_ptr(cgroup_rstat_subsys_cpu_lock, cpu) +
+ root->ss->id;
+
flags = _cgroup_rstat_cpu_lock(cpu_lock, cpu, cgrp, false);
/* Return NULL if this subtree is not on-list */
@@ -315,7 +336,7 @@ static void cgroup_rstat_flush_locked(struct cgroup_subsys_state *css,
struct cgroup *cgrp = css->cgroup;
int cpu;
- lockdep_assert_held(&cgroup_rstat_lock);
+ lockdep_assert_held(lock);
for_each_possible_cpu(cpu) {
struct cgroup_subsys_state *pos;
@@ -356,12 +377,18 @@ static void cgroup_rstat_flush_locked(struct cgroup_subsys_state *css,
__bpf_kfunc void cgroup_rstat_flush(struct cgroup_subsys_state *css)
{
struct cgroup *cgrp = css->cgroup;
+ spinlock_t *lock;
+
+ if (is_base_css(css))
+ lock = &cgroup_rstat_base_lock;
+ else
+ lock = &cgroup_rstat_subsys_lock[css->ss->id];
might_sleep();
- __cgroup_rstat_lock(&cgroup_rstat_lock, cgrp, -1);
- cgroup_rstat_flush_locked(css, &cgroup_rstat_lock);
- __cgroup_rstat_unlock(&cgroup_rstat_lock, cgrp, -1);
+ __cgroup_rstat_lock(lock, cgrp, -1);
+ cgroup_rstat_flush_locked(css, lock);
+ __cgroup_rstat_unlock(lock, cgrp, -1);
}
/**
@@ -376,10 +403,16 @@ __bpf_kfunc void cgroup_rstat_flush(struct cgroup_subsys_state *css)
void cgroup_rstat_flush_hold(struct cgroup_subsys_state *css)
{
struct cgroup *cgrp = css->cgroup;
+ spinlock_t *lock;
+
+ if (is_base_css(css))
+ lock = &cgroup_rstat_base_lock;
+ else
+ lock = &cgroup_rstat_subsys_lock[css->ss->id];
might_sleep();
- __cgroup_rstat_lock(&cgroup_rstat_lock, cgrp, -1);
- cgroup_rstat_flush_locked(css, &cgroup_rstat_lock);
+ __cgroup_rstat_lock(lock, cgrp, -1);
+ cgroup_rstat_flush_locked(css, lock);
}
/**
@@ -389,7 +422,14 @@ void cgroup_rstat_flush_hold(struct cgroup_subsys_state *css)
void cgroup_rstat_flush_release(struct cgroup_subsys_state *css)
{
struct cgroup *cgrp = css->cgroup;
- __cgroup_rstat_unlock(&cgroup_rstat_lock, cgrp, -1);
+ spinlock_t *lock;
+
+ if (is_base_css(css))
+ lock = &cgroup_rstat_base_lock;
+ else
+ lock = &cgroup_rstat_subsys_lock[css->ss->id];
+
+ __cgroup_rstat_unlock(lock, cgrp, -1);
}
int cgroup_rstat_init(struct cgroup_subsys_state *css)
@@ -435,10 +475,21 @@ void cgroup_rstat_exit(struct cgroup_subsys_state *css)
void __init cgroup_rstat_boot(void)
{
- int cpu;
+ struct cgroup_subsys *ss;
+ int cpu, ssid;
- for_each_possible_cpu(cpu)
- raw_spin_lock_init(per_cpu_ptr(&cgroup_rstat_cpu_lock, cpu));
+ for_each_subsys(ss, ssid) {
+ spin_lock_init(&cgroup_rstat_subsys_lock[ssid]);
+ }
+
+ for_each_possible_cpu(cpu) {
+ raw_spin_lock_init(per_cpu_ptr(&cgroup_rstat_base_cpu_lock, cpu));
+
+ for_each_subsys(ss, ssid) {
+ raw_spin_lock_init(
+ per_cpu_ptr(cgroup_rstat_subsys_cpu_lock, cpu) + ssid);
+ }
+ }
}
/*
--
2.43.5