From: JP Kobryn <inwardvessel@gmail.com>
To: shakeel.butt@linux.dev, tj@kernel.org, mhocko@kernel.org,
hannes@cmpxchg.org, yosryahmed@google.com,
akpm@linux-foundation.org
Cc: linux-mm@kvack.org, cgroups@vger.kernel.org, kernel-team@meta.com
Subject: [PATCH 05/11] cgroup: separate rstat for bpf cgroups
Date: Mon, 17 Feb 2025 19:14:42 -0800
Message-ID: <20250218031448.46951-6-inwardvessel@gmail.com>
In-Reply-To: <20250218031448.46951-1-inwardvessel@gmail.com>

The processing of bpf cgroup stats is currently tied to the rstat
actions of the other subsystems. Make them updated and flushed
independently: give the cgroup_bpf struct its own cgroup_rstat
instance and define a cgroup_rstat_ops instance specifically for
cgroup_bpf. Then drop the kfunc status of the existing updated/flush
API calls and instead create new updated/flush kfuncs dedicated to
bpf cgroups. These new kfuncs use the bpf-specific rstat ops to plumb
back into the existing rstat routines. Where applicable, guard the
bpf rstat code with pre-processor conditionals.

Signed-off-by: JP Kobryn <inwardvessel@gmail.com>
---
include/linux/bpf-cgroup-defs.h | 3 +
include/linux/cgroup.h | 3 +
kernel/bpf/cgroup.c | 6 ++
kernel/cgroup/cgroup-internal.h | 5 +
kernel/cgroup/rstat.c | 95 ++++++++++++++++---
.../selftests/bpf/progs/btf_type_tag_percpu.c | 4 +-
.../bpf/progs/cgroup_hierarchical_stats.c | 8 +-
7 files changed, 107 insertions(+), 17 deletions(-)
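
Not part of the patch itself, but as a usage reference: below is a
minimal sketch of a BPF program calling the two new kfuncs, mirroring
the selftest changes further down. The SEC() attach points and the
counting logic are assumptions for illustration; only the two extern
kfunc declarations come from this patch.

/* Sketch only: assumes vmlinux.h and libbpf's BPF-side headers. */
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char _license[] SEC("license") = "GPL";

/* kfuncs introduced by this patch: they take a struct cgroup * rather
 * than a struct cgroup_subsys_state *, so callers no longer need to
 * pass &cgrp->self.
 */
extern void bpf_cgroup_rstat_updated(struct cgroup *cgrp, int cpu) __ksym;
extern void bpf_cgroup_rstat_flush(struct cgroup *cgrp) __ksym;

SEC("fentry/cgroup_attach_task") /* assumed attach point */
int BPF_PROG(count_attach, struct cgroup *dst_cgrp,
	     struct task_struct *leader, bool threadgroup)
{
	/* ... bump a per-cpu counter keyed by dst_cgrp's id here ... */

	/* mark this cgroup's bpf rstat tree dirty on the current cpu */
	bpf_cgroup_rstat_updated(dst_cgrp, bpf_get_smp_processor_id());
	return 0;
}

SEC("iter.s/cgroup") /* sleepable section, since the flush kfunc may block */
int BPF_PROG(dump_attach, struct bpf_iter_meta *meta, struct cgroup *cgrp)
{
	if (!cgrp)
		return 1;

	/* propagate pending per-cpu updates before reading totals */
	bpf_cgroup_rstat_flush(cgrp);

	/* ... look up the aggregated counter and bpf_seq_printf() it ... */
	return 0;
}
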
diff --git a/include/linux/bpf-cgroup-defs.h b/include/linux/bpf-cgroup-defs.h
index 0985221d5478..e68359f861fb 100644
--- a/include/linux/bpf-cgroup-defs.h
+++ b/include/linux/bpf-cgroup-defs.h
@@ -75,6 +75,9 @@ struct cgroup_bpf {
/* cgroup_bpf is released using a work queue */
struct work_struct release_work;
+
+ /* per-cpu recursive resource statistics */
+ struct cgroup_rstat rstat;
};
#else /* CONFIG_CGROUP_BPF */
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index eec970622419..253ce4bff576 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -836,6 +836,9 @@ static inline bool cgroup_task_frozen(struct task_struct *task)
#endif /* !CONFIG_CGROUPS */
#ifdef CONFIG_CGROUP_BPF
+void bpf_cgroup_rstat_updated(struct cgroup *cgrp, int cpu);
+void bpf_cgroup_rstat_flush(struct cgroup *cgrp);
+
static inline void cgroup_bpf_get(struct cgroup *cgrp)
{
percpu_ref_get(&cgrp->bpf.refcnt);
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 46e5db65dbc8..72bcfdbda6b1 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -210,6 +210,7 @@ void cgroup_bpf_offline(struct cgroup *cgrp)
{
cgroup_get(cgrp);
percpu_ref_kill(&cgrp->bpf.refcnt);
+ bpf_cgroup_rstat_exit(&cgrp->bpf);
}
static void bpf_cgroup_storages_free(struct bpf_cgroup_storage *storages[])
@@ -490,6 +491,10 @@ int cgroup_bpf_inherit(struct cgroup *cgrp)
if (ret)
return ret;
+ ret = bpf_cgroup_rstat_init(&cgrp->bpf);
+ if (ret)
+ goto cleanup_ref;
+
for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p))
cgroup_bpf_get(p);
@@ -513,6 +518,7 @@ int cgroup_bpf_inherit(struct cgroup *cgrp)
for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p))
cgroup_bpf_put(p);
+cleanup_ref:
percpu_ref_exit(&cgrp->bpf.refcnt);
return -ENOMEM;
diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h
index 87d062baff90..bba1a1794de2 100644
--- a/kernel/cgroup/cgroup-internal.h
+++ b/kernel/cgroup/cgroup-internal.h
@@ -274,6 +274,11 @@ void cgroup_rstat_exit(struct cgroup_subsys_state *css);
void cgroup_rstat_boot(void);
void cgroup_base_stat_cputime_show(struct seq_file *seq);
+#ifdef CONFIG_CGROUP_BPF
+int bpf_cgroup_rstat_init(struct cgroup_bpf *bpf);
+void bpf_cgroup_rstat_exit(struct cgroup_bpf *bpf);
+#endif /* CONFIG_CGROUP_BPF */
+
/*
* namespace.c
*/
diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
index a8bb304e49c4..14dd8217db64 100644
--- a/kernel/cgroup/rstat.c
+++ b/kernel/cgroup/rstat.c
@@ -73,6 +73,47 @@ static struct cgroup_rstat_ops rstat_css_ops = {
.flush_fn = rstat_flush_via_css,
};
+#ifdef CONFIG_CGROUP_BPF
+__weak noinline void bpf_rstat_flush(struct cgroup *cgrp,
+ struct cgroup *parent, int cpu);
+
+static struct cgroup *rstat_cgroup_via_bpf(struct cgroup_rstat *rstat)
+{
+ struct cgroup_bpf *bpf = container_of(rstat, typeof(*bpf), rstat);
+ struct cgroup *cgrp = container_of(bpf, typeof(*cgrp), bpf);
+
+ return cgrp;
+}
+
+static struct cgroup_rstat *rstat_parent_via_bpf(
+ struct cgroup_rstat *rstat)
+{
+ struct cgroup *cgrp, *cgrp_parent;
+
+ cgrp = rstat_cgroup_via_bpf(rstat);
+ cgrp_parent = cgroup_parent(cgrp);
+ if (!cgrp_parent)
+ return NULL;
+
+ return &(cgrp_parent->bpf.rstat);
+}
+
+static void rstat_flush_via_bpf(struct cgroup_rstat *rstat, int cpu)
+{
+ struct cgroup *cgrp, *cgrp_parent;
+
+ cgrp = rstat_cgroup_via_bpf(rstat);
+ cgrp_parent = cgroup_parent(cgrp);
+ bpf_rstat_flush(cgrp, cgrp_parent, cpu);
+}
+
+static struct cgroup_rstat_ops rstat_bpf_ops = {
+ .parent_fn = rstat_parent_via_bpf,
+ .cgroup_fn = rstat_cgroup_via_bpf,
+ .flush_fn = rstat_flush_via_bpf,
+};
+#endif /* CONFIG_CGROUP_BPF */
+
/*
* Helper functions for rstat per CPU lock (cgroup_rstat_cpu_lock).
*
@@ -187,11 +228,18 @@ static void __cgroup_rstat_updated(struct cgroup_rstat *rstat, int cpu,
* rstat_cpu->updated_children list. See the comment on top of
* cgroup_rstat_cpu definition for details.
*/
-__bpf_kfunc void cgroup_rstat_updated(struct cgroup_subsys_state *css, int cpu)
+void cgroup_rstat_updated(struct cgroup_subsys_state *css, int cpu)
{
__cgroup_rstat_updated(&css->rstat, cpu, &rstat_css_ops);
}
+#ifdef CONFIG_CGROUP_BPF
+__bpf_kfunc void bpf_cgroup_rstat_updated(struct cgroup *cgroup, int cpu)
+{
+ __cgroup_rstat_updated(&(cgroup->bpf.rstat), cpu, &rstat_bpf_ops);
+}
+#endif /* CONFIG_CGROUP_BPF */
+
/**
* cgroup_rstat_push_children - push children cgroups into the given list
* @head: current head of the list (= subtree root)
@@ -330,8 +378,7 @@ static struct cgroup_rstat *cgroup_rstat_updated_list(
__bpf_hook_start();
-__weak noinline void bpf_rstat_flush(struct cgroup *cgrp,
- struct cgroup *parent, int cpu)
+void bpf_rstat_flush(struct cgroup *cgrp, struct cgroup *parent, int cpu)
{
}
@@ -379,12 +426,8 @@ static void cgroup_rstat_flush_locked(struct cgroup_rstat *rstat,
struct cgroup_rstat *pos = cgroup_rstat_updated_list(
rstat, cpu, ops);
- for (; pos; pos = pos->rstat_flush_next) {
- struct cgroup *pos_cgroup = ops->cgroup_fn(pos);
-
+ for (; pos; pos = pos->rstat_flush_next)
ops->flush_fn(pos, cpu);
- bpf_rstat_flush(pos_cgroup, cgroup_parent(pos_cgroup), cpu);
- }
/* play nice and yield if necessary */
if (need_resched() || spin_needbreak(&cgroup_rstat_lock)) {
@@ -424,11 +467,18 @@ static void __cgroup_rstat_flush(struct cgroup_rstat *rstat,
*
* This function may block.
*/
-__bpf_kfunc void cgroup_rstat_flush(struct cgroup_subsys_state *css)
+void cgroup_rstat_flush(struct cgroup_subsys_state *css)
{
__cgroup_rstat_flush(&css->rstat, &rstat_css_ops);
}
+#ifdef CONFIG_CGROUP_BPF
+__bpf_kfunc void bpf_cgroup_rstat_flush(struct cgroup *cgroup)
+{
+ __cgroup_rstat_flush(&(cgroup->bpf.rstat), &rstat_bpf_ops);
+}
+#endif /* CONFIG_CGROUP_BPF */
+
static void __cgroup_rstat_flush_hold(struct cgroup_rstat *rstat,
struct cgroup_rstat_ops *ops)
__acquires(&cgroup_rstat_lock)
@@ -532,6 +582,27 @@ void cgroup_rstat_exit(struct cgroup_subsys_state *css)
__cgroup_rstat_exit(rstat);
}
+#ifdef CONFIG_CGROUP_BPF
+int bpf_cgroup_rstat_init(struct cgroup_bpf *bpf)
+{
+ struct cgroup_rstat *rstat = &bpf->rstat;
+
+ rstat->rstat_cpu = alloc_percpu(struct cgroup_rstat_cpu);
+ if (!rstat->rstat_cpu)
+ return -ENOMEM;
+
+ __cgroup_rstat_init(rstat);
+
+ return 0;
+}
+
+void bpf_cgroup_rstat_exit(struct cgroup_bpf *bpf)
+{
+ __cgroup_rstat_flush(&bpf->rstat, &rstat_bpf_ops);
+ __cgroup_rstat_exit(&bpf->rstat);
+}
+#endif /* CONFIG_CGROUP_BPF */
+
void __init cgroup_rstat_boot(void)
{
int cpu;
@@ -754,10 +825,11 @@ void cgroup_base_stat_cputime_show(struct seq_file *seq)
cgroup_force_idle_show(seq, &cgrp->bstat);
}
+#ifdef CONFIG_CGROUP_BPF
/* Add bpf kfuncs for cgroup_rstat_updated() and cgroup_rstat_flush() */
BTF_KFUNCS_START(bpf_rstat_kfunc_ids)
-BTF_ID_FLAGS(func, cgroup_rstat_updated)
-BTF_ID_FLAGS(func, cgroup_rstat_flush, KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_cgroup_rstat_updated)
+BTF_ID_FLAGS(func, bpf_cgroup_rstat_flush, KF_SLEEPABLE)
BTF_KFUNCS_END(bpf_rstat_kfunc_ids)
static const struct btf_kfunc_id_set bpf_rstat_kfunc_set = {
@@ -771,3 +843,4 @@ static int __init bpf_rstat_kfunc_init(void)
&bpf_rstat_kfunc_set);
}
late_initcall(bpf_rstat_kfunc_init);
+#endif /* CONFIG_CGROUP_BPF */
diff --git a/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c b/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c
index 310cd51e12e8..da15ada56218 100644
--- a/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c
+++ b/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c
@@ -45,7 +45,7 @@ int BPF_PROG(test_percpu2, struct bpf_testmod_btf_type_tag_2 *arg)
SEC("tp_btf/cgroup_mkdir")
int BPF_PROG(test_percpu_load, struct cgroup *cgrp, const char *path)
{
- g = (__u64)cgrp->self.rstat.rstat_cpu->updated_children;
+ g = (__u64)cgrp->bpf.rstat.rstat_cpu->updated_children;
return 0;
}
@@ -57,7 +57,7 @@ int BPF_PROG(test_percpu_helper, struct cgroup *cgrp, const char *path)
cpu = bpf_get_smp_processor_id();
rstat = (struct cgroup_rstat_cpu *)bpf_per_cpu_ptr(
- cgrp->self.rstat.rstat_cpu, cpu);
+ cgrp->bpf.rstat.rstat_cpu, cpu);
if (rstat) {
/* READ_ONCE */
*(volatile int *)rstat;
diff --git a/tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c b/tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c
index 10c803c8dc70..24450dd4d3f3 100644
--- a/tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c
+++ b/tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c
@@ -37,8 +37,8 @@ struct {
__type(value, struct attach_counter);
} attach_counters SEC(".maps");
-extern void cgroup_rstat_updated(struct cgroup_subsys_state *css, int cpu) __ksym;
-extern void cgroup_rstat_flush(struct cgroup_subsys_state *css) __ksym;
+extern void bpf_cgroup_rstat_updated(struct cgroup *cgrp, int cpu) __ksym;
+extern void bpf_cgroup_rstat_flush(struct cgroup *cgrp) __ksym;
static uint64_t cgroup_id(struct cgroup *cgrp)
{
@@ -75,7 +75,7 @@ int BPF_PROG(counter, struct cgroup *dst_cgrp, struct task_struct *leader,
else if (create_percpu_attach_counter(cg_id, 1))
return 0;
- cgroup_rstat_updated(&dst_cgrp->self, bpf_get_smp_processor_id());
+ bpf_cgroup_rstat_updated(dst_cgrp, bpf_get_smp_processor_id());
return 0;
}
@@ -141,7 +141,7 @@ int BPF_PROG(dumper, struct bpf_iter_meta *meta, struct cgroup *cgrp)
return 1;
/* Flush the stats to make sure we get the most updated numbers */
- cgroup_rstat_flush(&cgrp->self);
+ bpf_cgroup_rstat_flush(cgrp);
total_counter = bpf_map_lookup_elem(&attach_counters, &cg_id);
if (!total_counter) {
--
2.48.1