From: JP Kobryn <inwardvessel@gmail.com>
To: shakeel.butt@linux.dev, tj@kernel.org, mhocko@kernel.org,
hannes@cmpxchg.org, yosryahmed@google.com,
akpm@linux-foundation.org
Cc: linux-mm@kvack.org, cgroups@vger.kernel.org, kernel-team@meta.com
Subject: [PATCH 05/11] cgroup: separate rstat for bpf cgroups
Date: Mon, 17 Feb 2025 19:14:42 -0800
Message-ID: <20250218031448.46951-6-inwardvessel@gmail.com>
In-Reply-To: <20250218031448.46951-1-inwardvessel@gmail.com>

The processing of bpf cgroup stats is currently tied to the rstat
actions of the other subsystems. Make them updated and flushed
independently: give the cgroup_bpf struct its own cgroup_rstat
instance and define a cgroup_rstat_ops instance specifically for
cgroup_bpf. Then drop the kfunc status of the existing updated/flush
API calls and instead create new updated/flush kfuncs dedicated to
bpf cgroups. These new kfuncs use the bpf-specific rstat ops to plumb
back into the existing rstat routines. Where applicable, guard the
bpf rstat code with pre-processor conditionals.

Signed-off-by: JP Kobryn <inwardvessel@gmail.com>
---
include/linux/bpf-cgroup-defs.h | 3 +
include/linux/cgroup.h | 3 +
kernel/bpf/cgroup.c | 6 ++
kernel/cgroup/cgroup-internal.h | 5 +
kernel/cgroup/rstat.c | 95 ++++++++++++++++---
.../selftests/bpf/progs/btf_type_tag_percpu.c | 4 +-
.../bpf/progs/cgroup_hierarchical_stats.c | 8 +-
7 files changed, 107 insertions(+), 17 deletions(-)
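
Not part of the patch itself, but as a usage reference: below is a
minimal sketch of a BPF program calling the two new kfuncs, mirroring
the selftest changes further down. The SEC() attach points and the
counting logic are assumptions for illustration; only the two extern
kfunc declarations come from this patch.

/* Sketch only: assumes vmlinux.h and libbpf's BPF-side headers. */
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char _license[] SEC("license") = "GPL";

/* kfuncs introduced by this patch: they take a struct cgroup * rather
 * than a struct cgroup_subsys_state *, so callers no longer need to
 * pass &cgrp->self.
 */
extern void bpf_cgroup_rstat_updated(struct cgroup *cgrp, int cpu) __ksym;
extern void bpf_cgroup_rstat_flush(struct cgroup *cgrp) __ksym;

SEC("fentry/cgroup_attach_task") /* assumed attach point */
int BPF_PROG(count_attach, struct cgroup *dst_cgrp,
	     struct task_struct *leader, bool threadgroup)
{
	/* ... bump a per-cpu counter keyed by dst_cgrp's id here ... */

	/* mark this cgroup's bpf rstat tree dirty on the current cpu */
	bpf_cgroup_rstat_updated(dst_cgrp, bpf_get_smp_processor_id());
	return 0;
}

SEC("iter.s/cgroup") /* sleepable section, since the flush kfunc may block */
int BPF_PROG(dump_attach, struct bpf_iter_meta *meta, struct cgroup *cgrp)
{
	if (!cgrp)
		return 1;

	/* propagate pending per-cpu updates before reading totals */
	bpf_cgroup_rstat_flush(cgrp);

	/* ... look up the aggregated counter and bpf_seq_printf() it ... */
	return 0;
}
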
diff --git a/include/linux/bpf-cgroup-defs.h b/include/linux/bpf-cgroup-defs.h
index 0985221d5478..e68359f861fb 100644
--- a/include/linux/bpf-cgroup-defs.h
+++ b/include/linux/bpf-cgroup-defs.h
@@ -75,6 +75,9 @@ struct cgroup_bpf {
/* cgroup_bpf is released using a work queue */
struct work_struct release_work;
+
+ /* per-cpu recursive resource statistics */
+ struct cgroup_rstat rstat;
};
#else /* CONFIG_CGROUP_BPF */
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index eec970622419..253ce4bff576 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -836,6 +836,9 @@ static inline bool cgroup_task_frozen(struct task_struct *task)
#endif /* !CONFIG_CGROUPS */
#ifdef CONFIG_CGROUP_BPF
+void bpf_cgroup_rstat_updated(struct cgroup *cgrp, int cpu);
+void bpf_cgroup_rstat_flush(struct cgroup *cgrp);
+
static inline void cgroup_bpf_get(struct cgroup *cgrp)
{
percpu_ref_get(&cgrp->bpf.refcnt);
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 46e5db65dbc8..72bcfdbda6b1 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -210,6 +210,7 @@ void cgroup_bpf_offline(struct cgroup *cgrp)
{
cgroup_get(cgrp);
percpu_ref_kill(&cgrp->bpf.refcnt);
+ bpf_cgroup_rstat_exit(&cgrp->bpf);
}
static void bpf_cgroup_storages_free(struct bpf_cgroup_storage *storages[])
@@ -490,6 +491,10 @@ int cgroup_bpf_inherit(struct cgroup *cgrp)
if (ret)
return ret;
+ ret = bpf_cgroup_rstat_init(&cgrp->bpf);
+ if (ret)
+ goto cleanup_ref;
+
for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p))
cgroup_bpf_get(p);
@@ -513,6 +518,7 @@ int cgroup_bpf_inherit(struct cgroup *cgrp)
for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p))
cgroup_bpf_put(p);
+cleanup_ref:
percpu_ref_exit(&cgrp->bpf.refcnt);
return -ENOMEM;
diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h
index 87d062baff90..bba1a1794de2 100644
--- a/kernel/cgroup/cgroup-internal.h
+++ b/kernel/cgroup/cgroup-internal.h
@@ -274,6 +274,11 @@ void cgroup_rstat_exit(struct cgroup_subsys_state *css);
void cgroup_rstat_boot(void);
void cgroup_base_stat_cputime_show(struct seq_file *seq);
+#ifdef CONFIG_CGROUP_BPF
+int bpf_cgroup_rstat_init(struct cgroup_bpf *bpf);
+void bpf_cgroup_rstat_exit(struct cgroup_bpf *bpf);
+#endif /* CONFIG_CGROUP_BPF */
+
/*
* namespace.c
*/
diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
index a8bb304e49c4..14dd8217db64 100644
--- a/kernel/cgroup/rstat.c
+++ b/kernel/cgroup/rstat.c
@@ -73,6 +73,47 @@ static struct cgroup_rstat_ops rstat_css_ops = {
.flush_fn = rstat_flush_via_css,
};
+#ifdef CONFIG_CGROUP_BPF
+__weak noinline void bpf_rstat_flush(struct cgroup *cgrp,
+ struct cgroup *parent, int cpu);
+
+static struct cgroup *rstat_cgroup_via_bpf(struct cgroup_rstat *rstat)
+{
+ struct cgroup_bpf *bpf = container_of(rstat, typeof(*bpf), rstat);
+ struct cgroup *cgrp = container_of(bpf, typeof(*cgrp), bpf);
+
+ return cgrp;
+}
+
+static struct cgroup_rstat *rstat_parent_via_bpf(
+ struct cgroup_rstat *rstat)
+{
+ struct cgroup *cgrp, *cgrp_parent;
+
+ cgrp = rstat_cgroup_via_bpf(rstat);
+ cgrp_parent = cgroup_parent(cgrp);
+ if (!cgrp_parent)
+ return NULL;
+
+ return &(cgrp_parent->bpf.rstat);
+}
+
+static void rstat_flush_via_bpf(struct cgroup_rstat *rstat, int cpu)
+{
+ struct cgroup *cgrp, *cgrp_parent;
+
+ cgrp = rstat_cgroup_via_bpf(rstat);
+ cgrp_parent = cgroup_parent(cgrp);
+ bpf_rstat_flush(cgrp, cgrp_parent, cpu);
+}
+
+static struct cgroup_rstat_ops rstat_bpf_ops = {
+ .parent_fn = rstat_parent_via_bpf,
+ .cgroup_fn = rstat_cgroup_via_bpf,
+ .flush_fn = rstat_flush_via_bpf,
+};
+#endif /* CONFIG_CGROUP_BPF */
+
/*
* Helper functions for rstat per CPU lock (cgroup_rstat_cpu_lock).
*
@@ -187,11 +228,18 @@ static void __cgroup_rstat_updated(struct cgroup_rstat *rstat, int cpu,
* rstat_cpu->updated_children list. See the comment on top of
* cgroup_rstat_cpu definition for details.
*/
-__bpf_kfunc void cgroup_rstat_updated(struct cgroup_subsys_state *css, int cpu)
+void cgroup_rstat_updated(struct cgroup_subsys_state *css, int cpu)
{
__cgroup_rstat_updated(&css->rstat, cpu, &rstat_css_ops);
}
+#ifdef CONFIG_CGROUP_BPF
+__bpf_kfunc void bpf_cgroup_rstat_updated(struct cgroup *cgroup, int cpu)
+{
+ __cgroup_rstat_updated(&(cgroup->bpf.rstat), cpu, &rstat_bpf_ops);
+}
+#endif /* CONFIG_CGROUP_BPF */
+
/**
* cgroup_rstat_push_children - push children cgroups into the given list
* @head: current head of the list (= subtree root)
@@ -330,8 +378,7 @@ static struct cgroup_rstat *cgroup_rstat_updated_list(
__bpf_hook_start();
-__weak noinline void bpf_rstat_flush(struct cgroup *cgrp,
- struct cgroup *parent, int cpu)
+void bpf_rstat_flush(struct cgroup *cgrp, struct cgroup *parent, int cpu)
{
}
@@ -379,12 +426,8 @@ static void cgroup_rstat_flush_locked(struct cgroup_rstat *rstat,
struct cgroup_rstat *pos = cgroup_rstat_updated_list(
rstat, cpu, ops);
- for (; pos; pos = pos->rstat_flush_next) {
- struct cgroup *pos_cgroup = ops->cgroup_fn(pos);
-
+ for (; pos; pos = pos->rstat_flush_next)
ops->flush_fn(pos, cpu);
- bpf_rstat_flush(pos_cgroup, cgroup_parent(pos_cgroup), cpu);
- }
/* play nice and yield if necessary */
if (need_resched() || spin_needbreak(&cgroup_rstat_lock)) {
@@ -424,11 +467,18 @@ static void __cgroup_rstat_flush(struct cgroup_rstat *rstat,
*
* This function may block.
*/
-__bpf_kfunc void cgroup_rstat_flush(struct cgroup_subsys_state *css)
+void cgroup_rstat_flush(struct cgroup_subsys_state *css)
{
__cgroup_rstat_flush(&css->rstat, &rstat_css_ops);
}
+#ifdef CONFIG_CGROUP_BPF
+__bpf_kfunc void bpf_cgroup_rstat_flush(struct cgroup *cgroup)
+{
+ __cgroup_rstat_flush(&(cgroup->bpf.rstat), &rstat_bpf_ops);
+}
+#endif /* CONFIG_CGROUP_BPF */
+
static void __cgroup_rstat_flush_hold(struct cgroup_rstat *rstat,
struct cgroup_rstat_ops *ops)
__acquires(&cgroup_rstat_lock)
@@ -532,6 +582,27 @@ void cgroup_rstat_exit(struct cgroup_subsys_state *css)
__cgroup_rstat_exit(rstat);
}
+#ifdef CONFIG_CGROUP_BPF
+int bpf_cgroup_rstat_init(struct cgroup_bpf *bpf)
+{
+ struct cgroup_rstat *rstat = &bpf->rstat;
+
+ rstat->rstat_cpu = alloc_percpu(struct cgroup_rstat_cpu);
+ if (!rstat->rstat_cpu)
+ return -ENOMEM;
+
+ __cgroup_rstat_init(rstat);
+
+ return 0;
+}
+
+void bpf_cgroup_rstat_exit(struct cgroup_bpf *bpf)
+{
+ __cgroup_rstat_flush(&bpf->rstat, &rstat_bpf_ops);
+ __cgroup_rstat_exit(&bpf->rstat);
+}
+#endif /* CONFIG_CGROUP_BPF */
+
void __init cgroup_rstat_boot(void)
{
int cpu;
@@ -754,10 +825,11 @@ void cgroup_base_stat_cputime_show(struct seq_file *seq)
cgroup_force_idle_show(seq, &cgrp->bstat);
}
+#ifdef CONFIG_CGROUP_BPF
/* Add bpf kfuncs for cgroup_rstat_updated() and cgroup_rstat_flush() */
BTF_KFUNCS_START(bpf_rstat_kfunc_ids)
-BTF_ID_FLAGS(func, cgroup_rstat_updated)
-BTF_ID_FLAGS(func, cgroup_rstat_flush, KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_cgroup_rstat_updated)
+BTF_ID_FLAGS(func, bpf_cgroup_rstat_flush, KF_SLEEPABLE)
BTF_KFUNCS_END(bpf_rstat_kfunc_ids)
static const struct btf_kfunc_id_set bpf_rstat_kfunc_set = {
@@ -771,3 +843,4 @@ static int __init bpf_rstat_kfunc_init(void)
&bpf_rstat_kfunc_set);
}
late_initcall(bpf_rstat_kfunc_init);
+#endif /* CONFIG_CGROUP_BPF */
diff --git a/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c b/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c
index 310cd51e12e8..da15ada56218 100644
--- a/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c
+++ b/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c
@@ -45,7 +45,7 @@ int BPF_PROG(test_percpu2, struct bpf_testmod_btf_type_tag_2 *arg)
SEC("tp_btf/cgroup_mkdir")
int BPF_PROG(test_percpu_load, struct cgroup *cgrp, const char *path)
{
- g = (__u64)cgrp->self.rstat.rstat_cpu->updated_children;
+ g = (__u64)cgrp->bpf.rstat.rstat_cpu->updated_children;
return 0;
}
@@ -57,7 +57,7 @@ int BPF_PROG(test_percpu_helper, struct cgroup *cgrp, const char *path)
cpu = bpf_get_smp_processor_id();
rstat = (struct cgroup_rstat_cpu *)bpf_per_cpu_ptr(
- cgrp->self.rstat.rstat_cpu, cpu);
+ cgrp->bpf.rstat.rstat_cpu, cpu);
if (rstat) {
/* READ_ONCE */
*(volatile int *)rstat;
diff --git a/tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c b/tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c
index 10c803c8dc70..24450dd4d3f3 100644
--- a/tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c
+++ b/tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c
@@ -37,8 +37,8 @@ struct {
__type(value, struct attach_counter);
} attach_counters SEC(".maps");
-extern void cgroup_rstat_updated(struct cgroup_subsys_state *css, int cpu) __ksym;
-extern void cgroup_rstat_flush(struct cgroup_subsys_state *css) __ksym;
+extern void bpf_cgroup_rstat_updated(struct cgroup *cgrp, int cpu) __ksym;
+extern void bpf_cgroup_rstat_flush(struct cgroup *cgrp) __ksym;
static uint64_t cgroup_id(struct cgroup *cgrp)
{
@@ -75,7 +75,7 @@ int BPF_PROG(counter, struct cgroup *dst_cgrp, struct task_struct *leader,
else if (create_percpu_attach_counter(cg_id, 1))
return 0;
- cgroup_rstat_updated(&dst_cgrp->self, bpf_get_smp_processor_id());
+ bpf_cgroup_rstat_updated(dst_cgrp, bpf_get_smp_processor_id());
return 0;
}
@@ -141,7 +141,7 @@ int BPF_PROG(dumper, struct bpf_iter_meta *meta, struct cgroup *cgrp)
return 1;
/* Flush the stats to make sure we get the most updated numbers */
- cgroup_rstat_flush(&cgrp->self);
+ bpf_cgroup_rstat_flush(cgrp);
total_counter = bpf_map_lookup_elem(&attach_counters, &cg_id);
if (!total_counter) {
--
2.48.1