From: Hui Zhu <hui.zhu@linux.dev>
To: Andrew Morton <akpm@linux-foundation.org>,
Johannes Weiner <hannes@cmpxchg.org>,
Michal Hocko <mhocko@kernel.org>,
Roman Gushchin <roman.gushchin@linux.dev>,
Shakeel Butt <shakeel.butt@linux.dev>,
Muchun Song <muchun.song@linux.dev>,
Alexei Starovoitov <ast@kernel.org>,
Daniel Borkmann <daniel@iogearbox.net>,
Andrii Nakryiko <andrii@kernel.org>,
Martin KaFai Lau <martin.lau@linux.dev>,
Eduard Zingerman <eddyz87@gmail.com>, Song Liu <song@kernel.org>,
Yonghong Song <yonghong.song@linux.dev>,
John Fastabend <john.fastabend@gmail.com>,
KP Singh <kpsingh@kernel.org>,
Stanislav Fomichev <sdf@fomichev.me>, Hao Luo <haoluo@google.com>,
Jiri Olsa <jolsa@kernel.org>, Shuah Khan <shuah@kernel.org>,
Peter Zijlstra <peterz@infradead.org>,
Miguel Ojeda <ojeda@kernel.org>,
Nathan Chancellor <nathan@kernel.org>,
Kees Cook <kees@kernel.org>, Tejun Heo <tj@kernel.org>,
Jeff Xu <jeffxu@chromium.org>,
mkoutny@suse.com, Jan Hendrik Farr <kernel@jfarr.cc>,
Christian Brauner <brauner@kernel.org>,
Randy Dunlap <rdunlap@infradead.org>,
Brian Gerst <brgerst@gmail.com>,
Masahiro Yamada <masahiroy@kernel.org>,
davem@davemloft.net, Jakub Kicinski <kuba@kernel.org>,
Jesper Dangaard Brouer <hawk@kernel.org>,
JP Kobryn <inwardvessel@gmail.com>,
Willem de Bruijn <willemb@google.com>,
Jason Xing <kerneljasonxing@gmail.com>,
Paul Chaignon <paul.chaignon@gmail.com>,
Anton Protopopov <a.s.protopopov@gmail.com>,
Amery Hung <ameryhung@gmail.com>,
Chen Ridong <chenridong@huaweicloud.com>,
Lance Yang <lance.yang@linux.dev>,
Jiayuan Chen <jiayuan.chen@linux.dev>,
linux-kernel@vger.kernel.org, linux-mm@kvack.org,
cgroups@vger.kernel.org, bpf@vger.kernel.org,
netdev@vger.kernel.org, linux-kselftest@vger.kernel.org
Cc: Hui Zhu <zhuhui@kylinos.cn>, Geliang Tang <geliang@kernel.org>
Subject: [RFC PATCH bpf-next v6 10/12] mm/bpf: Add BPF_F_ALLOW_OVERRIDE support for memcg_bpf_ops
Date: Wed, 4 Feb 2026 17:00:06 +0800 [thread overview]
Message-ID: <274f34842dcc1c73c43f05cd1bcac6763107cbe6.1770194182.git.zhuhui@kylinos.cn> (raw)
In-Reply-To: <cover.1770194182.git.zhuhui@kylinos.cn>
From: Hui Zhu <zhuhui@kylinos.cn>
To allow more flexible attachment policies in nested cgroup
hierarchies, add support for the `BPF_F_ALLOW_OVERRIDE` flag to
`memcg_bpf_ops`.
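When a `memcg_bpf_ops` is attached to a cgroup with this flag, a
descendant cgroup that inherited it may attach its own, different
`memcg_bpf_ops`, overriding the inherited program for that
descendant's subtree. Without this flag, attaching a BPF program to a
cgroup that already has one (either directly or via inheritance) fails
with -EBUSY. Attaching to a cgroup whose current program was attached
directly (not inherited from its parent) fails with -EBUSY regardless
of the flag.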
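The implementation involves:
- Adding a `bpf_ops_flags` field to `struct mem_cgroup`.
- During registration (`bpf_memcg_ops_reg`), rejecting any link flag
other than `BPF_F_ALLOW_OVERRIDE` with -EOPNOTSUPP, allowing an
existing program to be replaced only if it is inherited from the
parent and carries `BPF_F_ALLOW_OVERRIDE`, and failing with -EBUSY
if any descendant already has a different program.
- During unregistration (`bpf_memcg_ops_unreg`), resetting every
cgroup in the subtree that still uses the detached program to the
parent's `bpf_ops` and `bpf_ops_flags` (NULL and 0 when there is no
parent).
- Copying the parent's `bpf_ops_flags` to a child cgroup in
`memcontrol_bpf_online()` so the flag follows the inherited program.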
This change enables complex, multi-level policy enforcement where
different subtrees of the cgroup hierarchy can have distinct memory
management BPF programs.
Signed-off-by: Geliang Tang <geliang@kernel.org>
Signed-off-by: Hui Zhu <zhuhui@kylinos.cn>
---
include/linux/memcontrol.h | 1 +
mm/bpf_memcontrol.c | 96 ++++++++++++++++++++++++++------------
2 files changed, 66 insertions(+), 31 deletions(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index d91dbb95069b..c7b32a01a854 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -355,6 +355,7 @@ struct mem_cgroup {
#ifdef CONFIG_BPF_SYSCALL
struct memcg_bpf_ops *bpf_ops;
+ u32 bpf_ops_flags;
#endif
struct mem_cgroup_per_node *nodeinfo[];
diff --git a/mm/bpf_memcontrol.c b/mm/bpf_memcontrol.c
index 72b720400628..909751263f98 100644
--- a/mm/bpf_memcontrol.c
+++ b/mm/bpf_memcontrol.c
@@ -204,10 +204,11 @@ void memcontrol_bpf_online(struct mem_cgroup *memcg)
/*
* Because only functions bpf_memcg_ops_reg and bpf_memcg_ops_unreg
- * write to memcg->bpf_ops under the protection of cgroup_mutex,
- * ensuring that cgroup_mutex is already locked here allows safe
- * reading and writing of memcg->bpf_ops without needing to acquire
- * a lock on memcg_bpf_srcu.
+ * write to memcg->bpf_ops and memcg->bpf_ops_flags under the
+ * protection of cgroup_mutex, ensuring that cgroup_mutex is already
+ * locked here allows safe reading and writing of memcg->bpf_ops and
+ * memcg->bpf_ops_flags without needing to acquire a lock on
+ * memcg_bpf_srcu.
*/
lockdep_assert_held(&cgroup_mutex);
@@ -218,6 +219,7 @@ void memcontrol_bpf_online(struct mem_cgroup *memcg)
if (!ops)
return;
WRITE_ONCE(memcg->bpf_ops, ops);
+ memcg->bpf_ops_flags = parent_memcg->bpf_ops_flags;
/*
* If the BPF program implements it, call the online handler to
@@ -239,7 +241,7 @@ void memcontrol_bpf_offline(struct mem_cgroup *memcg)
{
struct memcg_bpf_ops *ops;
- /* Same with function memcontrol_bpf_online. */
+ /* Same locking rules as memcontrol_bpf_online(). */
lockdep_assert_held(&cgroup_mutex);
ops = READ_ONCE(memcg->bpf_ops);
@@ -335,48 +337,62 @@ static int bpf_memcg_ops_init_member(const struct btf_type *t,
return 0;
}
-/**
- * clean_memcg_bpf_ops - Clear BPF ops from a memory cgroup hierarchy
- * @memcg: Root memory cgroup to start from
- * @ops: The specific BPF ops to remove
- *
- * Walks the cgroup hierarchy and clears bpf_ops for any cgroup that
- * matches @ops.
- */
-static void clean_memcg_bpf_ops(struct mem_cgroup *memcg,
- struct memcg_bpf_ops *ops)
-{
- struct mem_cgroup *iter = NULL;
-
- while ((iter = mem_cgroup_iter(memcg, iter, NULL))) {
- if (READ_ONCE(iter->bpf_ops) == ops)
- WRITE_ONCE(iter->bpf_ops, NULL);
- }
-}
-
static int bpf_memcg_ops_reg(void *kdata, struct bpf_link *link)
{
struct bpf_struct_ops_link *ops_link
= container_of(link, struct bpf_struct_ops_link, link);
- struct memcg_bpf_ops *ops = kdata;
- struct mem_cgroup *memcg, *iter = NULL;
+ struct memcg_bpf_ops *ops = kdata, *old_ops;
+ struct mem_cgroup *memcg, *iter;
int err = 0;
+ if (ops_link->flags & ~BPF_F_ALLOW_OVERRIDE) {
+ pr_err("only BPF_F_ALLOW_OVERRIDE supported for struct_ops\n");
+ return -EOPNOTSUPP;
+ }
+
memcg = mem_cgroup_get_from_ino(ops_link->cgroup_id);
if (IS_ERR(memcg))
return PTR_ERR(memcg);
cgroup_lock();
+
+ /*
+ * If memcg already has bpf_ops, a new attach is only allowed when
+ * those ops are inherited from the parent and were attached with
+ * BPF_F_ALLOW_OVERRIDE. Otherwise fail with -EBUSY.
+ */
+ old_ops = READ_ONCE(memcg->bpf_ops);
+ if (old_ops) {
+ struct mem_cgroup *parent_memcg = parent_mem_cgroup(memcg);
+
+ if (!parent_memcg ||
+ !(memcg->bpf_ops_flags & BPF_F_ALLOW_OVERRIDE) ||
+ READ_ONCE(parent_memcg->bpf_ops) != old_ops) {
+ err = -EBUSY;
+ goto unlock_out;
+ }
+ }
+
+ /* Check for incompatible bpf_ops in descendants. */
+ iter = NULL;
while ((iter = mem_cgroup_iter(memcg, iter, NULL))) {
- if (READ_ONCE(iter->bpf_ops)) {
+ struct memcg_bpf_ops *iter_ops = READ_ONCE(iter->bpf_ops);
+
+ if (iter_ops && iter_ops != old_ops) {
+ /* cannot override existing bpf_ops of sub-cgroup. */
mem_cgroup_iter_break(memcg, iter);
err = -EBUSY;
- break;
+ goto unlock_out;
}
+ }
+
+ iter = NULL;
+ while ((iter = mem_cgroup_iter(memcg, iter, NULL))) {
WRITE_ONCE(iter->bpf_ops, ops);
+ iter->bpf_ops_flags = ops_link->flags;
}
- if (err)
- clean_memcg_bpf_ops(memcg, ops);
+
+unlock_out:
cgroup_unlock();
mem_cgroup_put(memcg);
@@ -390,13 +406,31 @@ static void bpf_memcg_ops_unreg(void *kdata, struct bpf_link *link)
= container_of(link, struct bpf_struct_ops_link, link);
struct memcg_bpf_ops *ops = kdata;
struct mem_cgroup *memcg;
+ struct mem_cgroup *iter;
+ struct memcg_bpf_ops *parent_bpf_ops = NULL;
+ u32 parent_bpf_ops_flags = 0;
memcg = mem_cgroup_get_from_ino(ops_link->cgroup_id);
if (IS_ERR_OR_NULL(memcg))
goto out;
cgroup_lock();
- clean_memcg_bpf_ops(memcg, ops);
+
+ /* Get the parent bpf_ops and bpf_ops_flags */
+ iter = parent_mem_cgroup(memcg);
+ if (iter) {
+ parent_bpf_ops = READ_ONCE(iter->bpf_ops);
+ parent_bpf_ops_flags = iter->bpf_ops_flags;
+ }
+
+ iter = NULL;
+ while ((iter = mem_cgroup_iter(memcg, iter, NULL))) {
+ if (READ_ONCE(iter->bpf_ops) == ops) {
+ WRITE_ONCE(iter->bpf_ops, parent_bpf_ops);
+ iter->bpf_ops_flags = parent_bpf_ops_flags;
+ }
+ }
+
cgroup_unlock();
mem_cgroup_put(memcg);
--
2.43.0
next prev parent reply other threads:[~2026-02-04 9:01 UTC|newest]
Thread overview: 18+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-02-04 8:56 [RFC PATCH bpf-next v6 00/12] mm: memcontrol: Add BPF hooks for memory controller Hui Zhu
2026-02-04 8:56 ` [RFC PATCH bpf-next v6 01/12] bpf: move bpf_struct_ops_link into bpf.h Hui Zhu
2026-02-04 8:56 ` [RFC PATCH bpf-next v6 02/12] bpf: initial support for attaching struct ops to cgroups Hui Zhu
2026-02-04 8:56 ` [RFC PATCH bpf-next v6 03/12] bpf: mark struct oom_control's memcg field as TRUSTED_OR_NULL Hui Zhu
2026-02-04 8:56 ` [RFC PATCH bpf-next v6 04/12] mm: define mem_cgroup_get_from_ino() outside of CONFIG_SHRINKER_DEBUG Hui Zhu
2026-02-04 8:56 ` [RFC PATCH bpf-next v6 05/12] libbpf: introduce bpf_map__attach_struct_ops_opts() Hui Zhu
2026-02-04 9:28 ` bot+bpf-ci
2026-02-04 8:56 ` [RFC PATCH bpf-next v6 06/12] bpf: Pass flags in bpf_link_create for struct_ops Hui Zhu
2026-02-04 9:28 ` bot+bpf-ci
2026-02-04 9:00 ` [RFC PATCH bpf-next v6 07/12] libbpf: Support passing user-defined flags " Hui Zhu
2026-02-04 9:28 ` bot+bpf-ci
2026-02-04 9:00 ` [RFC PATCH bpf-next v6 08/12] mm: memcontrol: Add BPF struct_ops for memory controller Hui Zhu
2026-02-04 9:00 ` [RFC PATCH bpf-next v6 09/12] selftests/bpf: Add tests for memcg_bpf_ops Hui Zhu
2026-02-04 9:00 ` Hui Zhu [this message]
2026-02-04 9:00 ` [RFC PATCH bpf-next v6 11/12] selftests/bpf: Add test for memcg_bpf_ops hierarchies Hui Zhu
2026-02-04 9:28 ` bot+bpf-ci
2026-02-04 9:00 ` [RFC PATCH bpf-next v6 12/12] samples/bpf: Add memcg priority control example Hui Zhu
2026-02-04 9:28 ` bot+bpf-ci
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=274f34842dcc1c73c43f05cd1bcac6763107cbe6.1770194182.git.zhuhui@kylinos.cn \
--to=hui.zhu@linux.dev \
--cc=a.s.protopopov@gmail.com \
--cc=akpm@linux-foundation.org \
--cc=ameryhung@gmail.com \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=brauner@kernel.org \
--cc=brgerst@gmail.com \
--cc=cgroups@vger.kernel.org \
--cc=chenridong@huaweicloud.com \
--cc=daniel@iogearbox.net \
--cc=davem@davemloft.net \
--cc=eddyz87@gmail.com \
--cc=geliang@kernel.org \
--cc=hannes@cmpxchg.org \
--cc=haoluo@google.com \
--cc=hawk@kernel.org \
--cc=inwardvessel@gmail.com \
--cc=jeffxu@chromium.org \
--cc=jiayuan.chen@linux.dev \
--cc=john.fastabend@gmail.com \
--cc=jolsa@kernel.org \
--cc=kees@kernel.org \
--cc=kernel@jfarr.cc \
--cc=kerneljasonxing@gmail.com \
--cc=kpsingh@kernel.org \
--cc=kuba@kernel.org \
--cc=lance.yang@linux.dev \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-kselftest@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=martin.lau@linux.dev \
--cc=masahiroy@kernel.org \
--cc=mhocko@kernel.org \
--cc=mkoutny@suse.com \
--cc=muchun.song@linux.dev \
--cc=nathan@kernel.org \
--cc=netdev@vger.kernel.org \
--cc=ojeda@kernel.org \
--cc=paul.chaignon@gmail.com \
--cc=peterz@infradead.org \
--cc=rdunlap@infradead.org \
--cc=roman.gushchin@linux.dev \
--cc=sdf@fomichev.me \
--cc=shakeel.butt@linux.dev \
--cc=shuah@kernel.org \
--cc=song@kernel.org \
--cc=tj@kernel.org \
--cc=willemb@google.com \
--cc=yonghong.song@linux.dev \
--cc=zhuhui@kylinos.cn \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox