From: Roman Gushchin <roman.gushchin@linux.dev>
To: bpf@vger.kernel.org
Cc: Michal Hocko <mhocko@suse.com>,
	Alexei Starovoitov <ast@kernel.org>,
	Matt Bobrowski <mattbobrowski@google.com>,
	Shakeel Butt <shakeel.butt@linux.dev>,
	JP Kobryn <inwardvessel@gmail.com>,
	linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	Suren Baghdasaryan <surenb@google.com>,
	Johannes Weiner <hannes@cmpxchg.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	Roman Gushchin <roman.gushchin@linux.dev>
Subject: [PATCH bpf-next v3 02/17] bpf: allow attaching struct_ops to cgroups
Date: Mon, 26 Jan 2026 18:44:05 -0800
Message-ID: <20260127024421.494929-3-roman.gushchin@linux.dev>
In-Reply-To: <20260127024421.494929-1-roman.gushchin@linux.dev>

Introduce the ability to attach bpf struct_ops to cgroups.

From the user's standpoint it works in the following way: the user
passes the BPF_F_CGROUP_FD flag and specifies the target cgroup fd
while creating a struct_ops link. As a result, the bpf struct_ops
link is created and attached to the cgroup.
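For illustration, a minimal userspace sketch (not part of this patch)
using the raw bpf() syscall; the struct_ops map fd is assumed to come
from an earlier load, the helper name is hypothetical, and error
handling is mostly elided:

#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

/* Hypothetical helper: attach an already loaded struct_ops map to a cgroup. */
static int struct_ops_link_create_cgroup(int st_ops_map_fd, const char *cgrp_path)
{
	union bpf_attr attr;
	int cgrp_fd, link_fd;

	cgrp_fd = open(cgrp_path, O_RDONLY);
	if (cgrp_fd < 0)
		return -1;

	memset(&attr, 0, sizeof(attr));
	attr.link_create.map_fd = st_ops_map_fd;	/* struct_ops map to attach */
	attr.link_create.target_fd = cgrp_fd;		/* target cgroup fd */
	attr.link_create.attach_type = BPF_STRUCT_OPS;
	attr.link_create.flags = BPF_F_CGROUP_FD;	/* flag added by this patch */

	link_fd = syscall(__NR_bpf, BPF_LINK_CREATE, &attr, sizeof(attr));
	close(cgrp_fd);
	return link_fd;
}

The same can presumably be done through libbpf once
bpf_map__attach_struct_ops_opts() lands later in this series (patch 04/17).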

The cgroup_bpf structure maintains a list of attached struct_ops links.
When the cgroup is deleted, the attached struct_ops are auto-detached
and the userspace program gets a notification (EPOLLHUP on the link fd).
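A hedged sketch of how userspace could wait for that notification,
assuming the link fd from the example above (the kernel reports
EPOLLHUP on the link once it is detached, see
bpf_struct_ops_map_link_poll() in the diff below):

#include <poll.h>

/* Hypothetical helper: block until the struct_ops link gets detached,
 * e.g. because the target cgroup was removed.
 */
static int wait_for_link_hup(int link_fd)
{
	struct pollfd pfd = {
		.fd	= link_fd,
		.events	= POLLHUP,
	};

	return poll(&pfd, 1, -1);	/* revents carries POLLHUP on detach */
}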

This change doesn't answer the question of how bpf programs belonging
to these struct_ops will be executed. That is handled individually
by every bpf struct_ops which supports cgroup attachment.

Please note that, unlike "normal" bpf programs, struct_ops
are not propagated to cgroup sub-trees.

Signed-off-by: Roman Gushchin <roman.gushchin@linux.dev>
---
 include/linux/bpf-cgroup-defs.h |  3 ++
 include/linux/bpf-cgroup.h      | 16 +++++++++
 include/linux/bpf.h             |  3 ++
 include/uapi/linux/bpf.h        |  3 ++
 kernel/bpf/bpf_struct_ops.c     | 59 ++++++++++++++++++++++++++++++---
 kernel/bpf/cgroup.c             | 46 +++++++++++++++++++++++++
 tools/include/uapi/linux/bpf.h  |  1 +
 7 files changed, 127 insertions(+), 4 deletions(-)

diff --git a/include/linux/bpf-cgroup-defs.h b/include/linux/bpf-cgroup-defs.h
index c9e6b26abab6..6c5e37190dad 100644
--- a/include/linux/bpf-cgroup-defs.h
+++ b/include/linux/bpf-cgroup-defs.h
@@ -71,6 +71,9 @@ struct cgroup_bpf {
 	/* temp storage for effective prog array used by prog_attach/detach */
 	struct bpf_prog_array *inactive;
 
+	/* list of bpf struct ops links */
+	struct list_head struct_ops_links;
+
 	/* reference counter used to detach bpf programs after cgroup removal */
 	struct percpu_ref refcnt;
 
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 2f535331f926..a6c327257006 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -423,6 +423,11 @@ int cgroup_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
 int cgroup_bpf_prog_query(const union bpf_attr *attr,
 			  union bpf_attr __user *uattr);
 
+int cgroup_bpf_attach_struct_ops(struct cgroup *cgrp,
+				 struct bpf_struct_ops_link *link);
+void cgroup_bpf_detach_struct_ops(struct cgroup *cgrp,
+				  struct bpf_struct_ops_link *link);
+
 const struct bpf_func_proto *
 cgroup_common_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog);
 #else
@@ -451,6 +456,17 @@ static inline int cgroup_bpf_link_attach(const union bpf_attr *attr,
 	return -EINVAL;
 }
 
+static inline int cgroup_bpf_attach_struct_ops(struct cgroup *cgrp,
+					       struct bpf_struct_ops_link *link)
+{
+	return -EINVAL;
+}
+
+static inline void cgroup_bpf_detach_struct_ops(struct cgroup *cgrp,
+						struct bpf_struct_ops_link *link)
+{
+}
+
 static inline int cgroup_bpf_prog_query(const union bpf_attr *attr,
 					union bpf_attr __user *uattr)
 {
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 899dd911dc82..391888eb257c 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1894,6 +1894,9 @@ struct bpf_raw_tp_link {
 struct bpf_struct_ops_link {
 	struct bpf_link link;
 	struct bpf_map __rcu *map;
+	struct cgroup *cgroup;
+	bool cgroup_removed;
+	struct list_head list;
 	wait_queue_head_t wait_hup;
 };
 
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 44e7dbc278e3..28544e8af1cd 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1237,6 +1237,7 @@ enum bpf_perf_event_type {
 #define BPF_F_AFTER		(1U << 4)
 #define BPF_F_ID		(1U << 5)
 #define BPF_F_PREORDER		(1U << 6)
+#define BPF_F_CGROUP_FD		(1U << 7)
 #define BPF_F_LINK		BPF_F_LINK /* 1 << 13 */
 
 /* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the
@@ -6775,6 +6776,8 @@ struct bpf_link_info {
 		} xdp;
 		struct {
 			__u32 map_id;
+			__u32 :32;
+			__u64 cgroup_id;
 		} struct_ops;
 		struct {
 			__u32 pf;
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index de01cf3025b3..2e361e22cfa0 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -13,6 +13,8 @@
 #include <linux/btf_ids.h>
 #include <linux/rcupdate_wait.h>
 #include <linux/poll.h>
+#include <linux/bpf-cgroup.h>
+#include <linux/cgroup.h>
 
 struct bpf_struct_ops_value {
 	struct bpf_struct_ops_common_value common;
@@ -1220,6 +1222,10 @@ static void bpf_struct_ops_map_link_dealloc(struct bpf_link *link)
 		st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data, link);
 		bpf_map_put(&st_map->map);
 	}
+
+	if (st_link->cgroup)
+		cgroup_bpf_detach_struct_ops(st_link->cgroup, st_link);
+
 	kfree(st_link);
 }
 
@@ -1228,6 +1234,7 @@ static void bpf_struct_ops_map_link_show_fdinfo(const struct bpf_link *link,
 {
 	struct bpf_struct_ops_link *st_link;
 	struct bpf_map *map;
+	u64 cgrp_id = 0;
 
 	st_link = container_of(link, struct bpf_struct_ops_link, link);
 	rcu_read_lock();
@@ -1235,6 +1242,14 @@ static void bpf_struct_ops_map_link_show_fdinfo(const struct bpf_link *link,
 	if (map)
 		seq_printf(seq, "map_id:\t%d\n", map->id);
 	rcu_read_unlock();
+
+	cgroup_lock();
+	if (st_link->cgroup)
+		cgrp_id = cgroup_id(st_link->cgroup);
+	cgroup_unlock();
+
+	if (cgrp_id)
+		seq_printf(seq, "cgroup_id:\t%llu\n", cgrp_id);
 }
 
 static int bpf_struct_ops_map_link_fill_link_info(const struct bpf_link *link,
@@ -1242,6 +1257,7 @@ static int bpf_struct_ops_map_link_fill_link_info(const struct bpf_link *link,
 {
 	struct bpf_struct_ops_link *st_link;
 	struct bpf_map *map;
+	u64 cgrp_id = 0;
 
 	st_link = container_of(link, struct bpf_struct_ops_link, link);
 	rcu_read_lock();
@@ -1249,6 +1265,13 @@ static int bpf_struct_ops_map_link_fill_link_info(const struct bpf_link *link,
 	if (map)
 		info->struct_ops.map_id = map->id;
 	rcu_read_unlock();
+
+	cgroup_lock();
+	if (st_link->cgroup)
+		cgrp_id = cgroup_id(st_link->cgroup);
+	cgroup_unlock();
+
+	info->struct_ops.cgroup_id = cgrp_id;
 	return 0;
 }
 
@@ -1327,6 +1350,9 @@ static int bpf_struct_ops_map_link_detach(struct bpf_link *link)
 
 	mutex_unlock(&update_mutex);
 
+	if (st_link->cgroup)
+		cgroup_bpf_detach_struct_ops(st_link->cgroup, st_link);
+
 	wake_up_interruptible_poll(&st_link->wait_hup, EPOLLHUP);
 
 	return 0;
@@ -1339,6 +1365,9 @@ static __poll_t bpf_struct_ops_map_link_poll(struct file *file,
 
 	poll_wait(file, &st_link->wait_hup, pts);
 
+	if (st_link->cgroup_removed)
+		return EPOLLHUP;
+
 	return rcu_access_pointer(st_link->map) ? 0 : EPOLLHUP;
 }
 
@@ -1357,8 +1386,12 @@ int bpf_struct_ops_link_create(union bpf_attr *attr)
 	struct bpf_link_primer link_primer;
 	struct bpf_struct_ops_map *st_map;
 	struct bpf_map *map;
+	struct cgroup *cgrp;
 	int err;
 
+	if (attr->link_create.flags & ~BPF_F_CGROUP_FD)
+		return -EINVAL;
+
 	map = bpf_map_get(attr->link_create.map_fd);
 	if (IS_ERR(map))
 		return PTR_ERR(map);
@@ -1378,11 +1411,26 @@ int bpf_struct_ops_link_create(union bpf_attr *attr)
 	bpf_link_init(&link->link, BPF_LINK_TYPE_STRUCT_OPS, &bpf_struct_ops_map_lops, NULL,
 		      attr->link_create.attach_type);
 
+	init_waitqueue_head(&link->wait_hup);
+
+	if (attr->link_create.flags & BPF_F_CGROUP_FD) {
+		cgrp = cgroup_get_from_fd(attr->link_create.target_fd);
+		if (IS_ERR(cgrp)) {
+			err = PTR_ERR(cgrp);
+			goto err_out;
+		}
+		link->cgroup = cgrp;
+		err = cgroup_bpf_attach_struct_ops(cgrp, link);
+		if (err) {
+			cgroup_put(cgrp);
+			link->cgroup = NULL;
+			goto err_out;
+		}
+	}
+
 	err = bpf_link_prime(&link->link, &link_primer);
 	if (err)
-		goto err_out;
-
-	init_waitqueue_head(&link->wait_hup);
+		goto err_put_cgroup;
 
 	/* Hold the update_mutex such that the subsystem cannot
 	 * do link->ops->detach() before the link is fully initialized.
@@ -1393,13 +1441,16 @@ int bpf_struct_ops_link_create(union bpf_attr *attr)
 		mutex_unlock(&update_mutex);
 		bpf_link_cleanup(&link_primer);
 		link = NULL;
-		goto err_out;
+		goto err_put_cgroup;
 	}
 	RCU_INIT_POINTER(link->map, map);
 	mutex_unlock(&update_mutex);
 
 	return bpf_link_settle(&link_primer);
 
+err_put_cgroup:
+	if (link && link->cgroup)
+		cgroup_bpf_detach_struct_ops(link->cgroup, link);
 err_out:
 	bpf_map_put(map);
 	kfree(link);
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 69988af44b37..7b1903be6f69 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -16,6 +16,7 @@
 #include <linux/bpf-cgroup.h>
 #include <linux/bpf_lsm.h>
 #include <linux/bpf_verifier.h>
+#include <linux/poll.h>
 #include <net/sock.h>
 #include <net/bpf_sk_storage.h>
 
@@ -307,12 +308,23 @@ static void cgroup_bpf_release(struct work_struct *work)
 					       bpf.release_work);
 	struct bpf_prog_array *old_array;
 	struct list_head *storages = &cgrp->bpf.storages;
+	struct bpf_struct_ops_link *st_link, *st_tmp;
 	struct bpf_cgroup_storage *storage, *stmp;
+	LIST_HEAD(st_links);
 
 	unsigned int atype;
 
 	cgroup_lock();
 
+	list_splice_init(&cgrp->bpf.struct_ops_links, &st_links);
+	list_for_each_entry_safe(st_link, st_tmp, &st_links, list) {
+		st_link->cgroup = NULL;
+		st_link->cgroup_removed = true;
+		cgroup_put(cgrp);
+		if (IS_ERR(bpf_link_inc_not_zero(&st_link->link)))
+			list_del(&st_link->list);
+	}
+
 	for (atype = 0; atype < ARRAY_SIZE(cgrp->bpf.progs); atype++) {
 		struct hlist_head *progs = &cgrp->bpf.progs[atype];
 		struct bpf_prog_list *pl;
@@ -346,6 +358,11 @@ static void cgroup_bpf_release(struct work_struct *work)
 
 	cgroup_unlock();
 
+	list_for_each_entry_safe(st_link, st_tmp, &st_links, list) {
+		st_link->link.ops->detach(&st_link->link);
+		bpf_link_put(&st_link->link);
+	}
+
 	for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p))
 		cgroup_bpf_put(p);
 
@@ -525,6 +542,7 @@ static int cgroup_bpf_inherit(struct cgroup *cgrp)
 		INIT_HLIST_HEAD(&cgrp->bpf.progs[i]);
 
 	INIT_LIST_HEAD(&cgrp->bpf.storages);
+	INIT_LIST_HEAD(&cgrp->bpf.struct_ops_links);
 
 	for (i = 0; i < NR; i++)
 		if (compute_effective_progs(cgrp, i, &arrays[i]))
@@ -2759,3 +2777,31 @@ cgroup_common_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return NULL;
 	}
 }
+
+int cgroup_bpf_attach_struct_ops(struct cgroup *cgrp,
+				 struct bpf_struct_ops_link *link)
+{
+	int ret = 0;
+
+	cgroup_lock();
+	if (percpu_ref_is_zero(&cgrp->bpf.refcnt)) {
+		ret = -EBUSY;
+		goto out;
+	}
+	list_add_tail(&link->list, &cgrp->bpf.struct_ops_links);
+out:
+	cgroup_unlock();
+	return ret;
+}
+
+void cgroup_bpf_detach_struct_ops(struct cgroup *cgrp,
+				  struct bpf_struct_ops_link *link)
+{
+	cgroup_lock();
+	if (link->cgroup == cgrp) {
+		list_del(&link->list);
+		link->cgroup = NULL;
+		cgroup_put(cgrp);
+	}
+	cgroup_unlock();
+}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 3ca7d76e05f0..d5492e60744a 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1237,6 +1237,7 @@ enum bpf_perf_event_type {
 #define BPF_F_AFTER		(1U << 4)
 #define BPF_F_ID		(1U << 5)
 #define BPF_F_PREORDER		(1U << 6)
+#define BPF_F_CGROUP_FD		(1U << 7)
 #define BPF_F_LINK		BPF_F_LINK /* 1 << 13 */
 
 /* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the
-- 
2.52.0



Thread overview: 63+ messages
2026-01-27  2:44 [PATCH bpf-next v3 00/17] mm: BPF OOM Roman Gushchin
2026-01-27  2:44 ` [PATCH bpf-next v3 01/17] bpf: move bpf_struct_ops_link into bpf.h Roman Gushchin
2026-01-27  5:50   ` Yafang Shao
2026-01-28 11:28   ` Matt Bobrowski
2026-01-27  2:44 ` Roman Gushchin [this message]
2026-01-27  3:08   ` [PATCH bpf-next v3 02/17] bpf: allow attaching struct_ops to cgroups bot+bpf-ci
2026-01-27  5:49   ` Yafang Shao
2026-01-28  3:10   ` Josh Don
2026-01-28 18:52     ` Roman Gushchin
2026-01-28 11:25   ` Matt Bobrowski
2026-01-28 19:18     ` Roman Gushchin
2026-01-27  2:44 ` [PATCH bpf-next v3 03/17] libbpf: fix return value on memory allocation failure Roman Gushchin
2026-01-27  5:52   ` Yafang Shao
2026-01-27  2:44 ` [PATCH bpf-next v3 04/17] libbpf: introduce bpf_map__attach_struct_ops_opts() Roman Gushchin
2026-01-27  3:08   ` bot+bpf-ci
2026-01-27  2:44 ` [PATCH bpf-next v3 05/17] bpf: mark struct oom_control's memcg field as TRUSTED_OR_NULL Roman Gushchin
2026-01-27  6:06   ` Yafang Shao
2026-02-02  4:56   ` Matt Bobrowski
2026-01-27  2:44 ` [PATCH bpf-next v3 06/17] mm: define mem_cgroup_get_from_ino() outside of CONFIG_SHRINKER_DEBUG Roman Gushchin
2026-01-27  6:12   ` Yafang Shao
2026-02-02  3:50   ` Shakeel Butt
2026-01-27  2:44 ` [PATCH bpf-next v3 07/17] mm: introduce BPF OOM struct ops Roman Gushchin
2026-01-27  9:38   ` Michal Hocko
2026-01-27 21:12     ` Roman Gushchin
2026-01-28  8:00       ` Michal Hocko
2026-01-28 18:44         ` Roman Gushchin
2026-02-02  4:06       ` Matt Bobrowski
2026-01-28  3:26   ` Josh Don
2026-01-28 19:03     ` Roman Gushchin
2026-01-28 11:19   ` Michal Hocko
2026-01-28 18:53     ` Roman Gushchin
2026-01-29 21:00   ` Martin KaFai Lau
2026-01-30 23:29     ` Roman Gushchin
2026-02-02 20:27       ` Martin KaFai Lau
2026-01-27  2:44 ` [PATCH bpf-next v3 08/17] mm: introduce bpf_oom_kill_process() bpf kfunc Roman Gushchin
2026-01-27 20:21   ` Martin KaFai Lau
2026-01-27 20:47     ` Roman Gushchin
2026-02-02  4:49   ` Matt Bobrowski
2026-01-27  2:44 ` [PATCH bpf-next v3 09/17] mm: introduce bpf_out_of_memory() BPF kfunc Roman Gushchin
2026-01-28 20:21   ` Matt Bobrowski
2026-01-27  2:44 ` [PATCH bpf-next v3 10/17] mm: introduce bpf_task_is_oom_victim() kfunc Roman Gushchin
2026-02-02  5:39   ` Matt Bobrowski
2026-02-02 17:30     ` Alexei Starovoitov
2026-02-03  0:14       ` Roman Gushchin
2026-02-03 13:23         ` Michal Hocko
2026-02-03 16:31           ` Alexei Starovoitov
2026-02-04  9:02             ` Michal Hocko
2026-02-05  0:12               ` Alexei Starovoitov
2026-01-27  2:44 ` [PATCH bpf-next v3 11/17] bpf: selftests: introduce read_cgroup_file() helper Roman Gushchin
2026-01-27  3:08   ` bot+bpf-ci
2026-01-27  2:44 ` [PATCH bpf-next v3 12/17] bpf: selftests: BPF OOM struct ops test Roman Gushchin
2026-01-27  2:44 ` [PATCH bpf-next v3 13/17] sched: psi: add a trace point to psi_avgs_work() Roman Gushchin
2026-01-27  2:44 ` [PATCH bpf-next v3 14/17] sched: psi: add cgroup_id field to psi_group structure Roman Gushchin
2026-01-27  2:44 ` [PATCH bpf-next v3 15/17] bpf: allow calling bpf_out_of_memory() from a PSI tracepoint Roman Gushchin
2026-01-27  9:02 ` [PATCH bpf-next v3 00/17] mm: BPF OOM Michal Hocko
2026-01-27 21:01   ` Roman Gushchin
2026-01-28  8:06     ` Michal Hocko
2026-01-28 16:59       ` Alexei Starovoitov
2026-01-28 18:23         ` Roman Gushchin
2026-01-28 18:53           ` Alexei Starovoitov
2026-02-02  3:26         ` Matt Bobrowski
2026-02-02 17:50           ` Alexei Starovoitov
2026-02-04 23:52             ` Matt Bobrowski
