* [PATCH bpf-next v3 16/17] bpf: selftests: add config for psi
@ 2026-01-27 2:46 Roman Gushchin
2026-01-27 2:46 ` [PATCH bpf-next v3 17/17] bpf: selftests: PSI struct ops test Roman Gushchin
0 siblings, 1 reply; 2+ messages in thread
From: Roman Gushchin @ 2026-01-27 2:46 UTC (permalink / raw)
To: bpf
Cc: Michal Hocko, Alexei Starovoitov, Matt Bobrowski, Shakeel Butt,
JP Kobryn, linux-kernel, linux-mm, Suren Baghdasaryan,
Johannes Weiner, Andrew Morton, Song Liu, Roman Gushchin
From: JP Kobryn <inwardvessel@gmail.com>
Include CONFIG_PSI to allow dependent tests to build.
Suggested-by: Song Liu <song@kernel.org>
Signed-off-by: JP Kobryn <inwardvessel@gmail.com>
Signed-off-by: Roman Gushchin <roman.gushchin@linux.dev>
---
tools/testing/selftests/bpf/config | 1 +
1 file changed, 1 insertion(+)
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 558839e3c185..e7dcb1aed4df 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -111,6 +111,7 @@ CONFIG_IP6_NF_IPTABLES=y
CONFIG_IP6_NF_FILTER=y
CONFIG_NF_NAT=y
CONFIG_PACKET=y
+CONFIG_PSI=y
CONFIG_RC_CORE=y
CONFIG_SAMPLES=y
CONFIG_SAMPLE_LIVEPATCH=m
--
2.52.0
^ permalink raw reply [flat|nested] 2+ messages in thread
* [PATCH bpf-next v3 17/17] bpf: selftests: PSI struct ops test
2026-01-27 2:46 [PATCH bpf-next v3 16/17] bpf: selftests: add config for psi Roman Gushchin
@ 2026-01-27 2:46 ` Roman Gushchin
0 siblings, 0 replies; 2+ messages in thread
From: Roman Gushchin @ 2026-01-27 2:46 UTC (permalink / raw)
To: bpf
Cc: Michal Hocko, Alexei Starovoitov, Matt Bobrowski, Shakeel Butt,
JP Kobryn, linux-kernel, linux-mm, Suren Baghdasaryan,
Johannes Weiner, Andrew Morton, Roman Gushchin
Add a PSI struct ops test.
The test creates a cgroup with two child cgroups, sets memory.high
for one of them and places a memory-hungry process (initially frozen)
into it.
The memory-hungry task creates high memory pressure in its memory
cgroup, which triggers a PSI event. The PSI BPF handler then
declares a memcg OOM in that cgroup.
Signed-off-by: Roman Gushchin <roman.gushchin@linux.dev>
---
.../selftests/bpf/prog_tests/test_psi.c | 225 ++++++++++++++++++
tools/testing/selftests/bpf/progs/test_psi.c | 90 +++++++
2 files changed, 315 insertions(+)
create mode 100644 tools/testing/selftests/bpf/prog_tests/test_psi.c
create mode 100644 tools/testing/selftests/bpf/progs/test_psi.c
diff --git a/tools/testing/selftests/bpf/prog_tests/test_psi.c b/tools/testing/selftests/bpf/prog_tests/test_psi.c
new file mode 100644
index 000000000000..170c6f6a1a35
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_psi.c
@@ -0,0 +1,225 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <test_progs.h>
+#include <bpf/btf.h>
+#include <bpf/bpf.h>
+
+#include "cgroup_helpers.h"
+#include "test_psi.skel.h"
+
+/*
+ * PSI resource kinds; NR_PSI_RESOURCES is the number of entries before it.
+ * NOTE(review): nothing in the visible part of this file references this
+ * enum - confirm it is still needed.
+ */
+enum psi_res {
+ PSI_IO,
+ PSI_MEM,
+ PSI_CPU,
+ PSI_IRQ,
+ NR_PSI_RESOURCES,
+};
+
+/* Description and runtime state of one cgroup used by the test. */
+struct cgroup_desc {
+ const char *path; /* cgroup path handed to the cgroup helpers */
+ unsigned long long id; /* filled in from get_cgroup_id() */
+ int pid; /* pid of the task spawned for this cgroup, 0 if none */
+ int fd; /* fd returned by create_and_get_cgroup() */
+ size_t target; /* bytes the spawned task allocates and fills; 0 = no task */
+ size_t high; /* value written to memory.high; 0 = leave unset */
+ bool victim; /* true if this cgroup's task is expected to terminate first */
+};
+
+/* One mebibyte, in bytes. */
+#define MB (1024 * 1024)
+
+/*
+ * Cgroups created by setup_environment(). The order matters to the rest of
+ * the test: [0] is the top-level cgroup that gets frozen/unfrozen, [1] is
+ * removed by test_psi(), and the id of [2] is passed to the BPF program.
+ */
+static struct cgroup_desc cgroups[] = {
+ { .path = "/psi_test" },
+ { .path = "/psi_test/cg1" },
+ { .path = "/psi_test/cg2", .target = 500 * MB,
+ .high = 40 * MB, .victim = true },
+};
+
+/*
+ * Fork a task that allocates desc->target bytes, writes to every byte of the
+ * allocation and then sleeps forever.
+ *
+ * In the parent, the child's pid is stored in desc->pid and 0 is returned;
+ * if fork() fails its negative result is returned instead. The child never
+ * returns from this function: it exits with ENOMEM if the allocation fails
+ * and otherwise loops in sleep() until it is killed.
+ */
+static int spawn_task(struct cgroup_desc *desc)
+{
+	int child = fork();
+	char *mem;
+
+	if (child < 0)
+		return child;
+
+	if (child != 0) {
+		/* parent: remember the child for the caller */
+		desc->pid = child;
+		return 0;
+	}
+
+	/* child: grab the memory and touch all of it */
+	mem = malloc(desc->target);
+	if (mem == NULL)
+		_exit(ENOMEM);
+
+	memset(mem, 'a', desc->target);
+
+	for (;;)
+		sleep(1000);
+
+	return 0;
+}
+
+/*
+ * Create every cgroup listed in cgroups[], configure it and spawn the
+ * memory-hungry tasks.
+ *
+ * The top-level cgroup is frozen here and only unfrozen later by
+ * run_and_wait_for_oom(). Failures are reported through ASSERT_*() only;
+ * nothing is returned. On failure, tasks that were already spawned are
+ * killed and reaped and the cgroup environment is torn down.
+ */
+static void setup_environment(void)
+{
+	char buf[256];
+	int i, err;
+
+	err = setup_cgroup_environment();
+	if (!ASSERT_OK(err, "setup_cgroup_environment"))
+		goto cleanup;
+
+	for (i = 0; i < ARRAY_SIZE(cgroups); i++) {
+		struct cgroup_desc *cg = &cgroups[i];
+
+		cg->fd = create_and_get_cgroup(cg->path);
+		if (!ASSERT_GE(cg->fd, 0, "create_and_get_cgroup"))
+			goto cleanup;
+
+		cg->id = get_cgroup_id(cg->path);
+		if (!ASSERT_GT(cg->id, 0, "get_cgroup_id"))
+			goto cleanup;
+
+		/* Freeze the top-level cgroup and enable the memory controller */
+		if (i == 0) {
+			err = write_cgroup_file(cg->path, "cgroup.freeze", "1");
+			if (!ASSERT_OK(err, "freeze cgroup"))
+				goto cleanup;
+
+			err = write_cgroup_file(cg->path, "cgroup.subtree_control",
+						"+memory");
+			if (!ASSERT_OK(err, "enable memory controller"))
+				goto cleanup;
+		}
+
+		/* Set memory.high */
+		if (cg->high) {
+			/* high is a size_t, so it must be printed with %zu */
+			snprintf(buf, sizeof(buf), "%zu", cg->high);
+			err = write_cgroup_file(cg->path, "memory.high", buf);
+			if (!ASSERT_OK(err, "set memory.high"))
+				goto cleanup;
+
+			/*
+			 * Best effort: the result is deliberately not checked.
+			 * NOTE(review): presumably because memory.swap.max may
+			 * not exist on every configuration - confirm.
+			 */
+			write_cgroup_file(cg->path, "memory.swap.max", "0");
+		}
+
+		/* Spawn tasks creating memory pressure */
+		if (cg->target) {
+			err = spawn_task(cg);
+			if (!ASSERT_OK(err, "spawn task"))
+				goto cleanup;
+
+			snprintf(buf, sizeof(buf), "%d", cg->pid);
+			err = write_cgroup_file(cg->path, "cgroup.procs", buf);
+			if (!ASSERT_OK(err, "put child into a cgroup"))
+				goto cleanup;
+		}
+	}
+
+	return;
+
+cleanup:
+	/* Do not leave already spawned tasks running behind a failed setup */
+	for (i = 0; i < ARRAY_SIZE(cgroups); i++) {
+		if (cgroups[i].pid <= 0)
+			continue;
+
+		kill(cgroups[i].pid, SIGKILL);
+		while (waitpid(cgroups[i].pid, NULL, 0) < 0 && errno == EINTR)
+			;
+		cgroups[i].pid = 0;
+	}
+
+	cleanup_cgroup_environment();
+}
+
+/*
+ * Unfreeze the top-level cgroup and reap the spawned tasks.
+ *
+ * The first child that terminates is checked: it has to belong to the
+ * cgroup marked as victim, and that cgroup's memory.events has to contain
+ * "oom_kill 1". All other spawned tasks are then killed and reaped too.
+ *
+ * Returns 0 if a child was reaped and the first one passed every check,
+ * -1 otherwise.
+ */
+static int run_and_wait_for_oom(void)
+{
+	char buf[4096] = {};
+	bool first = true;
+	ssize_t size;
+	int ret;
+
+	/* Unfreeze the top-level cgroup */
+	ret = write_cgroup_file(cgroups[0].path, "cgroup.freeze", "0");
+	if (!ASSERT_OK(ret, "unfreeze cgroup"))
+		return -1;
+
+	/*
+	 * Stay pessimistic until a child has actually been reaped: if wait()
+	 * reports ECHILD right away nothing was verified, and that must not
+	 * be reported as success.
+	 */
+	ret = -1;
+
+	for (;;) {
+		int i, status;
+		pid_t pid = wait(&status);
+
+		if (pid == -1) {
+			if (errno == EINTR)
+				continue;
+			/* ECHILD */
+			break;
+		}
+
+		/* Only the first terminated child is checked */
+		if (!first)
+			continue;
+		first = false;
+		ret = 0;
+
+		/* Check which process was terminated first */
+		for (i = 0; i < ARRAY_SIZE(cgroups); i++) {
+			if (!ASSERT_OK(cgroups[i].victim !=
+				       (pid == cgroups[i].pid),
+				       "correct process was killed")) {
+				ret = -1;
+				break;
+			}
+
+			if (!cgroups[i].victim)
+				continue;
+
+			/* Check the memcg oom counter */
+			size = read_cgroup_file(cgroups[i].path, "memory.events",
+						buf, sizeof(buf));
+			if (!ASSERT_OK(size <= 0, "read memory.events")) {
+				ret = -1;
+				break;
+			}
+
+			if (!ASSERT_OK(strstr(buf, "oom_kill 1") == NULL,
+				       "oom_kill count check")) {
+				ret = -1;
+				break;
+			}
+		}
+
+		/* Kill all remaining tasks; the loop above reaps them */
+		for (i = 0; i < ARRAY_SIZE(cgroups); i++)
+			if (cgroups[i].pid && cgroups[i].pid != pid)
+				kill(cgroups[i].pid, SIGKILL);
+	}
+
+	return ret;
+}
+
+/*
+ * Test entry point.
+ *
+ * Sets up the cgroups and the memory-hungry task, loads and attaches the BPF
+ * program, tells it which cgroup to watch and which pid to schedule work on,
+ * removes one cgroup and creates another one, then unfreezes the tasks and
+ * checks that the task in the victim cgroup is the first one to be killed.
+ */
+void test_psi(void)
+{
+	struct test_psi *skel;
+	int cgroup_fd;
+	int err;
+	int i;
+
+	setup_environment();
+
+	skel = test_psi__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "open_and_load"))
+		goto kill_tasks;
+
+	skel->bss->high_pressure_cgroup_id = cgroups[2].id;
+	skel->bss->my_pid = getpid();
+
+	err = test_psi__attach(skel);
+	if (CHECK_FAIL(err))
+		goto kill_tasks;
+
+	/* Delete the first cgroup; this is used to trigger the offline handler */
+	remove_cgroup(cgroups[1].path);
+
+	/* Create new cgroup */
+	cgroup_fd = create_and_get_cgroup("/psi_test_new");
+	if (!ASSERT_GE(cgroup_fd, 0, "create_and_get_cgroup"))
+		goto kill_tasks;
+
+	/* Unfreeze all child tasks and create the memory pressure */
+	err = run_and_wait_for_oom();
+	CHECK_FAIL(err);
+
+	close(cgroup_fd);
+	goto cleanup;
+
+kill_tasks:
+	/*
+	 * The test bailed out before run_and_wait_for_oom(), so the spawned
+	 * tasks were neither killed nor reaped. Do it here.
+	 */
+	for (i = 0; i < ARRAY_SIZE(cgroups); i++) {
+		if (cgroups[i].pid <= 0)
+			continue;
+
+		kill(cgroups[i].pid, SIGKILL);
+		while (waitpid(cgroups[i].pid, NULL, 0) < 0 && errno == EINTR)
+			;
+		cgroups[i].pid = 0;
+	}
+cleanup:
+	/*
+	 * Release the cgroup fds opened by setup_environment(). Entries that
+	 * were never opened hold 0 (static initialisation) or a negative
+	 * error value, hence the "> 0" guard.
+	 */
+	for (i = 0; i < ARRAY_SIZE(cgroups); i++) {
+		if (cgroups[i].fd > 0) {
+			close(cgroups[i].fd);
+			cgroups[i].fd = 0;
+		}
+	}
+
+	cleanup_cgroup_environment();
+	test_psi__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_psi.c b/tools/testing/selftests/bpf/progs/test_psi.c
new file mode 100644
index 000000000000..6efd5c995ce0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_psi.c
@@ -0,0 +1,90 @@
+#include "vmlinux.h"
+#include "bpf_experimental.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+char _license[] SEC("license") = "GPL";
+
+/*
+ * cgroup which will experience the high memory pressure; written by the
+ * userspace part of the test before the program is attached
+ */
+u64 high_pressure_cgroup_id;
+/* pid of the userspace test process; the OOM task work is scheduled on it */
+u32 my_pid = 0;
+
+/*
+ * last total full memory pressure value; 0 also means "no sample taken yet"
+ */
+u64 last_mem_full_total = 0;
+
+/* kfuncs used to take and drop a reference to the task identified by my_pid */
+extern struct task_struct *bpf_task_from_pid(s32 pid) __ksym;
+extern void bpf_task_release(struct task_struct *p) __ksym;
+
+/* Map value: holds the bpf_task_work object used to schedule psi_oom_work() */
+struct elem {
+ struct bpf_task_work tw;
+};
+
+/* Single-entry array map; schedule_oom_work() always uses the element at key 0 */
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} tw_map SEC(".maps");
+
+/*
+ * Task work callback scheduled by schedule_oom_work().
+ *
+ * Looks up the cgroup identified by high_pressure_cgroup_id and, if a memory
+ * cgroup can be obtained for it, calls bpf_out_of_memory() on that memory
+ * cgroup. The map, key and value arguments are not used. Always returns 0.
+ */
+static int psi_oom_work(struct bpf_map *map, void *key, void *value)
+{
+ struct cgroup *cgrp;
+ struct mem_cgroup *memcg;
+
+ cgrp = bpf_cgroup_from_id(high_pressure_cgroup_id);
+ if (!cgrp)
+ return 0;
+
+ memcg = bpf_get_mem_cgroup(&cgrp->self);
+ if (memcg) {
+ /*
+ * The result is not checked.
+ * NOTE(review): the meaning of the second argument (0) is not
+ * visible here - confirm against the kfunc declaration.
+ */
+ bpf_out_of_memory(memcg, 0, BPF_OOM_FLAGS_WAIT_ON_OOM_LOCK);
+ bpf_put_mem_cgroup(memcg);
+ }
+
+ /* Drop the reference taken by bpf_cgroup_from_id() */
+ bpf_cgroup_release(cgrp);
+ return 0;
+}
+
+/*
+ * Schedule psi_oom_work() as task work on the task whose pid is my_pid,
+ * using the only element of tw_map as storage for the task work.
+ *
+ * Does nothing if the task or the map element cannot be found. The result
+ * of bpf_task_work_schedule_signal() is not checked.
+ */
+static void schedule_oom_work(void)
+{
+ struct task_struct *task;
+ struct elem *val;
+ int key = 0;
+
+ task = bpf_task_from_pid(my_pid);
+ if (task) {
+ val = bpf_map_lookup_elem(&tw_map, &key);
+ if (val)
+ bpf_task_work_schedule_signal(task, &val->tw,
+ &tw_map, psi_oom_work);
+ /* Drop the reference taken by bpf_task_from_pid() */
+ bpf_task_release(task);
+ }
+}
+
+/*
+ * Handler attached to the psi_avgs_work tracepoint.
+ *
+ * Ignores every psi_group except the one whose cgroup_id equals
+ * high_pressure_cgroup_id. For that group it tracks how much
+ * total[PSI_MEM_FULL] grew since the previous invocation and schedules the
+ * OOM work once the growth exceeds the threshold below. The first sample
+ * (and any sample taken while the stored value is 0) only records the
+ * baseline. Always returns 0.
+ */
+SEC("tp_btf/psi_avgs_work")
+int BPF_PROG(psi_avgs, struct psi_group *group)
+{
+ u64 current_total;
+ u64 growth;
+
+ /* Monitor only a single target cgroup */
+ if (group->cgroup_id != high_pressure_cgroup_id)
+ return 0;
+
+ /*
+ * Check for memory pressure.
+ * NOTE(review): mainline declares psi_group::total as a two-dimensional
+ * array (aggregator x state) - confirm that a single index is what is
+ * intended for the kernel this test targets.
+ */
+ current_total = BPF_CORE_READ(group, total[PSI_MEM_FULL]);
+ if (last_mem_full_total == 0) {
+ last_mem_full_total = current_total;
+ return 0;
+ }
+
+ growth = current_total - last_mem_full_total;
+ last_mem_full_total = current_total;
+
+ /* Declare an OOM if growth > 50ms within the update period */
+ if (growth > 50000000)
+ schedule_oom_work();
+
+ return 0;
+}
--
2.52.0
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2026-01-27 2:46 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-01-27 2:46 [PATCH bpf-next v3 16/17] bpf: selftests: add config for psi Roman Gushchin
2026-01-27 2:46 ` [PATCH bpf-next v3 17/17] bpf: selftests: PSI struct ops test Roman Gushchin
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox