From: Yonghong Song <yonghong.song@linux.dev>
To: JP Kobryn <inwardvessel@gmail.com>,
shakeel.butt@linux.dev, andrii@kernel.org, ast@kernel.org,
mkoutny@suse.com, yosryahmed@google.com, hannes@cmpxchg.org,
tj@kernel.org, akpm@linux-foundation.org
Cc: linux-kernel@vger.kernel.org, cgroups@vger.kernel.org,
linux-mm@kvack.org, bpf@vger.kernel.org, kernel-team@meta.com
Subject: Re: [PATCH v2 2/2] memcg: selftests for memcg stat kfuncs
Date: Wed, 15 Oct 2025 22:04:36 -0700 [thread overview]
Message-ID: <f1558b5d-41be-4f56-8428-d5ae63d696ea@linux.dev> (raw)
In-Reply-To: <20251015190813.80163-3-inwardvessel@gmail.com>
On 10/15/25 12:08 PM, JP Kobryn wrote:
> Add test coverage for the kfuncs that fetch memcg stats. Using some common
> stats, test before and after scenarios ensuring that the given stat
> increases by some arbitrary amount. The stats selected cover the three
> categories represented by the enums: node_stat_item, memcg_stat_item,
> vm_event_item.
>
> Since only a subset of all stats are queried, use a static struct made up
> of fields for each stat. Write to the struct with the fetched values when
> the bpf program is invoked and read the fields in the user mode program for
> verification.
>
> Signed-off-by: JP Kobryn <inwardvessel@gmail.com>
> ---
> .../testing/selftests/bpf/cgroup_iter_memcg.h | 18 ++
> .../bpf/prog_tests/cgroup_iter_memcg.c | 295 ++++++++++++++++++
> .../selftests/bpf/progs/cgroup_iter_memcg.c | 61 ++++
> 3 files changed, 374 insertions(+)
> create mode 100644 tools/testing/selftests/bpf/cgroup_iter_memcg.h
> create mode 100644 tools/testing/selftests/bpf/prog_tests/cgroup_iter_memcg.c
> create mode 100644 tools/testing/selftests/bpf/progs/cgroup_iter_memcg.c
>
> diff --git a/tools/testing/selftests/bpf/cgroup_iter_memcg.h b/tools/testing/selftests/bpf/cgroup_iter_memcg.h
> new file mode 100644
> index 000000000000..5f4c6502d9f1
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/cgroup_iter_memcg.h
> @@ -0,0 +1,18 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
> +#ifndef __CGROUP_ITER_MEMCG_H
> +#define __CGROUP_ITER_MEMCG_H
> +
> +struct memcg_query {
> + /* some node_stat_item's */
> + long nr_anon_mapped;
> + long nr_shmem;
> + long nr_file_pages;
> + long nr_file_mapped;
> + /* some memcg_stat_item */
> + long memcg_kmem;
> + /* some vm_event_item */
> + long pgfault;
> +};
> +
> +#endif /* __CGROUP_ITER_MEMCG_H */
> diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_iter_memcg.c b/tools/testing/selftests/bpf/prog_tests/cgroup_iter_memcg.c
> new file mode 100644
> index 000000000000..264dc3c9ec30
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_iter_memcg.c
> @@ -0,0 +1,295 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
> +#include <test_progs.h>
> +#include <bpf/libbpf.h>
> +#include <bpf/btf.h>
> +#include <fcntl.h>
> +#include <sys/mman.h>
> +#include <unistd.h>
> +#include "cgroup_helpers.h"
> +#include "cgroup_iter_memcg.h"
> +#include "cgroup_iter_memcg.skel.h"
> +
> +int read_stats(struct bpf_link *link)
This function is only used in this file, so it should be static: static int read_stats(...)
> +{
> + int fd, ret = 0;
> + ssize_t bytes;
> +
> + fd = bpf_iter_create(bpf_link__fd(link));
> + if (!ASSERT_OK_FD(fd, "bpf_iter_create"))
> + return 1;
> +
> + /*
> + * Invoke iter program by reading from its fd. We're not expecting any
> + * data to be written by the bpf program so the result should be zero.
> + * Results will be read directly through the custom data section
> + * accessible through skel->data_query.memcg_query.
> + */
> + bytes = read(fd, NULL, 0);
> + if (!ASSERT_EQ(bytes, 0, "read fd"))
> + ret = 1;
> +
> + close(fd);
> + return ret;
> +}
> +
> +static void test_anon(struct bpf_link *link,
> + struct memcg_query *memcg_query)
The second argument is not aligned with the first one. Actually, both arguments can fit on the same line.
> +{
> + void *map;
> + size_t len;
> + long val;
> +
> + len = sysconf(_SC_PAGESIZE) * 1024;
> +
> + if (!ASSERT_OK(read_stats(link), "read stats"))
> + return;
> +
> + val = memcg_query->nr_anon_mapped;
> + if (!ASSERT_GE(val, 0, "initial anon mapped val"))
> + return;
> +
> + /*
> + * Increase memcg anon usage by mapping and writing
> + * to a new anon region.
> + */
> + map = mmap(NULL, len, PROT_READ | PROT_WRITE,
> + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
All arguments can be on the same line.
> + if (!ASSERT_NEQ(map, MAP_FAILED, "mmap anon"))
> + return;
> +
> + memset(map, 1, len);
> +
> + if (!ASSERT_OK(read_stats(link), "read stats"))
> + goto cleanup;
> +
> + ASSERT_GT(memcg_query->nr_anon_mapped, val, "final anon mapped val");
> +
> +cleanup:
> + munmap(map, len);
> +}
> +
> +static void test_file(struct bpf_link *link,
> + struct memcg_query *memcg_query)
The arguments can be on the same line. There are some other examples below.
> +{
> + void *map;
> + size_t len;
> + long val_pages, val_mapped;
> + FILE *f;
> + int fd;
> +
> + len = sysconf(_SC_PAGESIZE) * 1024;
> +
> + if (!ASSERT_OK(read_stats(link), "read stats"))
> + return;
> +
> + val_pages = memcg_query->nr_file_pages;
> + if (!ASSERT_GE(val_pages, 0, "initial file val"))
> + return;
> + val_mapped = memcg_query->nr_file_mapped;
> + if (!ASSERT_GE(val_mapped, 0, "initial file mapped val"))
> + return;
> +
> + /*
> + * Increase memcg file usage by creating and writing
> + * to a temoprary mapped file.
> + */
> + f = tmpfile();
> + if (!ASSERT_OK_PTR(f, "tmpfile"))
> + return;
> + fd = fileno(f);
> + if (!ASSERT_OK_FD(fd, "open fd"))
> + return;
> + if (!ASSERT_OK(ftruncate(fd, len), "ftruncate"))
> + goto cleanup_fd;
> +
> + map = mmap(NULL, len, PROT_READ | PROT_WRITE,
> + MAP_SHARED, fd, 0);
ditto.
> + if (!ASSERT_NEQ(map, MAP_FAILED, "mmap file"))
> + goto cleanup_fd;
> +
> + memset(map, 1, len);
> +
> + if (!ASSERT_OK(read_stats(link), "read stats"))
> + goto cleanup_map;
> +
> + ASSERT_GT(memcg_query->nr_file_pages, val_pages, "final file value");
> + ASSERT_GT(memcg_query->nr_file_mapped, val_mapped,
> + "final file mapped value");
ditto.
> +
> +cleanup_map:
> + munmap(map, len);
> +cleanup_fd:
> + close(fd);
> +}
> +
> +static void test_shmem(struct bpf_link *link,
> + struct memcg_query *memcg_query)
ditto.
> +{
> + size_t len;
> + int fd;
> + void *map;
> + long val;
> +
> + len = sysconf(_SC_PAGESIZE) * 1024;
> +
> + if (!ASSERT_OK(read_stats(link), "read stats"))
> + return;
> +
> + val = memcg_query->nr_shmem;
> + if (!ASSERT_GE(val, 0, "init shmem val"))
> + return;
> +
> + /*
> + * Increase memcg shmem usage by creating and writing
> + * to a shmem object.
> + */
> + fd = shm_open("/tmp_shmem", O_CREAT | O_RDWR, 0644);
> + if (!ASSERT_OK_FD(fd, "shm_open"))
> + return;
> +
> + if (!ASSERT_OK(ftruncate(fd, len), "ftruncate"))
> + goto cleanup_fd;
> +
> + map = mmap(NULL, len, PROT_READ | PROT_WRITE,
> + MAP_SHARED, fd, 0);
ditto.
> + if (!ASSERT_NEQ(map, MAP_FAILED, "mmap shmem"))
> + goto cleanup_fd;
> +
> + memset(map, 1, len);
> +
> + if (!ASSERT_OK(read_stats(link), "read stats"))
> + goto cleanup_map;
> +
> + ASSERT_GT(memcg_query->nr_shmem, val, "final shmem value");
> +
> +cleanup_map:
> + munmap(map, len);
> +cleanup_fd:
> + close(fd);
> + shm_unlink("/tmp_shmem");
> +}
> +
> +static void test_kmem(struct bpf_link *link,
> + struct memcg_query *memcg_query)
ditto.
> +{
> + int fds[2];
> + int err;
> + ssize_t bytes;
> + size_t len;
> + char *buf;
> + long val;
> +
> + len = sysconf(_SC_PAGESIZE) * 1024;
> +
> + if (!ASSERT_OK(read_stats(link), "read stats"))
> + return;
> +
> + val = memcg_query->memcg_kmem;
> + if (!ASSERT_GE(val, 0, "initial kmem val"))
> + return;
> +
> + err = pipe2(fds, O_NONBLOCK);
> + if (!ASSERT_OK(err, "pipe"))
> + return;
> +
> + buf = malloc(len);
buf could be NULL here. Please check the malloc() result before using it in memset().
> + memset(buf, 1, len);
> + bytes = write(fds[1], buf, len);
> + if (!ASSERT_GT(bytes, 0, "write"))
> + goto cleanup;
> +
> + if (!ASSERT_OK(read_stats(link), "read stats"))
> + goto cleanup;
> +
> + ASSERT_GT(memcg_query->memcg_kmem, val, "kmem value");
> +
> +cleanup:
> + free(buf);
> + close(fds[0]);
> + close(fds[1]);
> +}
> +
> +static void test_pgfault(struct bpf_link *link,
> + struct memcg_query *memcg_query)
ditto.
> +{
> + void *map;
> + size_t len;
> + long val;
> +
> + len = sysconf(_SC_PAGESIZE) * 1024;
> +
> + if (!ASSERT_OK(read_stats(link), "read stats"))
> + return;
> +
> + val = memcg_query->pgfault;
> + if (!ASSERT_GE(val, 0, "initial pgfault val"))
> + return;
> +
> + /* Create region to use for triggering a page fault. */
> + map = mmap(NULL, len, PROT_READ | PROT_WRITE,
> + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
> + if (!ASSERT_NEQ(map, MAP_FAILED, "mmap anon"))
> + return;
> +
> + /* Trigger page fault. */
> + memset(map, 1, len);
> +
> + if (!ASSERT_OK(read_stats(link), "read stats"))
> + goto cleanup;
> +
> + ASSERT_GT(memcg_query->pgfault, val, "final pgfault val");
> +
> +cleanup:
> + munmap(map, len);
> +}
> +
> +void test_cgroup_iter_memcg(void)
> +{
> + char *cgroup_rel_path = "/cgroup_iter_memcg_test";
> + struct cgroup_iter_memcg *skel;
> + struct bpf_link *link;
> + int cgroup_fd, err;
> +
> + cgroup_fd = cgroup_setup_and_join(cgroup_rel_path);
> + if (!ASSERT_OK_FD(cgroup_fd, "cgroup_setup_and_join"))
> + return;
> +
> + skel = cgroup_iter_memcg__open();
> + if (!ASSERT_OK_PTR(skel, "cgroup_iter_memcg__open"))
> + goto cleanup_cgroup_fd;
> +
> + err = cgroup_iter_memcg__load(skel);
> + if (!ASSERT_OK(err, "cgroup_iter_memcg__load"))
> + goto cleanup_skel;
The above two calls can be combined into a single cgroup_iter_memcg__open_and_load() call.
> +
> + DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
> + union bpf_iter_link_info linfo = {
> + .cgroup.cgroup_fd = cgroup_fd,
> + .cgroup.order = BPF_CGROUP_ITER_SELF_ONLY,
> + };
> + opts.link_info = &linfo;
> + opts.link_info_len = sizeof(linfo);
> +
> + link = bpf_program__attach_iter(skel->progs.cgroup_memcg_query, &opts);
> + if (!ASSERT_OK_PTR(link, "bpf_program__attach_iter"))
> + goto cleanup_cgroup_fd;
This should be "goto cleanup_skel;" instead. Otherwise the skeleton is not destroyed on this error path.
> +
> + if (test__start_subtest("cgroup_iter_memcg__anon"))
> + test_anon(link, &skel->data_query->memcg_query);
> + if (test__start_subtest("cgroup_iter_memcg__shmem"))
> + test_shmem(link, &skel->data_query->memcg_query);
> + if (test__start_subtest("cgroup_iter_memcg__file"))
> + test_file(link, &skel->data_query->memcg_query);
> + if (test__start_subtest("cgroup_iter_memcg__kmem"))
> + test_kmem(link, &skel->data_query->memcg_query);
> + if (test__start_subtest("cgroup_iter_memcg__pgfault"))
> + test_pgfault(link, &skel->data_query->memcg_query);
> +
> + bpf_link__destroy(link);
> +cleanup_skel:
> + cgroup_iter_memcg__destroy(skel);
> +cleanup_cgroup_fd:
> + close(cgroup_fd);
> + cleanup_cgroup_environment();
> +}
> diff --git a/tools/testing/selftests/bpf/progs/cgroup_iter_memcg.c b/tools/testing/selftests/bpf/progs/cgroup_iter_memcg.c
> new file mode 100644
> index 000000000000..0d913d72b68d
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/progs/cgroup_iter_memcg.c
> @@ -0,0 +1,61 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
> +#include <vmlinux.h>
> +#include <bpf/bpf_core_read.h>
> +#include "cgroup_iter_memcg.h"
> +
> +char _license[] SEC("license") = "GPL";
> +
> +extern void memcg_flush_stats(struct cgroup *cgrp) __ksym;
> +extern unsigned long memcg_stat_fetch(struct cgroup *cgrp,
> + enum memcg_stat_item item) __ksym;
> +extern unsigned long memcg_node_stat_fetch(struct cgroup *cgrp,
> + enum node_stat_item item) __ksym;
> +extern unsigned long memcg_vm_event_fetch(struct cgroup *cgrp,
> + enum vm_event_item item) __ksym;
The above four extern declarations are not needed. They should already be
present in vmlinux.h if the latest pahole version (1.30) is used.
> +
> +/* The latest values read are stored here. */
> +struct memcg_query memcg_query SEC(".data.query");
> +
> +/*
> + * Helpers for fetching any of the three different types of memcg stats.
> + * BPF core macros are used to ensure an enumerator is present in the given
> + * kernel. Falling back on -1 indicates its absence.
> + */
> +#define node_stat_fetch_if_exists(cgrp, item) \
> + bpf_core_enum_value_exists(enum node_stat_item, item) ? \
> + memcg_node_stat_fetch((cgrp), bpf_core_enum_value( \
> + enum node_stat_item, item)) : -1
> +
> +#define memcg_stat_fetch_if_exists(cgrp, item) \
> + bpf_core_enum_value_exists(enum memcg_stat_item, item) ? \
> + memcg_node_stat_fetch((cgrp), bpf_core_enum_value( \
> + enum memcg_stat_item, item)) : -1
> +
> +#define vm_event_fetch_if_exists(cgrp, item) \
> + bpf_core_enum_value_exists(enum vm_event_item, item) ? \
> + memcg_vm_event_fetch((cgrp), bpf_core_enum_value( \
> + enum vm_event_item, item)) : -1
> +
> +SEC("iter.s/cgroup")
> +int cgroup_memcg_query(struct bpf_iter__cgroup *ctx)
> +{
> + struct cgroup *cgrp = ctx->cgroup;
> +
> + if (!cgrp)
> + return 1;
> +
> + memcg_flush_stats(cgrp);
> +
> + memcg_query.nr_anon_mapped = node_stat_fetch_if_exists(cgrp,
> + NR_ANON_MAPPED);
> + memcg_query.nr_shmem = node_stat_fetch_if_exists(cgrp, NR_SHMEM);
> + memcg_query.nr_file_pages = node_stat_fetch_if_exists(cgrp,
> + NR_FILE_PAGES);
> + memcg_query.nr_file_mapped = node_stat_fetch_if_exists(cgrp,
> + NR_FILE_MAPPED);
> + memcg_query.memcg_kmem = memcg_stat_fetch_if_exists(cgrp, MEMCG_KMEM);
> + memcg_query.pgfault = vm_event_fetch_if_exists(cgrp, PGFAULT);
There is a type mismatch:
+struct memcg_query {
+ /* some node_stat_item's */
+ long nr_anon_mapped;
+ long nr_shmem;
+ long nr_file_pages;
+ long nr_file_mapped;
+ /* some memcg_stat_item */
+ long memcg_kmem;
+ /* some vm_event_item */
+ long pgfault;
+};
memcg_query.nr_anon_mapped is long, but the return type of
node_stat_fetch_if_exists(...) is unsigned long. It would be good if the
two types were the same.
> +
> + return 0;
> +}
next prev parent reply other threads:[~2025-10-16 5:04 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-10-15 19:08 [PATCH v2 0/2] memcg: reading memcg stats more efficiently JP Kobryn
2025-10-15 19:08 ` [PATCH v2 1/2] memcg: introduce kfuncs for fetching memcg stats JP Kobryn
2025-10-15 20:48 ` Shakeel Butt
2025-10-15 23:12 ` Song Liu
2025-10-16 4:18 ` Yonghong Song
2025-10-16 20:28 ` JP Kobryn
2025-10-16 22:28 ` kernel test robot
2025-10-15 19:08 ` [PATCH v2 2/2] memcg: selftests for memcg stat kfuncs JP Kobryn
2025-10-15 23:17 ` Shakeel Butt
2025-10-16 5:04 ` Yonghong Song [this message]
2025-10-16 20:45 ` JP Kobryn
2025-10-15 20:46 ` [PATCH v2 0/2] memcg: reading memcg stats more efficiently Shakeel Butt
2025-10-16 0:21 ` JP Kobryn
2025-10-16 1:10 ` Roman Gushchin
2025-10-16 20:26 ` JP Kobryn
2025-10-16 23:00 ` Roman Gushchin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=f1558b5d-41be-4f56-8428-d5ae63d696ea@linux.dev \
--to=yonghong.song@linux.dev \
--cc=akpm@linux-foundation.org \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=cgroups@vger.kernel.org \
--cc=hannes@cmpxchg.org \
--cc=inwardvessel@gmail.com \
--cc=kernel-team@meta.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mkoutny@suse.com \
--cc=shakeel.butt@linux.dev \
--cc=tj@kernel.org \
--cc=yosryahmed@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox