linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Alexei Starovoitov <alexei.starovoitov@gmail.com>
To: Yafang Shao <laoar.shao@gmail.com>
Cc: ast@kernel.org, daniel@iogearbox.net, andrii@kernel.org,
	kafai@fb.com, songliubraving@fb.com, yhs@fb.com,
	john.fastabend@gmail.com, kpsingh@kernel.org, sdf@google.com,
	haoluo@google.com, jolsa@kernel.org, hannes@cmpxchg.org,
	mhocko@kernel.org, roman.gushchin@linux.dev, shakeelb@google.com,
	songmuchun@bytedance.com, akpm@linux-foundation.org,
	netdev@vger.kernel.org, bpf@vger.kernel.org, linux-mm@kvack.org
Subject: Re: [RFC PATCH bpf-next 15/15] bpf: Introduce selectable memcg for bpf map
Date: Mon, 1 Aug 2022 21:55:31 -0700	[thread overview]
Message-ID: <20220802045531.6oi2pt3fyjhotmjo@macbook-pro-3.dhcp.thefacebook.com> (raw)
In-Reply-To: <20220729152316.58205-16-laoar.shao@gmail.com>

On Fri, Jul 29, 2022 at 03:23:16PM +0000, Yafang Shao wrote:
> A new member memcg_fd is introduced into bpf attr of BPF_MAP_CREATE
> command, which is the fd of an opened cgroup directory. In this cgroup,
> the memory subsystem must be enabled. This value is valid only when
> BPF_F_SELECTABLE_MEMCG is set in map_flags. Once the kernel gets the
> memory cgroup from this fd, it will set this memcg into the bpf map, and
> all subsequent memory allocations of this map will be charged to the
> memcg.
> 
> The map creation paths in libbpf are also changed consequently.
> 
> Currently it is only supported for cgroup2 directory.
> 
> The usage of this new member is as follows:
> 	struct bpf_map_create_opts map_opts = {
> 		.sz = sizeof(map_opts),
> 		.map_flags = BPF_F_SELECTABLE_MEMCG,
> 	};
> 	int memcg_fd, map_fd;
> 	int key, value;
> 
> 	memcg_fd = open("/cgroup2", O_DIRECTORY);
> 	if (memcg_fd < 0) {
> 		perror("memcg dir open");
> 		return -1;
> 	}
> 
> 	map_opts.memcg_fd = memcg_fd;
> 	map_fd = bpf_map_create(BPF_MAP_TYPE_HASH, "map_for_memcg",
> 				sizeof(key), sizeof(value),
> 				1024, &map_opts);
> 	if (map_fd <= 0) {
> 		perror("map create");
> 		return -1;
> 	}

Overall the api extension makes sense.
The flexibility of selecting memcg is useful.

> Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
> ---
>  include/uapi/linux/bpf.h       |  2 ++
>  kernel/bpf/syscall.c           | 47 ++++++++++++++++++++++++++--------
>  tools/include/uapi/linux/bpf.h |  2 ++
>  tools/lib/bpf/bpf.c            |  1 +
>  tools/lib/bpf/bpf.h            |  3 ++-
>  tools/lib/bpf/libbpf.c         |  2 ++
>  6 files changed, 46 insertions(+), 11 deletions(-)
> 
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index d5fc1ea70b59..a6e02c8be924 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -1296,6 +1296,8 @@ union bpf_attr {
>  						   * struct stored as the
>  						   * map value
>  						   */
> +		__s32	memcg_fd;	/* selectable memcg */
> +		__s32	:32;		/* hole */

New fields cannot be inserted in the middle of a uapi struct.

>  		/* Any per-map-type extra fields
>  		 *
>  		 * BPF_MAP_TYPE_BLOOM_FILTER - the lowest 4 bits indicate the
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index 6401cc417fa9..9900e2b87315 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -402,14 +402,30 @@ void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock)
>  }
>  
>  #ifdef CONFIG_MEMCG_KMEM
> -static void bpf_map_save_memcg(struct bpf_map *map)
> +static int bpf_map_save_memcg(struct bpf_map *map, union bpf_attr *attr)
>  {
> -	/* Currently if a map is created by a process belonging to the root
> -	 * memory cgroup, get_obj_cgroup_from_current() will return NULL.
> -	 * So we have to check map->objcg for being NULL each time it's
> -	 * being used.
> -	 */
> -	map->objcg = get_obj_cgroup_from_current();
> +	struct obj_cgroup *objcg;
> +	struct cgroup *cgrp;
> +
> +	if (attr->map_flags & BPF_F_SELECTABLE_MEMCG) {

The flag is unnecessary. Just add memcg_fd to the end of attr and use != 0
as a condition that it should be used instead of get_obj_cgroup_from_current().
There are other parts of bpf uapi that have similar fd handling logic.

> +		cgrp = cgroup_get_from_fd(attr->memcg_fd);
> +		if (IS_ERR(cgrp))
> +			return -EINVAL;
> +
> +		objcg = get_obj_cgroup_from_cgroup(cgrp);
> +		if (IS_ERR(objcg))
> +			return PTR_ERR(objcg);
> +	} else {
> +		/* Currently if a map is created by a process belonging to the root
> +		 * memory cgroup, get_obj_cgroup_from_current() will return NULL.
> +		 * So we have to check map->objcg for being NULL each time it's
> +		 * being used.
> +		 */
> +		objcg = get_obj_cgroup_from_current();
> +	}
> +
> +	map->objcg = objcg;
> +	return 0;
>  }
>  
>  static void bpf_map_release_memcg(struct bpf_map *map)
> @@ -485,8 +501,9 @@ void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size,
>  }
>  
>  #else
> -static void bpf_map_save_memcg(struct bpf_map *map)
> +static int bpf_map_save_memcg(struct bpf_map *map, union bpf_attr *attr)
>  {
> +	return 0;
>  }
>  
>  static void bpf_map_release_memcg(struct bpf_map *map)
> @@ -530,13 +547,18 @@ void *bpf_map_container_alloc(union bpf_attr *attr, u64 size, int numa_node)

A high-level uapi struct should not be passed into a low-level helper like this.
Please pass memcg_fd instead.

>  {
>  	struct bpf_map *map;
>  	void *container;
> +	int ret;
>  
>  	container = __bpf_map_area_alloc(size, numa_node, false);
>  	if (!container)
>  		return ERR_PTR(-ENOMEM);
>  
>  	map = (struct bpf_map *)container;
> -	bpf_map_save_memcg(map);
> +	ret = bpf_map_save_memcg(map, attr);
> +	if (ret) {
> +		bpf_map_area_free(container);
> +		return ERR_PTR(ret);
> +	}
>  
>  	return container;
>  }
> @@ -547,6 +569,7 @@ void *bpf_map_container_mmapable_alloc(union bpf_attr *attr, u64 size,
>  	struct bpf_map *map;
>  	void *container;
>  	void *ptr;
> +	int ret;
>  
>  	/* kmalloc'ed memory can't be mmap'ed, use explicit vmalloc */
>  	ptr = __bpf_map_area_alloc(size, numa_node, true);
> @@ -555,7 +578,11 @@ void *bpf_map_container_mmapable_alloc(union bpf_attr *attr, u64 size,
>  
>  	container = ptr + align - offset;
>  	map = (struct bpf_map *)container;
> -	bpf_map_save_memcg(map);
> +	ret = bpf_map_save_memcg(map, attr);
> +	if (ret) {
> +		bpf_map_area_free(ptr);
> +		return ERR_PTR(ret);
> +	}
>  
>  	return ptr;
>  }
> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> index d5fc1ea70b59..a6e02c8be924 100644
> --- a/tools/include/uapi/linux/bpf.h
> +++ b/tools/include/uapi/linux/bpf.h
> @@ -1296,6 +1296,8 @@ union bpf_attr {
>  						   * struct stored as the
>  						   * map value
>  						   */
> +		__s32	memcg_fd;	/* selectable memcg */
> +		__s32	:32;		/* hole */
>  		/* Any per-map-type extra fields
>  		 *
>  		 * BPF_MAP_TYPE_BLOOM_FILTER - the lowest 4 bits indicate the
> diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
> index 5eb0df90eb2b..662ce5808386 100644
> --- a/tools/lib/bpf/bpf.c
> +++ b/tools/lib/bpf/bpf.c
> @@ -199,6 +199,7 @@ int bpf_map_create(enum bpf_map_type map_type,
>  	attr.map_extra = OPTS_GET(opts, map_extra, 0);
>  	attr.numa_node = OPTS_GET(opts, numa_node, 0);
>  	attr.map_ifindex = OPTS_GET(opts, map_ifindex, 0);
> +	attr.memcg_fd = OPTS_GET(opts, memcg_fd, 0);
>  
>  	fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, attr_sz);
>  	return libbpf_err_errno(fd);
> diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
> index 88a7cc4bd76f..481aad49422b 100644
> --- a/tools/lib/bpf/bpf.h
> +++ b/tools/lib/bpf/bpf.h
> @@ -51,8 +51,9 @@ struct bpf_map_create_opts {
>  
>  	__u32 numa_node;
>  	__u32 map_ifindex;
> +	__u32 memcg_fd;
>  };
> -#define bpf_map_create_opts__last_field map_ifindex
> +#define bpf_map_create_opts__last_field memcg_fd
>  
>  LIBBPF_API int bpf_map_create(enum bpf_map_type map_type,
>  			      const char *map_name,
> diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
> index 50d41815f431..86916d550031 100644
> --- a/tools/lib/bpf/libbpf.c
> +++ b/tools/lib/bpf/libbpf.c
> @@ -505,6 +505,7 @@ struct bpf_map {
>  	bool pinned;
>  	bool reused;
>  	bool autocreate;
> +	__s32 memcg_fd;
>  	__u64 map_extra;
>  };
>  
> @@ -4928,6 +4929,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
>  	create_attr.map_ifindex = map->map_ifindex;
>  	create_attr.map_flags = def->map_flags;
>  	create_attr.numa_node = map->numa_node;
> +	create_attr.memcg_fd = map->memcg_fd;
>  	create_attr.map_extra = map->map_extra;
>  
>  	if (bpf_map__is_struct_ops(map))
> -- 
> 2.17.1
> 


  reply	other threads:[~2022-08-02  4:55 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-07-29 15:23 [RFC PATCH bpf-next 00/15] " Yafang Shao
2022-07-29 15:23 ` [RFC PATCH bpf-next 01/15] bpf: Remove unneeded memset in queue_stack_map creation Yafang Shao
2022-07-29 15:23 ` [RFC PATCH bpf-next 02/15] bpf: Use bpf_map_area_free instread of kvfree Yafang Shao
2022-07-29 15:23 ` [RFC PATCH bpf-next 03/15] bpf: Make __GFP_NOWARN consistent in bpf map creation Yafang Shao
2022-07-29 15:23 ` [RFC PATCH bpf-next 04/15] bpf: Use bpf_map_area_alloc consistently on " Yafang Shao
2022-07-29 15:23 ` [RFC PATCH bpf-next 05/15] bpf: Introduce helpers for container of struct bpf_map Yafang Shao
2022-08-02  4:58   ` Alexei Starovoitov
2022-08-02 13:47     ` Yafang Shao
2022-07-29 15:23 ` [RFC PATCH bpf-next 06/15] bpf: Use bpf_map_container_alloc helpers in various bpf maps Yafang Shao
2022-07-29 15:23 ` [RFC PATCH bpf-next 07/15] bpf: Define bpf_map_get_memcg for !CONFIG_MEMCG_KMEM Yafang Shao
2022-07-29 15:23 ` [RFC PATCH bpf-next 08/15] bpf: Use scope-based charge for bpf_map_area_alloc Yafang Shao
2022-07-29 15:23 ` [RFC PATCH bpf-next 09/15] bpf: Use bpf_map_kzalloc in arraymap Yafang Shao
2022-07-29 15:23 ` [RFC PATCH bpf-next 10/15] bpf: Use bpf_map_pages_alloc in ringbuf Yafang Shao
2022-08-01 23:16   ` Andrii Nakryiko
2022-08-02 13:31     ` Yafang Shao
2022-08-02 18:00       ` Andrii Nakryiko
2022-08-03 13:27         ` Yafang Shao
2022-07-29 15:23 ` [RFC PATCH bpf-next 11/15] bpf: Use bpf_map_kvcalloc in bpf_local_storage Yafang Shao
2022-07-29 15:23 ` [RFC PATCH bpf-next 12/15] mm, memcg: Add new helper get_obj_cgroup_from_cgroup Yafang Shao
2022-07-29 15:23 ` [RFC PATCH bpf-next 13/15] bpf: Add new parameter into bpf_map_container_alloc Yafang Shao
2022-07-29 15:23 ` [RFC PATCH bpf-next 14/15] bpf: Add new map flag BPF_F_SELECTABLE_MEMCG Yafang Shao
2022-07-29 15:23 ` [RFC PATCH bpf-next 15/15] bpf: Introduce selectable memcg for bpf map Yafang Shao
2022-08-02  4:55   ` Alexei Starovoitov [this message]
2022-08-02 13:47     ` Yafang Shao

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220802045531.6oi2pt3fyjhotmjo@macbook-pro-3.dhcp.thefacebook.com \
    --to=alexei.starovoitov@gmail.com \
    --cc=akpm@linux-foundation.org \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=hannes@cmpxchg.org \
    --cc=haoluo@google.com \
    --cc=john.fastabend@gmail.com \
    --cc=jolsa@kernel.org \
    --cc=kafai@fb.com \
    --cc=kpsingh@kernel.org \
    --cc=laoar.shao@gmail.com \
    --cc=linux-mm@kvack.org \
    --cc=mhocko@kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=roman.gushchin@linux.dev \
    --cc=sdf@google.com \
    --cc=shakeelb@google.com \
    --cc=songliubraving@fb.com \
    --cc=songmuchun@bytedance.com \
    --cc=yhs@fb.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox