From: Yafang Shao <laoar.shao@gmail.com>
To: ast@kernel.org, daniel@iogearbox.net, andrii@kernel.org,
	kafai@fb.com, songliubraving@fb.com, yhs@fb.com,
	john.fastabend@gmail.com, kpsingh@kernel.org,
	quentin@isovalent.com, hannes@cmpxchg.org, mhocko@kernel.org,
	roman.gushchin@linux.dev, shakeelb@google.com,
	songmuchun@bytedance.com, akpm@linux-foundation.org,
	cl@linux.com, penberg@kernel.org, rientjes@google.com,
	iamjoonsoo.kim@lge.com, vbabka@suse.cz
Cc: linux-mm@kvack.org, bpf@vger.kernel.org,
	Yafang Shao <laoar.shao@gmail.com>
Subject: [RFC PATCH bpf-next 05/10] mm: Add helper to recharge kmalloc'ed address
Date: Sun, 19 Jun 2022 15:50:27 +0000
Message-ID: <20220619155032.32515-6-laoar.shao@gmail.com>
In-Reply-To: <20220619155032.32515-1-laoar.shao@gmail.com>

This patch introduces a helper to recharge the pages backing a given
kmalloc'ed address to a different memcg. The recharge is divided into
three steps,
  - pre charge to the new memcg
    Ensures that once we uncharge from the old memcg, the charge to the
    new memcg will always succeed. If we can't pre charge to the new
    memcg, we don't allow the uncharge from the old memcg at all.
  - uncharge from the old memcg
    After the pre charge to the new memcg succeeds, we can safely
    uncharge from the old memcg.
  - post charge to the new memcg
    Update the counters of the new memcg.

Sometimes we may want to recharge many kmalloc'ed addresses to the same
memcg. In that case we should pre charge all these addresses first, then
do the uncharges, and finally do the post charges. It may happen that
after successfully pre charging some addresses we fail to pre charge the
next one; we then have to cancel the pre charges already made, so a
fourth step, MEMCG_KMEM_CHARGE_ERR, is introduced for this purpose, as
sketched in the example below.
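
Below is a minimal sketch of the intended calling pattern, assuming only
the krecharge() helper and the MEMCG_KMEM_* steps added by this patch;
recharge_batch() itself is a hypothetical caller, not part of this
series:

	/*
	 * Hypothetical caller: recharge a batch of kmalloc'ed objects
	 * to the current task's memcg, rolling back the pre charges
	 * already made if one of them fails.
	 */
	static bool recharge_batch(void **objs, int nr)
	{
		int i, done;

		/* Step 1: pre charge every object to the new memcg. */
		for (done = 0; done < nr; done++) {
			if (!krecharge(objs[done], MEMCG_KMEM_PRE_CHARGE))
				goto rollback;
		}

		/* Step 2: uncharge all the objects from the old memcg. */
		for (i = 0; i < nr; i++)
			krecharge(objs[i], MEMCG_KMEM_UNCHARGE);

		/* Step 3: commit the counters of the new memcg. */
		for (i = 0; i < nr; i++)
			krecharge(objs[i], MEMCG_KMEM_POST_CHARGE);

		return true;

	rollback:
		/* Cancel the pre charges that already succeeded. */
		for (i = 0; i < done; i++)
			krecharge(objs[i], MEMCG_KMEM_CHARGE_ERR);
		return false;
	}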

Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
---
 include/linux/slab.h |  17 ++++++
 mm/slab.c            |  85 +++++++++++++++++++++++++++++
 mm/slob.c            |   7 +++
 mm/slub.c            | 125 +++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 234 insertions(+)

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 0fefdf528e0d..18ab30aa8fe8 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -194,6 +194,23 @@ bool kmem_valid_obj(void *object);
 void kmem_dump_obj(void *object);
 #endif
 
+/*
+ * The recharge is separated into three steps:
+ *	MEMCG_KMEM_PRE_CHARGE  : pre charge to the new memcg
+ *	MEMCG_KMEM_UNCHARGE    : uncharge from the old memcg
+ *	MEMCG_KMEM_POST_CHARGE : post charge to the new memcg
+ * and an error handler:
+ *	MEMCG_KMEM_CHARGE_ERR  : in the pre charge step we may succeed
+ *	                         in charging some objps but fail to
+ *	                         charge the next one; in that case we
+ *	                         should uncharge the already charged objps.
+ */
+#define MEMCG_KMEM_PRE_CHARGE	0
+#define MEMCG_KMEM_UNCHARGE	1
+#define MEMCG_KMEM_POST_CHARGE	2
+#define MEMCG_KMEM_CHARGE_ERR	3
+bool krecharge(const void *objp, int step);
+
 /*
  * Some archs want to perform DMA into kmalloc caches and need a guaranteed
  * alignment larger than the alignment of a 64-bit integer.
diff --git a/mm/slab.c b/mm/slab.c
index f8cd00f4ba13..4795014edd30 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3798,6 +3798,91 @@ void kfree(const void *objp)
 }
 EXPORT_SYMBOL(kfree);
 
+bool krecharge(const void *objp, int step)
+{
+	void *object = (void *)objp;
+	struct obj_cgroup *objcg_old;
+	struct obj_cgroup *objcg_new;
+	struct obj_cgroup **objcgs;
+	struct kmem_cache *s;
+	struct slab *slab;
+	unsigned long flags;
+	unsigned int off;
+
+	WARN_ON(!in_task());
+
+	if (unlikely(ZERO_OR_NULL_PTR(objp)))
+		return true;
+
+	if (!memcg_kmem_enabled())
+		return true;
+
+	local_irq_save(flags);
+	s = virt_to_cache(objp);
+	if (!s)
+		goto out;
+
+	if (!(s->flags & SLAB_ACCOUNT))
+		goto out;
+
+	slab = virt_to_slab(object);
+	if (!slab)
+		goto out;
+
+	objcgs = slab_objcgs(slab);
+	if (!objcgs)
+		goto out;
+
+	off = obj_to_index(s, slab, object);
+	objcg_old = objcgs[off];
+	if (!objcg_old && step != MEMCG_KMEM_POST_CHARGE)
+		goto out;
+
+	/*
+	 *  The recharge can be separated into three steps,
+	 *  1. Pre charge to the new memcg
+	 *  2. Uncharge from the old memcg
+	 *  3. Charge to the new memcg
+	 */
+	switch (step) {
+	case MEMCG_KMEM_PRE_CHARGE:
+		/* Pre charge to the new memcg */
+		objcg_new = get_obj_cgroup_from_current();
+		WARN_ON(!objcg_new);
+		if (obj_cgroup_charge(objcg_new, GFP_KERNEL, obj_full_size(s))) {
+			obj_cgroup_put(objcg_new);
+			local_irq_restore(flags);
+			return false;
+		}
+		break;
+	case MEMCG_KMEM_UNCHARGE:
+		/* Uncharge from the old memcg */
+		obj_cgroup_uncharge(objcg_old, obj_full_size(s));
+		objcgs[off] = NULL;
+		mod_objcg_state(objcg_old, slab_pgdat(slab), cache_vmstat_idx(s),
+				-obj_full_size(s));
+		obj_cgroup_put(objcg_old);
+		break;
+	case MEMCG_KMEM_POST_CHARGE:
+		/* Charge to the new memcg */
+		objcg_new = obj_cgroup_from_current();
+		objcgs[off] = objcg_new;
+		mod_objcg_state(objcg_new, slab_pgdat(slab), cache_vmstat_idx(s), obj_full_size(s));
+		break;
+	case MEMCG_KMEM_CHARGE_ERR:
+		objcg_new = obj_cgroup_from_current();
+		obj_cgroup_uncharge(objcg_new, obj_full_size(s));
+		obj_cgroup_put(objcg_new);
+		break;
+	}
+
+out:
+	local_irq_restore(flags);
+
+	return true;
+}
+EXPORT_SYMBOL(krecharge);
+
 /*
  * This initializes kmem_cache_node or resizes various caches for all nodes.
  */
diff --git a/mm/slob.c b/mm/slob.c
index f47811f09aca..6d68ad57b4a2 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -574,6 +574,13 @@ void kfree(const void *block)
 }
 EXPORT_SYMBOL(kfree);
 
+/* kmemcg is not supported by SLOB */
+bool krecharge(const void *block, int step)
+{
+	return true;
+}
+EXPORT_SYMBOL(krecharge);
+
 /* can't use ksize for kmem_cache_alloc memory, only kmalloc */
 size_t __ksize(const void *block)
 {
diff --git a/mm/slub.c b/mm/slub.c
index e5535020e0fd..ef6475ed6407 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -4556,6 +4556,131 @@ void kfree(const void *x)
 }
 EXPORT_SYMBOL(kfree);
 
+bool krecharge(const void *x, int step)
+{
+	void *object = (void *)x;
+	struct obj_cgroup *objcg_old;
+	struct obj_cgroup *objcg_new;
+	struct obj_cgroup **objcgs;
+	struct kmem_cache *s;
+	struct folio *folio;
+	struct slab *slab;
+	unsigned int off;
+
+	WARN_ON(!in_task());
+
+	if (!memcg_kmem_enabled())
+		return true;
+
+	if (unlikely(ZERO_OR_NULL_PTR(x)))
+		return true;
+
+	folio = virt_to_folio(x);
+	if (unlikely(!folio_test_slab(folio))) {
+		unsigned int order = folio_order(folio);
+		struct page *page;
+
+		switch (step) {
+		case MEMCG_KMEM_PRE_CHARGE:
+			objcg_new = get_obj_cgroup_from_current();
+			WARN_ON(!objcg_new);
+			/* Try to charge the current memcg */
+			if (obj_cgroup_charge_pages(objcg_new, GFP_KERNEL,
+						    1 << order)) {
+				obj_cgroup_put(objcg_new);
+				return false;
+			}
+			break;
+		case MEMCG_KMEM_UNCHARGE:
+			/* Uncharge folio memcg */
+			objcg_old = __folio_objcg(folio);
+			page = folio_page(folio, 0);
+			WARN_ON(!objcg_old);
+			obj_cgroup_uncharge_pages(objcg_old, 1 << order);
+			mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B,
+						-(PAGE_SIZE << order));
+			page->memcg_data = 0;
+			obj_cgroup_put(objcg_old);
+			break;
+		case MEMCG_KMEM_POST_CHARGE:
+			/* Assign the current memcg to the folio's page */
+			objcg_new = obj_cgroup_from_current();
+			page = folio_page(folio, 0);
+			page->memcg_data = (unsigned long)objcg_new | MEMCG_DATA_KMEM;
+			mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B,
+						PAGE_SIZE << order);
+			break;
+		case MEMCG_KMEM_CHARGE_ERR:
+			objcg_new = obj_cgroup_from_current();
+			obj_cgroup_uncharge_pages(objcg_new, 1 << order);
+			obj_cgroup_put(objcg_new);
+			break;
+		}
+		return true;
+	}
+
+	slab = folio_slab(folio);
+	if (!slab)
+		return true;
+
+	s = slab->slab_cache;
+	if (!(s->flags & SLAB_ACCOUNT))
+		return true;
+
+	objcgs = slab_objcgs(slab);
+	if (!objcgs)
+		return true;
+	off = obj_to_index(s, slab, object);
+	objcg_old = objcgs[off];
+	/* In the MEMCG_KMEM_UNCHARGE step, the objcg is set to NULL. */
+	if (!objcg_old && step != MEMCG_KMEM_POST_CHARGE)
+		return true;
+
+	/*
+	 *  The recharge can be separated into three steps,
+	 *  1. Pre charge to the new memcg
+	 *  2. Uncharge from the old memcg
+	 *  3. Charge to the new memcg
+	 */
+	switch (step) {
+	case MEMCG_KMEM_PRE_CHARGE:
+		/*
+		 * Before uncharging from the old memcg, we must pre charge
+		 * the new memcg, to guarantee that the recharge to the new
+		 * memcg always succeeds once the old memcg is uncharged.
+		 */
+		objcg_new = get_obj_cgroup_from_current();
+		WARN_ON(!objcg_new);
+		if (obj_cgroup_charge(objcg_new, GFP_KERNEL, obj_full_size(s))) {
+			obj_cgroup_put(objcg_new);
+			return false;
+		}
+		break;
+	case MEMCG_KMEM_UNCHARGE:
+		/* Uncharge from the old memcg */
+		obj_cgroup_uncharge(objcg_old, obj_full_size(s));
+		objcgs[off] = NULL;
+		mod_objcg_state(objcg_old, slab_pgdat(slab), cache_vmstat_idx(s),
+				-obj_full_size(s));
+		obj_cgroup_put(objcg_old);
+		break;
+	case MEMCG_KMEM_POST_CHARGE:
+		/* Charge to the new memcg */
+		objcg_new = obj_cgroup_from_current();
+		objcgs[off] = objcg_new;
+		mod_objcg_state(objcg_new, slab_pgdat(slab), cache_vmstat_idx(s), obj_full_size(s));
+		break;
+	case MEMCG_KMEM_CHARGE_ERR:
+		objcg_new = obj_cgroup_from_current();
+		obj_cgroup_uncharge(objcg_new, obj_full_size(s));
+		obj_cgroup_put(objcg_new);
+		break;
+	}
+
+	return true;
+}
+EXPORT_SYMBOL(krecharge);
+
 #define SHRINK_PROMOTE_MAX 32
 
 /*
-- 
2.17.1




Thread overview: 29+ messages
2022-06-19 15:50 [RFC PATCH bpf-next 00/10] bpf, mm: Recharge pages when reuse bpf map Yafang Shao
2022-06-19 15:50 ` [RFC PATCH bpf-next 01/10] mm, memcg: Add a new helper memcg_should_recharge() Yafang Shao
2022-06-19 15:50 ` [RFC PATCH bpf-next 02/10] bpftool: Show memcg info of bpf map Yafang Shao
2022-06-19 15:50 ` [RFC PATCH bpf-next 03/10] mm, memcg: Add new helper obj_cgroup_from_current() Yafang Shao
2022-06-23  3:01   ` Roman Gushchin
2022-06-25 13:54     ` Yafang Shao
2022-06-26  1:52       ` Roman Gushchin
2022-06-19 15:50 ` [RFC PATCH bpf-next 04/10] mm, memcg: Make obj_cgroup_{charge, uncharge}_pages public Yafang Shao
2022-06-19 15:50 ` Yafang Shao [this message]
2022-06-19 15:50 ` [RFC PATCH bpf-next 06/10] mm: Add helper to recharge vmalloc'ed address Yafang Shao
2022-06-19 15:50 ` [RFC PATCH bpf-next 07/10] mm: Add helper to recharge percpu address Yafang Shao
2022-06-23  5:25   ` Dennis Zhou
2022-06-25 14:18     ` Yafang Shao
2022-06-19 15:50 ` [RFC PATCH bpf-next 08/10] bpf: Recharge memory when reuse bpf map Yafang Shao
2022-06-19 15:50 ` [RFC PATCH bpf-next 09/10] bpf: Make bpf_map_{save, release}_memcg public Yafang Shao
2022-06-19 15:50 ` [RFC PATCH bpf-next 10/10] bpf: Support recharge for hash map Yafang Shao
2022-06-21 23:28 ` [RFC PATCH bpf-next 00/10] bpf, mm: Recharge pages when reuse bpf map Alexei Starovoitov
2022-06-22 14:03   ` Yafang Shao
2022-06-23  3:29 ` Roman Gushchin
2022-06-25  3:26   ` Yafang Shao
2022-06-26  3:28     ` Roman Gushchin
2022-06-26  3:32       ` Roman Gushchin
2022-06-26  6:38         ` Yafang Shao
2022-06-26  6:25       ` Yafang Shao
2022-07-02  4:23         ` Roman Gushchin
2022-07-02 15:24           ` Yafang Shao
2022-07-02 15:33             ` Roman Gushchin
2022-06-27  0:40     ` Alexei Starovoitov
2022-06-27 15:02       ` Yafang Shao
