linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Kefeng Wang <wangkefeng.wang@huawei.com>
To: Shakeel Butt <shakeel.butt@linux.dev>,
	Matthew Wilcox <willy@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>,
	Johannes Weiner <hannes@cmpxchg.org>,
	Michal Hocko <mhocko@kernel.org>,
	Roman Gushchin <roman.gushchin@linux.dev>,
	Muchun Song <muchun.song@linux.dev>, <linux-mm@kvack.org>,
	<cgroups@vger.kernel.org>, Uladzislau Rezki <urezki@gmail.com>,
	Christoph Hellwig <hch@infradead.org>,
	Lorenzo Stoakes <lstoakes@gmail.com>
Subject: Re: [PATCH] mm: memcontrol: remove page_memcg()
Date: Thu, 23 May 2024 16:57:21 +0800	[thread overview]
Message-ID: <da794fd1-562a-4e75-b79f-fc4067aa1941@huawei.com> (raw)
In-Reply-To: <tcdr5cm3djarfeiwar6q7qvxjdgkb7r5pcb7j6pzqejnbslsgz@2pnnlbwmfzdu>



On 2024/5/22 3:29, Shakeel Butt wrote:
> On Tue, May 21, 2024 at 03:44:21PM +0100, Matthew Wilcox wrote:
>> On Tue, May 21, 2024 at 09:15:56PM +0800, Kefeng Wang wrote:
>>> The page_memcg() only called by mod_memcg_page_state(), so squash it to
>>> cleanup page_memcg().
>>
>> This isn't wrong, except that the entire usage of memcg is wrong in the
>> only two callers of mod_memcg_page_state():
>>
>> $ git grep mod_memcg_page_state
>> include/linux/memcontrol.h:static inline void mod_memcg_page_state(struct page *page,
>> include/linux/memcontrol.h:static inline void mod_memcg_page_state(struct page *page,
>> mm/vmalloc.c:           mod_memcg_page_state(page, MEMCG_VMALLOC, -1);
>> mm/vmalloc.c:                   mod_memcg_page_state(area->pages[i], MEMCG_VMALLOC, 1);
>>
>> The memcg should not be attached to the individual pages that make up a
>> vmalloc allocation.  Rather, it should be managed by the vmalloc
>> allocation itself.  I don't have the knowledge to poke around inside
>> vmalloc right now, but maybe somebody else could take that on.
> 
> Are you concerned about accessing just memcg or any field of the
> sub-page? There are drivers accessing fields of pages allocated through
> vmalloc. Some details at 3b8000ae185c ("mm/vmalloc: huge vmalloc backing
> pages should be split rather than compound").

Maybe Matthew wants something like the patch below, which moves the memcg
MEMCG_VMALLOC stat update from per-page to per-vmalloc-allocation? It
should speed up the statistics update after the conversion.

diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index e4a631ec430b..89f115623124 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -55,6 +55,9 @@ struct vm_struct {
  	unsigned long		size;
  	unsigned long		flags;
  	struct page		**pages;
+#ifdef CONFIG_MEMCG_KMEM
+	struct obj_cgroup	*objcg;
+#endif
  #ifdef CONFIG_HAVE_ARCH_HUGE_VMALLOC
  	unsigned int		page_order;
  #endif
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 5d3aa2dc88a8..3e28c382f604 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -3001,6 +3001,49 @@ static inline void set_vm_area_page_order(struct 
vm_struct *vm, unsigned int ord
  #endif
  }

+#ifdef CONFIG_MEMCG_KMEM
+static void vmalloc_memcg_alloc_hook(struct vm_struct *area, gfp_t gfp,
+				     int nr_pages)
+{
+	struct obj_cgroup *objcg;
+
+	if (!memcg_kmem_online() || !(gfp & __GFP_ACCOUNT))
+		return;
+	/* No objcg for the current task: nothing to charge or remember. */
+	objcg = get_obj_cgroup_from_current();
+	if (!objcg)
+		return;
+	/* Stored on the area so vmalloc_memcg_free_hook() can undo and put it. */
+	area->objcg = objcg;
+	/* One MEMCG_VMALLOC update for all nr_pages instead of one per page. */
+	rcu_read_lock();
+	mod_memcg_state(obj_cgroup_memcg(objcg), MEMCG_VMALLOC, nr_pages);
+	rcu_read_unlock();
+}
+
+static void vmalloc_memcg_free_hook(struct vm_struct *area)
+{
+	struct obj_cgroup *objcg = area->objcg;
+
+	if (!objcg)
+		return;
+	/* Undo the accounting for the whole area with a single stat update. */
+	rcu_read_lock();
+	mod_memcg_state(obj_cgroup_memcg(objcg), MEMCG_VMALLOC, -area->nr_pages);
+	rcu_read_unlock();
+	/* Release the objcg that was recorded in area->objcg. */
+	obj_cgroup_put(objcg);
+}
+#else /* !CONFIG_MEMCG_KMEM */
+static void vmalloc_memcg_alloc_hook(struct vm_struct *area, gfp_t gfp,
+				     int nr_pages)
+{
+}
+static void vmalloc_memcg_free_hook(struct vm_struct *area)
+{
+}
+#endif /* CONFIG_MEMCG_KMEM */
+
  /**
   * vm_area_add_early - add vmap area early during boot
   * @vm: vm_struct to add
@@ -3338,7 +3381,6 @@ void vfree(const void *addr)
  		struct page *page = vm->pages[i];

  		BUG_ON(!page);
-		mod_memcg_page_state(page, MEMCG_VMALLOC, -1);
  		/*
  		 * High-order allocs for huge vmallocs are split, so
  		 * can be freed as an array of order-0 allocations
@@ -3347,6 +3389,7 @@ void vfree(const void *addr)
  		cond_resched();
  	}
  	atomic_long_sub(vm->nr_pages, &nr_vmalloc_pages);
+	vmalloc_memcg_free_hook(vm);
  	kvfree(vm->pages);
  	kfree(vm);
  }
@@ -3643,12 +3686,7 @@ static void *__vmalloc_area_node(struct vm_struct 
*area, gfp_t gfp_mask,
  		node, page_order, nr_small_pages, area->pages);

  	atomic_long_add(area->nr_pages, &nr_vmalloc_pages);
-	if (gfp_mask & __GFP_ACCOUNT) {
-		int i;
-
-		for (i = 0; i < area->nr_pages; i++)
-			mod_memcg_page_state(area->pages[i], MEMCG_VMALLOC, 1);
-	}
+	vmalloc_memcg_alloc_hook(area, gfp_mask, area->nr_pages);

  	/*
  	 * If not enough pages were obtained to accomplish an


  reply	other threads:[~2024-05-23  8:57 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-05-21 13:15 Kefeng Wang
2024-05-21 13:30 ` Michal Hocko
2024-05-21 14:21 ` Matthew Wilcox
2024-05-23  9:43   ` Kefeng Wang
2024-05-21 14:44 ` Matthew Wilcox
2024-05-21 16:03   ` Michal Hocko
2024-05-21 19:29   ` Shakeel Butt
2024-05-23  8:57     ` Kefeng Wang [this message]
2024-05-23 13:31     ` Matthew Wilcox
2024-05-23 15:41       ` Shakeel Butt
2024-05-23 16:34         ` Matthew Wilcox
2024-05-31 22:51           ` Shakeel Butt

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=da794fd1-562a-4e75-b79f-fc4067aa1941@huawei.com \
    --to=wangkefeng.wang@huawei.com \
    --cc=akpm@linux-foundation.org \
    --cc=cgroups@vger.kernel.org \
    --cc=hannes@cmpxchg.org \
    --cc=hch@infradead.org \
    --cc=linux-mm@kvack.org \
    --cc=lstoakes@gmail.com \
    --cc=mhocko@kernel.org \
    --cc=muchun.song@linux.dev \
    --cc=roman.gushchin@linux.dev \
    --cc=shakeel.butt@linux.dev \
    --cc=urezki@gmail.com \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox