linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] mm: memcontrol: remove page_memcg()
@ 2024-05-21 13:15 Kefeng Wang
  2024-05-21 13:30 ` Michal Hocko
                   ` (2 more replies)
  0 siblings, 3 replies; 12+ messages in thread
From: Kefeng Wang @ 2024-05-21 13:15 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Johannes Weiner, Michal Hocko, Roman Gushchin, Shakeel Butt,
	Muchun Song, linux-mm, cgroups, Kefeng Wang

The page_memcg() only called by mod_memcg_page_state(), so squash it to
cleanup page_memcg().

Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
 include/linux/memcontrol.h | 14 ++------------
 mm/memcontrol.c            |  2 +-
 2 files changed, 3 insertions(+), 13 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 030d34e9d117..8abc70cc7219 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -443,11 +443,6 @@ static inline struct mem_cgroup *folio_memcg(struct folio *folio)
 	return __folio_memcg(folio);
 }
 
-static inline struct mem_cgroup *page_memcg(struct page *page)
-{
-	return folio_memcg(page_folio(page));
-}
-
 /**
  * folio_memcg_rcu - Locklessly get the memory cgroup associated with a folio.
  * @folio: Pointer to the folio.
@@ -1014,7 +1009,7 @@ static inline void mod_memcg_page_state(struct page *page,
 		return;
 
 	rcu_read_lock();
-	memcg = page_memcg(page);
+	memcg = folio_memcg(page_folio(page));
 	if (memcg)
 		mod_memcg_state(memcg, idx, val);
 	rcu_read_unlock();
@@ -1133,11 +1128,6 @@ static inline struct mem_cgroup *folio_memcg(struct folio *folio)
 	return NULL;
 }
 
-static inline struct mem_cgroup *page_memcg(struct page *page)
-{
-	return NULL;
-}
-
 static inline struct mem_cgroup *folio_memcg_rcu(struct folio *folio)
 {
 	WARN_ON_ONCE(!rcu_read_lock_held());
@@ -1636,7 +1626,7 @@ static inline void unlock_page_lruvec_irqrestore(struct lruvec *lruvec,
 	spin_unlock_irqrestore(&lruvec->lru_lock, flags);
 }
 
-/* Test requires a stable page->memcg binding, see page_memcg() */
+/* Test requires a stable page->memcg binding, see folio_memcg() */
 static inline bool folio_matches_lruvec(struct folio *folio,
 		struct lruvec *lruvec)
 {
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 54070687aad2..72833f6f0944 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3811,7 +3811,7 @@ void __memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
 #endif /* CONFIG_MEMCG_KMEM */
 
 /*
- * Because page_memcg(head) is not set on tails, set it now.
+ * Because folio_memcg(head) is not set on tails, set it now.
  */
 void split_page_memcg(struct page *head, int old_order, int new_order)
 {
-- 
2.41.0



^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] mm: memcontrol: remove page_memcg()
  2024-05-21 13:15 [PATCH] mm: memcontrol: remove page_memcg() Kefeng Wang
@ 2024-05-21 13:30 ` Michal Hocko
  2024-05-21 14:21 ` Matthew Wilcox
  2024-05-21 14:44 ` Matthew Wilcox
  2 siblings, 0 replies; 12+ messages in thread
From: Michal Hocko @ 2024-05-21 13:30 UTC (permalink / raw)
  To: Kefeng Wang
  Cc: Andrew Morton, Johannes Weiner, Roman Gushchin, Shakeel Butt,
	Muchun Song, linux-mm, cgroups

On Tue 21-05-24 21:15:56, Kefeng Wang wrote:
> The page_memcg() only called by mod_memcg_page_state(), so squash it to
> cleanup page_memcg().
> 
> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>

Acked-by: Michal Hocko <mhocko@suse.com>

Thanks!

> ---
>  include/linux/memcontrol.h | 14 ++------------
>  mm/memcontrol.c            |  2 +-
>  2 files changed, 3 insertions(+), 13 deletions(-)
> 
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index 030d34e9d117..8abc70cc7219 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -443,11 +443,6 @@ static inline struct mem_cgroup *folio_memcg(struct folio *folio)
>  	return __folio_memcg(folio);
>  }
>  
> -static inline struct mem_cgroup *page_memcg(struct page *page)
> -{
> -	return folio_memcg(page_folio(page));
> -}
> -
>  /**
>   * folio_memcg_rcu - Locklessly get the memory cgroup associated with a folio.
>   * @folio: Pointer to the folio.
> @@ -1014,7 +1009,7 @@ static inline void mod_memcg_page_state(struct page *page,
>  		return;
>  
>  	rcu_read_lock();
> -	memcg = page_memcg(page);
> +	memcg = folio_memcg(page_folio(page));
>  	if (memcg)
>  		mod_memcg_state(memcg, idx, val);
>  	rcu_read_unlock();
> @@ -1133,11 +1128,6 @@ static inline struct mem_cgroup *folio_memcg(struct folio *folio)
>  	return NULL;
>  }
>  
> -static inline struct mem_cgroup *page_memcg(struct page *page)
> -{
> -	return NULL;
> -}
> -
>  static inline struct mem_cgroup *folio_memcg_rcu(struct folio *folio)
>  {
>  	WARN_ON_ONCE(!rcu_read_lock_held());
> @@ -1636,7 +1626,7 @@ static inline void unlock_page_lruvec_irqrestore(struct lruvec *lruvec,
>  	spin_unlock_irqrestore(&lruvec->lru_lock, flags);
>  }
>  
> -/* Test requires a stable page->memcg binding, see page_memcg() */
> +/* Test requires a stable page->memcg binding, see folio_memcg() */
>  static inline bool folio_matches_lruvec(struct folio *folio,
>  		struct lruvec *lruvec)
>  {
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 54070687aad2..72833f6f0944 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -3811,7 +3811,7 @@ void __memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
>  #endif /* CONFIG_MEMCG_KMEM */
>  
>  /*
> - * Because page_memcg(head) is not set on tails, set it now.
> + * Because folio_memcg(head) is not set on tails, set it now.
>   */
>  void split_page_memcg(struct page *head, int old_order, int new_order)
>  {
> -- 
> 2.41.0

-- 
Michal Hocko
SUSE Labs


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] mm: memcontrol: remove page_memcg()
  2024-05-21 13:15 [PATCH] mm: memcontrol: remove page_memcg() Kefeng Wang
  2024-05-21 13:30 ` Michal Hocko
@ 2024-05-21 14:21 ` Matthew Wilcox
  2024-05-23  9:43   ` Kefeng Wang
  2024-05-21 14:44 ` Matthew Wilcox
  2 siblings, 1 reply; 12+ messages in thread
From: Matthew Wilcox @ 2024-05-21 14:21 UTC (permalink / raw)
  To: Kefeng Wang
  Cc: Andrew Morton, Johannes Weiner, Michal Hocko, Roman Gushchin,
	Shakeel Butt, Muchun Song, linux-mm, cgroups

On Tue, May 21, 2024 at 09:15:56PM +0800, Kefeng Wang wrote:
> -/* Test requires a stable page->memcg binding, see page_memcg() */
> +/* Test requires a stable page->memcg binding, see folio_memcg() */

folio->memcg, not page->memcg


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] mm: memcontrol: remove page_memcg()
  2024-05-21 13:15 [PATCH] mm: memcontrol: remove page_memcg() Kefeng Wang
  2024-05-21 13:30 ` Michal Hocko
  2024-05-21 14:21 ` Matthew Wilcox
@ 2024-05-21 14:44 ` Matthew Wilcox
  2024-05-21 16:03   ` Michal Hocko
  2024-05-21 19:29   ` Shakeel Butt
  2 siblings, 2 replies; 12+ messages in thread
From: Matthew Wilcox @ 2024-05-21 14:44 UTC (permalink / raw)
  To: Kefeng Wang
  Cc: Andrew Morton, Johannes Weiner, Michal Hocko, Roman Gushchin,
	Shakeel Butt, Muchun Song, linux-mm, cgroups, Uladzislau Rezki,
	Christoph Hellwig, Lorenzo Stoakes

On Tue, May 21, 2024 at 09:15:56PM +0800, Kefeng Wang wrote:
> The page_memcg() only called by mod_memcg_page_state(), so squash it to
> cleanup page_memcg().

This isn't wrong, except that the entire usage of memcg is wrong in the
only two callers of mod_memcg_page_state():

$ git grep mod_memcg_page_state
include/linux/memcontrol.h:static inline void mod_memcg_page_state(struct page *page,
include/linux/memcontrol.h:static inline void mod_memcg_page_state(struct page *page,
mm/vmalloc.c:           mod_memcg_page_state(page, MEMCG_VMALLOC, -1);
mm/vmalloc.c:                   mod_memcg_page_state(area->pages[i], MEMCG_VMALLOC, 1);

The memcg should not be attached to the individual pages that make up a
vmalloc allocation.  Rather, it should be managed by the vmalloc
allocation itself.  I don't have the knowledge to poke around inside
vmalloc right now, but maybe somebody else could take that on.


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] mm: memcontrol: remove page_memcg()
  2024-05-21 14:44 ` Matthew Wilcox
@ 2024-05-21 16:03   ` Michal Hocko
  2024-05-21 19:29   ` Shakeel Butt
  1 sibling, 0 replies; 12+ messages in thread
From: Michal Hocko @ 2024-05-21 16:03 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Kefeng Wang, Andrew Morton, Johannes Weiner, Roman Gushchin,
	Shakeel Butt, Muchun Song, linux-mm, cgroups, Uladzislau Rezki,
	Christoph Hellwig, Lorenzo Stoakes

On Tue 21-05-24 15:44:21, Matthew Wilcox wrote:
> On Tue, May 21, 2024 at 09:15:56PM +0800, Kefeng Wang wrote:
> > The page_memcg() only called by mod_memcg_page_state(), so squash it to
> > cleanup page_memcg().
> 
> This isn't wrong, except that the entire usage of memcg is wrong in the
> only two callers of mod_memcg_page_state():
> 
> $ git grep mod_memcg_page_state
> include/linux/memcontrol.h:static inline void mod_memcg_page_state(struct page *page,
> include/linux/memcontrol.h:static inline void mod_memcg_page_state(struct page *page,
> mm/vmalloc.c:           mod_memcg_page_state(page, MEMCG_VMALLOC, -1);
> mm/vmalloc.c:                   mod_memcg_page_state(area->pages[i], MEMCG_VMALLOC, 1);
> 
> The memcg should not be attached to the individual pages that make up a
> vmalloc allocation.  Rather, it should be managed by the vmalloc
> allocation itself.  I don't have the knowledge to poke around inside
> vmalloc right now, but maybe somebody else could take that on.

This would make sense as a follow up.

-- 
Michal Hocko
SUSE Labs


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] mm: memcontrol: remove page_memcg()
  2024-05-21 14:44 ` Matthew Wilcox
  2024-05-21 16:03   ` Michal Hocko
@ 2024-05-21 19:29   ` Shakeel Butt
  2024-05-23  8:57     ` Kefeng Wang
  2024-05-23 13:31     ` Matthew Wilcox
  1 sibling, 2 replies; 12+ messages in thread
From: Shakeel Butt @ 2024-05-21 19:29 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Kefeng Wang, Andrew Morton, Johannes Weiner, Michal Hocko,
	Roman Gushchin, Muchun Song, linux-mm, cgroups, Uladzislau Rezki,
	Christoph Hellwig, Lorenzo Stoakes

On Tue, May 21, 2024 at 03:44:21PM +0100, Matthew Wilcox wrote:
> On Tue, May 21, 2024 at 09:15:56PM +0800, Kefeng Wang wrote:
> > The page_memcg() only called by mod_memcg_page_state(), so squash it to
> > cleanup page_memcg().
> 
> This isn't wrong, except that the entire usage of memcg is wrong in the
> only two callers of mod_memcg_page_state():
> 
> $ git grep mod_memcg_page_state
> include/linux/memcontrol.h:static inline void mod_memcg_page_state(struct page *page,
> include/linux/memcontrol.h:static inline void mod_memcg_page_state(struct page *page,
> mm/vmalloc.c:           mod_memcg_page_state(page, MEMCG_VMALLOC, -1);
> mm/vmalloc.c:                   mod_memcg_page_state(area->pages[i], MEMCG_VMALLOC, 1);
> 
> The memcg should not be attached to the individual pages that make up a
> vmalloc allocation.  Rather, it should be managed by the vmalloc
> allocation itself.  I don't have the knowledge to poke around inside
> vmalloc right now, but maybe somebody else could take that on.

Are you concerned about accessing just memcg or any field of the
sub-page? There are drivers accessing fields of pages allocated through
vmalloc. Some details at 3b8000ae185c ("mm/vmalloc: huge vmalloc backing
pages should be split rather than compound").


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] mm: memcontrol: remove page_memcg()
  2024-05-21 19:29   ` Shakeel Butt
@ 2024-05-23  8:57     ` Kefeng Wang
  2024-05-23 13:31     ` Matthew Wilcox
  1 sibling, 0 replies; 12+ messages in thread
From: Kefeng Wang @ 2024-05-23  8:57 UTC (permalink / raw)
  To: Shakeel Butt, Matthew Wilcox
  Cc: Andrew Morton, Johannes Weiner, Michal Hocko, Roman Gushchin,
	Muchun Song, linux-mm, cgroups, Uladzislau Rezki,
	Christoph Hellwig, Lorenzo Stoakes



On 2024/5/22 3:29, Shakeel Butt wrote:
> On Tue, May 21, 2024 at 03:44:21PM +0100, Matthew Wilcox wrote:
>> On Tue, May 21, 2024 at 09:15:56PM +0800, Kefeng Wang wrote:
>>> The page_memcg() only called by mod_memcg_page_state(), so squash it to
>>> cleanup page_memcg().
>>
>> This isn't wrong, except that the entire usage of memcg is wrong in the
>> only two callers of mod_memcg_page_state():
>>
>> $ git grep mod_memcg_page_state
>> include/linux/memcontrol.h:static inline void mod_memcg_page_state(struct page *page,
>> include/linux/memcontrol.h:static inline void mod_memcg_page_state(struct page *page,
>> mm/vmalloc.c:           mod_memcg_page_state(page, MEMCG_VMALLOC, -1);
>> mm/vmalloc.c:                   mod_memcg_page_state(area->pages[i], MEMCG_VMALLOC, 1);
>>
>> The memcg should not be attached to the individual pages that make up a
>> vmalloc allocation.  Rather, it should be managed by the vmalloc
>> allocation itself.  I don't have the knowledge to poke around inside
>> vmalloc right now, but maybe somebody else could take that on.
> 
> Are you concerned about accessing just memcg or any field of the
> sub-page? There are drivers accessing fields of pages allocated through
> vmalloc. Some details at 3b8000ae185c ("mm/vmalloc: huge vmalloc backing
> pages should be split rather than compound").

Maybe Matthew want something shown below, move the memcg MEMCG_VMALLOC 
stat update from per-page to per-vmalloc-allocation? It should be speed 
up the statistic after conversion.

diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index e4a631ec430b..89f115623124 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -55,6 +55,9 @@ struct vm_struct {
  	unsigned long		size;
  	unsigned long		flags;
  	struct page		**pages;
+#ifdef CONFIG_MEMCG_KMEM
+	struct obj_cgroup	*objcg;
+#endif
  #ifdef CONFIG_HAVE_ARCH_HUGE_VMALLOC
  	unsigned int		page_order;
  #endif
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 5d3aa2dc88a8..3e28c382f604 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -3001,6 +3001,49 @@ static inline void set_vm_area_page_order(struct 
vm_struct *vm, unsigned int ord
  #endif
  }

+#ifdef CONFIG_MEMCG_KMEM
+static void vmalloc_memcg_alloc_hook(struct vm_struct *area, gfp_t gfp,
+				     int nr_pages)
+{
+	struct obj_cgroup *objcg;
+
+	if (!memcg_kmem_online() || !(gfp & __GFP_ACCOUNT))
+		return;
+
+	objcg = get_obj_cgroup_from_current();
+	if (objcg)
+		return;
+
+	area->objcg = objcg;
+
+	rcu_read_lock();
+	mod_memcg_state(obj_cgroup_memcg(objcg), MEMCG_VMALLOC, nr_pages);
+	rcu_read_unlock();
+}
+
+static void vmalloc_memcg_free_hook(struct vm_struct *area)
+{
+	struct obj_cgroup *objcg = area->objcg;
+
+	if (!objcg)
+		return;
+
+	rcu_read_lock();
+	mod_memcg_state(obj_cgroup_memcg(objcg), MEMCG_VMALLOC, -area->nr_pages);
+	rcu_read_unlock();
+
+	obj_cgroup_put(objcg);
+}
+#else
+static void vmalloc_memcg_alloc_hook(struct vm_struct *area, gfp_t gfp,
+				     int nr_pages)
+{
+}
+static void vmalloc_memcg_free_hook(struct vm_struct *area)
+{
+}
+#endif
+
  /**
   * vm_area_add_early - add vmap area early during boot
   * @vm: vm_struct to add
@@ -3338,7 +3381,6 @@ void vfree(const void *addr)
  		struct page *page = vm->pages[i];

  		BUG_ON(!page);
-		mod_memcg_page_state(page, MEMCG_VMALLOC, -1);
  		/*
  		 * High-order allocs for huge vmallocs are split, so
  		 * can be freed as an array of order-0 allocations
@@ -3347,6 +3389,7 @@ void vfree(const void *addr)
  		cond_resched();
  	}
  	atomic_long_sub(vm->nr_pages, &nr_vmalloc_pages);
+	vmalloc_memcg_free_hook(vm);
  	kvfree(vm->pages);
  	kfree(vm);
  }
@@ -3643,12 +3686,7 @@ static void *__vmalloc_area_node(struct vm_struct 
*area, gfp_t gfp_mask,
  		node, page_order, nr_small_pages, area->pages);

  	atomic_long_add(area->nr_pages, &nr_vmalloc_pages);
-	if (gfp_mask & __GFP_ACCOUNT) {
-		int i;
-
-		for (i = 0; i < area->nr_pages; i++)
-			mod_memcg_page_state(area->pages[i], MEMCG_VMALLOC, 1);
-	}
+	vmalloc_memcg_alloc_hook(area, gfp_mask, area->nr_pages);

  	/*
  	 * If not enough pages were obtained to accomplish an


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] mm: memcontrol: remove page_memcg()
  2024-05-21 14:21 ` Matthew Wilcox
@ 2024-05-23  9:43   ` Kefeng Wang
  0 siblings, 0 replies; 12+ messages in thread
From: Kefeng Wang @ 2024-05-23  9:43 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Andrew Morton, Johannes Weiner, Michal Hocko, Roman Gushchin,
	Shakeel Butt, Muchun Song, linux-mm, cgroups



On 2024/5/21 22:21, Matthew Wilcox wrote:
> On Tue, May 21, 2024 at 09:15:56PM +0800, Kefeng Wang wrote:
>> -/* Test requires a stable page->memcg binding, see page_memcg() */
>> +/* Test requires a stable page->memcg binding, see folio_memcg() */
> 
> folio->memcg, not page->memcg

OK, will fix.


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] mm: memcontrol: remove page_memcg()
  2024-05-21 19:29   ` Shakeel Butt
  2024-05-23  8:57     ` Kefeng Wang
@ 2024-05-23 13:31     ` Matthew Wilcox
  2024-05-23 15:41       ` Shakeel Butt
  1 sibling, 1 reply; 12+ messages in thread
From: Matthew Wilcox @ 2024-05-23 13:31 UTC (permalink / raw)
  To: Shakeel Butt
  Cc: Kefeng Wang, Andrew Morton, Johannes Weiner, Michal Hocko,
	Roman Gushchin, Muchun Song, linux-mm, cgroups, Uladzislau Rezki,
	Christoph Hellwig, Lorenzo Stoakes

On Tue, May 21, 2024 at 12:29:39PM -0700, Shakeel Butt wrote:
> On Tue, May 21, 2024 at 03:44:21PM +0100, Matthew Wilcox wrote:
> > The memcg should not be attached to the individual pages that make up a
> > vmalloc allocation.  Rather, it should be managed by the vmalloc
> > allocation itself.  I don't have the knowledge to poke around inside
> > vmalloc right now, but maybe somebody else could take that on.
> 
> Are you concerned about accessing just memcg or any field of the
> sub-page? There are drivers accessing fields of pages allocated through
> vmalloc. Some details at 3b8000ae185c ("mm/vmalloc: huge vmalloc backing
> pages should be split rather than compound").

Thanks for the pointer, and fb_deferred_io_fault() is already on my
hitlist for abusing struct page.

My primary concern is that we should track the entire allocation as a
single object rather than tracking each page individually.  That means
assigning the vmalloc allocation to a memcg rather than assigning each
page to a memcg.  It's a lot less overhead to increment the counter once
per allocation rather than once per page in the allocation!

But secondarily, yes, pages allocated by vmalloc probably don't need
any per-page state, other than tracking the vmalloc allocation they're
assigned to.  We'll see how that theory turns out.


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] mm: memcontrol: remove page_memcg()
  2024-05-23 13:31     ` Matthew Wilcox
@ 2024-05-23 15:41       ` Shakeel Butt
  2024-05-23 16:34         ` Matthew Wilcox
  0 siblings, 1 reply; 12+ messages in thread
From: Shakeel Butt @ 2024-05-23 15:41 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Kefeng Wang, Andrew Morton, Johannes Weiner, Michal Hocko,
	Roman Gushchin, Muchun Song, linux-mm, cgroups, Uladzislau Rezki,
	Christoph Hellwig, Lorenzo Stoakes

On Thu, May 23, 2024 at 02:31:05PM +0100, Matthew Wilcox wrote:
> On Tue, May 21, 2024 at 12:29:39PM -0700, Shakeel Butt wrote:
> > On Tue, May 21, 2024 at 03:44:21PM +0100, Matthew Wilcox wrote:
> > > The memcg should not be attached to the individual pages that make up a
> > > vmalloc allocation.  Rather, it should be managed by the vmalloc
> > > allocation itself.  I don't have the knowledge to poke around inside
> > > vmalloc right now, but maybe somebody else could take that on.
> > 
> > Are you concerned about accessing just memcg or any field of the
> > sub-page? There are drivers accessing fields of pages allocated through
> > vmalloc. Some details at 3b8000ae185c ("mm/vmalloc: huge vmalloc backing
> > pages should be split rather than compound").
> 
> Thanks for the pointer, and fb_deferred_io_fault() is already on my
> hitlist for abusing struct page.
> 
> My primary concern is that we should track the entire allocation as a
> single object rather than tracking each page individually.  That means
> assigning the vmalloc allocation to a memcg rather than assigning each
> page to a memcg.  It's a lot less overhead to increment the counter once
> per allocation rather than once per page in the allocation!
> 
> But secondarily, yes, pages allocated by vmalloc probably don't need
> any per-page state, other than tracking the vmalloc allocation they're
> assigned to.  We'll see how that theory turns out.

I think the tricky part would be vmalloc having pages spanning multiple
nodes which is not an issue for MEMCG_VMALLOC stat but the vmap based
kernel stack (CONFIG_VMAP_STACK) metric NR_KERNEL_STACK_KB cares about
that information.


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] mm: memcontrol: remove page_memcg()
  2024-05-23 15:41       ` Shakeel Butt
@ 2024-05-23 16:34         ` Matthew Wilcox
  2024-05-31 22:51           ` Shakeel Butt
  0 siblings, 1 reply; 12+ messages in thread
From: Matthew Wilcox @ 2024-05-23 16:34 UTC (permalink / raw)
  To: Shakeel Butt
  Cc: Kefeng Wang, Andrew Morton, Johannes Weiner, Michal Hocko,
	Roman Gushchin, Muchun Song, linux-mm, cgroups, Uladzislau Rezki,
	Christoph Hellwig, Lorenzo Stoakes

On Thu, May 23, 2024 at 08:41:25AM -0700, Shakeel Butt wrote:
> On Thu, May 23, 2024 at 02:31:05PM +0100, Matthew Wilcox wrote:
> > On Tue, May 21, 2024 at 12:29:39PM -0700, Shakeel Butt wrote:
> > > On Tue, May 21, 2024 at 03:44:21PM +0100, Matthew Wilcox wrote:
> > > > The memcg should not be attached to the individual pages that make up a
> > > > vmalloc allocation.  Rather, it should be managed by the vmalloc
> > > > allocation itself.  I don't have the knowledge to poke around inside
> > > > vmalloc right now, but maybe somebody else could take that on.
> > > 
> > > Are you concerned about accessing just memcg or any field of the
> > > sub-page? There are drivers accessing fields of pages allocated through
> > > vmalloc. Some details at 3b8000ae185c ("mm/vmalloc: huge vmalloc backing
> > > pages should be split rather than compound").
> > 
> > Thanks for the pointer, and fb_deferred_io_fault() is already on my
> > hitlist for abusing struct page.
> > 
> > My primary concern is that we should track the entire allocation as a
> > single object rather than tracking each page individually.  That means
> > assigning the vmalloc allocation to a memcg rather than assigning each
> > page to a memcg.  It's a lot less overhead to increment the counter once
> > per allocation rather than once per page in the allocation!
> > 
> > But secondarily, yes, pages allocated by vmalloc probably don't need
> > any per-page state, other than tracking the vmalloc allocation they're
> > assigned to.  We'll see how that theory turns out.
> 
> I think the tricky part would be vmalloc having pages spanning multiple
> nodes which is not an issue for MEMCG_VMALLOC stat but the vmap based
> kernel stack (CONFIG_VMAP_STACK) metric NR_KERNEL_STACK_KB cares about
> that information.

Yes, we'll have to handle mod_lruvec_page_state() differently since that
stat is tracked per node.  Or we could stop tracking that stat per node.
Is it useful to track it per node?  Why is it useful to track kernel
stacks per node, but not track vmalloc allocations per node?


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] mm: memcontrol: remove page_memcg()
  2024-05-23 16:34         ` Matthew Wilcox
@ 2024-05-31 22:51           ` Shakeel Butt
  0 siblings, 0 replies; 12+ messages in thread
From: Shakeel Butt @ 2024-05-31 22:51 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Kefeng Wang, Andrew Morton, Johannes Weiner, Michal Hocko,
	Roman Gushchin, Muchun Song, linux-mm, cgroups, Uladzislau Rezki,
	Christoph Hellwig, Lorenzo Stoakes

On Thu, May 23, 2024 at 05:34:27PM GMT, Matthew Wilcox wrote:
> On Thu, May 23, 2024 at 08:41:25AM -0700, Shakeel Butt wrote:
> > On Thu, May 23, 2024 at 02:31:05PM +0100, Matthew Wilcox wrote:
> > > On Tue, May 21, 2024 at 12:29:39PM -0700, Shakeel Butt wrote:
> > > > On Tue, May 21, 2024 at 03:44:21PM +0100, Matthew Wilcox wrote:
> > > > > The memcg should not be attached to the individual pages that make up a
> > > > > vmalloc allocation.  Rather, it should be managed by the vmalloc
> > > > > allocation itself.  I don't have the knowledge to poke around inside
> > > > > vmalloc right now, but maybe somebody else could take that on.
> > > > 
> > > > Are you concerned about accessing just memcg or any field of the
> > > > sub-page? There are drivers accessing fields of pages allocated through
> > > > vmalloc. Some details at 3b8000ae185c ("mm/vmalloc: huge vmalloc backing
> > > > pages should be split rather than compound").
> > > 
> > > Thanks for the pointer, and fb_deferred_io_fault() is already on my
> > > hitlist for abusing struct page.
> > > 
> > > My primary concern is that we should track the entire allocation as a
> > > single object rather than tracking each page individually.  That means
> > > assigning the vmalloc allocation to a memcg rather than assigning each
> > > page to a memcg.  It's a lot less overhead to increment the counter once
> > > per allocation rather than once per page in the allocation!
> > > 
> > > But secondarily, yes, pages allocated by vmalloc probably don't need
> > > any per-page state, other than tracking the vmalloc allocation they're
> > > assigned to.  We'll see how that theory turns out.
> > 
> > I think the tricky part would be vmalloc having pages spanning multiple
> > nodes which is not an issue for MEMCG_VMALLOC stat but the vmap based
> > kernel stack (CONFIG_VMAP_STACK) metric NR_KERNEL_STACK_KB cares about
> > that information.
> 
> Yes, we'll have to handle mod_lruvec_page_state() differently since that
> stat is tracked per node.  Or we could stop tracking that stat per node.
> Is it useful to track it per node?  Why is it useful to track kernel
> stacks per node, but not track vmalloc allocations per node?

This is a good question and other than that there are user visible APIs
(per numa meminfo & memory.numa_stat), I don't have a good answer.


^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2024-05-31 22:51 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-05-21 13:15 [PATCH] mm: memcontrol: remove page_memcg() Kefeng Wang
2024-05-21 13:30 ` Michal Hocko
2024-05-21 14:21 ` Matthew Wilcox
2024-05-23  9:43   ` Kefeng Wang
2024-05-21 14:44 ` Matthew Wilcox
2024-05-21 16:03   ` Michal Hocko
2024-05-21 19:29   ` Shakeel Butt
2024-05-23  8:57     ` Kefeng Wang
2024-05-23 13:31     ` Matthew Wilcox
2024-05-23 15:41       ` Shakeel Butt
2024-05-23 16:34         ` Matthew Wilcox
2024-05-31 22:51           ` Shakeel Butt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox