* [PATCH] mm: fix vmalloc memcg accounting issue
From: Zhiguo Jiang @ 2024-08-30 11:39 UTC
To: Andrew Morton, Uladzislau Rezki, Christoph Hellwig, linux-mm,
linux-kernel
Cc: opensource.kernel, Zhiguo Jiang
In vmalloc(), the MEMCG_VMALLOC counter is only incremented when the
allocation carries __GFP_ACCOUNT ("if (gfp_mask & __GFP_ACCOUNT)"), but
vfree() decrements it unconditionally, so the increment and decrement
are not balanced. Fix this by recording the accounting decision in the
vm_struct flags and checking it in vfree().
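For illustration, the pre-patch imbalance looks roughly like this
(simplified excerpts based on the hunks below, not verbatim mainline
code):

	/* __vmalloc_area_node(): increment only for accounted allocations */
	if (gfp_mask & __GFP_ACCOUNT) {
		int i;

		for (i = 0; i < area->nr_pages; i++)
			mod_memcg_page_state(area->pages[i], MEMCG_VMALLOC, 1);
	}

	/* vfree(): decrement for every page, with no __GFP_ACCOUNT check */
	for (i = 0; i < vm->nr_pages; i++) {
		struct page *page = vm->pages[i];

		BUG_ON(!page);
		mod_memcg_page_state(page, MEMCG_VMALLOC, -1);
	}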
Signed-off-by: Zhiguo Jiang <justinjiang@vivo.com>
---
include/linux/vmalloc.h | 1 +
mm/vmalloc.c | 13 ++++++++++---
2 files changed, 11 insertions(+), 3 deletions(-)
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index ad2ce7a6ab7a..0253feec371f
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -38,6 +38,7 @@ struct iov_iter; /* in uio.h */
#define VM_DEFER_KMEMLEAK 0
#endif
#define VM_SPARSE 0x00001000 /* sparse vm_area. not all pages are present. */
+#define VM_MEMCG_ACCOUNT 0x00002000 /* vm pages were allocated with __GFP_ACCOUNT for memcg accounting. */
/* bits [20..32] reserved for arch specific ioremap internals */
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index d977c280b1c4..aff1bf7a8798
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -3125,6 +3125,12 @@ static struct vm_struct *__get_vm_area_node(unsigned long size,
size += PAGE_SIZE;
area->flags = flags;
+ /*
+ * Record that this allocation is memcg-accounted so that
+ * vfree() can balance the accounting done at allocation time.
+ */
+ if (gfp_mask & __GFP_ACCOUNT)
+ area->flags |= VM_MEMCG_ACCOUNT;
area->caller = caller;
va = alloc_vmap_area(size, align, start, end, node, gfp_mask, 0, area);
@@ -3367,7 +3373,9 @@ void vfree(const void *addr)
struct page *page = vm->pages[i];
BUG_ON(!page);
- mod_memcg_page_state(page, MEMCG_VMALLOC, -1);
+
+ if (vm->flags & VM_MEMCG_ACCOUNT)
+ mod_memcg_page_state(page, MEMCG_VMALLOC, -1);
/*
* High-order allocs for huge vmallocs are split, so
* can be freed as an array of order-0 allocations
@@ -3662,7 +3670,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
node, page_order, nr_small_pages, area->pages);
atomic_long_add(area->nr_pages, &nr_vmalloc_pages);
- if (gfp_mask & __GFP_ACCOUNT) {
+ if (area->flags & VM_MEMCG_ACCOUNT) {
int i;
for (i = 0; i < area->nr_pages; i++)
@@ -3813,7 +3821,6 @@ void *__vmalloc_node_range_noprof(unsigned long size, unsigned long align,
}
goto fail;
}
-
/*
* Prepare arguments for __vmalloc_area_node() and
* kasan_unpoison_vmalloc().
--
2.39.0
* Re: [PATCH] mm: fix vmalloc memcg accounting issue
From: Michal Hocko @ 2024-08-30 11:58 UTC
To: Zhiguo Jiang
Cc: Andrew Morton, Uladzislau Rezki, Christoph Hellwig, linux-mm,
linux-kernel, opensource.kernel
On Fri 30-08-24 19:39:56, Zhiguo Jiang wrote:
> In vmalloc(), the MEMCG_VMALLOC counter is only incremented when the
> allocation carries __GFP_ACCOUNT ("if (gfp_mask & __GFP_ACCOUNT)"), but
> vfree() decrements it unconditionally, so the increment and decrement
> are not balanced. Fix this by recording the accounting decision in the
> vm_struct flags and checking it in vfree().
Are you really observing this or have you just concluded this from
reading the code?
AFAIR mod_memcg_page_state will not account if the page doesn't have a
memcg associated with it, which means it is not charged.
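For context, mod_memcg_page_state looks roughly like this in recent
kernels (a simplified sketch of include/linux/memcontrol.h; exact
signatures and details may differ between versions):

	static inline void mod_memcg_page_state(struct page *page,
						enum memcg_stat_item idx, int val)
	{
		struct mem_cgroup *memcg;

		if (mem_cgroup_disabled())
			return;

		rcu_read_lock();
		/* No memcg means the page was never charged: nothing to undo. */
		memcg = folio_memcg(page_folio(page));
		if (memcg)
			mod_memcg_state(memcg, idx, val);
		rcu_read_unlock();
	}

i.e. an uncharged page is already skipped on the vfree() side.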
> Signed-off-by: Zhiguo Jiang <justinjiang@vivo.com>
> ---
> include/linux/vmalloc.h | 1 +
> mm/vmalloc.c | 13 ++++++++++---
> 2 files changed, 11 insertions(+), 3 deletions(-)
>
> diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
> index ad2ce7a6ab7a..0253feec371f
> --- a/include/linux/vmalloc.h
> +++ b/include/linux/vmalloc.h
> @@ -38,6 +38,7 @@ struct iov_iter; /* in uio.h */
> #define VM_DEFER_KMEMLEAK 0
> #endif
> #define VM_SPARSE 0x00001000 /* sparse vm_area. not all pages are present. */
> +#define VM_MEMCG_ACCOUNT 0x00002000 /* vm pages were allocated with __GFP_ACCOUNT for memcg accounting. */
>
> /* bits [20..32] reserved for arch specific ioremap internals */
>
> diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> index d977c280b1c4..aff1bf7a8798
> --- a/mm/vmalloc.c
> +++ b/mm/vmalloc.c
> @@ -3125,6 +3125,12 @@ static struct vm_struct *__get_vm_area_node(unsigned long size,
> size += PAGE_SIZE;
>
> area->flags = flags;
> + /*
> + * Record that this allocation is memcg-accounted so that
> + * vfree() can balance the accounting done at allocation time.
> + */
> + if (gfp_mask & __GFP_ACCOUNT)
> + area->flags |= VM_MEMCG_ACCOUNT;
> area->caller = caller;
>
> va = alloc_vmap_area(size, align, start, end, node, gfp_mask, 0, area);
> @@ -3367,7 +3373,9 @@ void vfree(const void *addr)
> struct page *page = vm->pages[i];
>
> BUG_ON(!page);
> - mod_memcg_page_state(page, MEMCG_VMALLOC, -1);
> +
> + if (vm->flags & VM_MEMCG_ACCOUNT)
> + mod_memcg_page_state(page, MEMCG_VMALLOC, -1);
> /*
> * High-order allocs for huge vmallocs are split, so
> * can be freed as an array of order-0 allocations
> @@ -3662,7 +3670,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
> node, page_order, nr_small_pages, area->pages);
>
> atomic_long_add(area->nr_pages, &nr_vmalloc_pages);
> - if (gfp_mask & __GFP_ACCOUNT) {
> + if (area->flags & VM_MEMCG_ACCOUNT) {
> int i;
>
> for (i = 0; i < area->nr_pages; i++)
> @@ -3813,7 +3821,6 @@ void *__vmalloc_node_range_noprof(unsigned long size, unsigned long align,
> }
> goto fail;
> }
> -
> /*
> * Prepare arguments for __vmalloc_area_node() and
> * kasan_unpoison_vmalloc().
> --
> 2.39.0
>
--
Michal Hocko
SUSE Labs