Subject: [PATCH v4] memcg: add per-memcg vmalloc stat
From: Shakeel Butt
Date: 2022-01-04 22:23 UTC
To: Johannes Weiner, Michal Hocko, Roman Gushchin, Muchun Song
Cc: Andrew Morton, linux-mm, linux-kernel, Shakeel Butt
The kvmalloc* allocation functions can fall back to vmalloc allocations,
and they do so more often on long-running machines. In addition, the
kernel has kvmalloc* call sites that pass __GFP_ACCOUNT. So, over time,
memory.stat on long-running machines stops giving a complete picture of
which type of memory is charged to the memcg. Add a per-memcg vmalloc
stat to close that gap.
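
For context, an illustrative sketch (not part of this patch) of the kind
of call site this makes visible: a hypothetical helper allocating an
accounted buffer with kvmalloc_array(), which tries kmalloc() first and
falls back to vmalloc() for large or fragmented requests:

	#include <linux/mm.h>

	/* Hypothetical call site, for illustration only. */
	static void *alloc_accounted_table(size_t nr_entries)
	{
		/*
		 * With __GFP_ACCOUNT the backing pages are charged to the
		 * current memcg whether kmalloc() or the vmalloc() fallback
		 * satisfies the request; this patch makes the vmalloc() side
		 * show up as "vmalloc" in memory.stat.
		 */
		return kvmalloc_array(nr_entries, sizeof(u64),
				      GFP_KERNEL | __GFP_ACCOUNT);
	}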
Signed-off-by: Shakeel Butt <shakeelb@google.com>
Reviewed-by: Muchun Song <songmuchun@bytedance.com>
Acked-by: Roman Gushchin <guro@fb.com>
---
Changes since v3:
- Removed area->page[0] checks and moved to page by page accounting as
suggested by Michal
Changes since v2:
- remove cast, per Muchun
- add area->page[0] check, per Michal
Changes since v1:
- page_memcg() within rcu lock as suggested by Muchun.
Documentation/admin-guide/cgroup-v2.rst | 3 +++
include/linux/memcontrol.h | 21 +++++++++++++++++++++
mm/memcontrol.c | 1 +
mm/vmalloc.c | 13 +++++++++++--
4 files changed, 36 insertions(+), 2 deletions(-)
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 82c8dc91b2be..5aa368d165da 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1314,6 +1314,9 @@ PAGE_SIZE multiple when read back.
sock (npn)
Amount of memory used in network transmission buffers
+ vmalloc (npn)
+ Amount of memory used for vmap backed memory.
+
shmem
Amount of cached filesystem data that is swap-backed,
such as tmpfs, shm segments, shared anonymous mmap()s
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index d76dad703580..b72d75141e12 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -33,6 +33,7 @@ enum memcg_stat_item {
MEMCG_SWAP = NR_VM_NODE_STAT_ITEMS,
MEMCG_SOCK,
MEMCG_PERCPU_B,
+ MEMCG_VMALLOC,
MEMCG_NR_STAT,
};
@@ -944,6 +945,21 @@ static inline void mod_memcg_state(struct mem_cgroup *memcg,
local_irq_restore(flags);
}
+static inline void mod_memcg_page_state(struct page *page,
+ int idx, int val)
+{
+ struct mem_cgroup *memcg;
+
+ if (mem_cgroup_disabled())
+ return;
+
+ rcu_read_lock();
+ memcg = page_memcg(page);
+ if (memcg)
+ mod_memcg_state(memcg, idx, val);
+ rcu_read_unlock();
+}
+
static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
{
return READ_ONCE(memcg->vmstats.state[idx]);
@@ -1399,6 +1415,11 @@ static inline void mod_memcg_state(struct mem_cgroup *memcg,
{
}
+static inline void mod_memcg_page_state(struct page *page,
+ int idx, int val)
+{
+}
+
static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
{
return 0;
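
A usage sketch for the new helper (illustration only; it assumes a page
already charged via __GFP_ACCOUNT, and the real caller is the vmalloc.c
hunk below):

	struct page *page = alloc_pages(GFP_KERNEL | __GFP_ACCOUNT, 0);

	if (page) {
		/* Account the page to whichever memcg it was charged to. */
		mod_memcg_page_state(page, MEMCG_VMALLOC, 1);
		/* ... use the page ... */
		mod_memcg_page_state(page, MEMCG_VMALLOC, -1);
		__free_pages(page, 0);
	}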
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 101b22a23096..56ed6b5dd328 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1375,6 +1375,7 @@ static const struct memory_stat memory_stats[] = {
{ "pagetables", NR_PAGETABLE },
{ "percpu", MEMCG_PERCPU_B },
{ "sock", MEMCG_SOCK },
+ { "vmalloc", MEMCG_VMALLOC },
{ "shmem", NR_SHMEM },
{ "file_mapped", NR_FILE_MAPPED },
{ "file_dirty", NR_FILE_DIRTY },
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index eb6e527a6b77..bdc7222f87d4 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -31,6 +31,7 @@
#include <linux/kmemleak.h>
#include <linux/atomic.h>
#include <linux/compiler.h>
+#include <linux/memcontrol.h>
#include <linux/llist.h>
#include <linux/bitops.h>
#include <linux/rbtree_augmented.h>
@@ -2624,12 +2625,13 @@ static void __vunmap(const void *addr, int deallocate_pages)
if (deallocate_pages) {
unsigned int page_order = vm_area_page_order(area);
- int i;
+ int i, step = 1U << page_order;
- for (i = 0; i < area->nr_pages; i += 1U << page_order) {
+ for (i = 0; i < area->nr_pages; i += step) {
struct page *page = area->pages[i];
BUG_ON(!page);
+ mod_memcg_page_state(page, MEMCG_VMALLOC, -step);
__free_pages(page, page_order);
cond_resched();
}
@@ -2964,6 +2966,13 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
page_order, nr_small_pages, area->pages);
atomic_long_add(area->nr_pages, &nr_vmalloc_pages);
+ if (gfp_mask & __GFP_ACCOUNT) {
+ int i, step = 1U << page_order;
+
+ for (i = 0; i < area->nr_pages; i += step)
+ mod_memcg_page_state(area->pages[i], MEMCG_VMALLOC,
+ step);
+ }
/*
* If not enough pages were obtained to accomplish an
--
2.34.1.448.ga2b2bfdf31-goog
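
With the patch applied, the counter is readable from memory.stat in
bytes, like the other page-based counters there. A minimal userspace
sketch follows; the cgroup path is an assumption, adjust it for your
hierarchy:

	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		char line[256];
		/* Hypothetical cgroup path. */
		FILE *f = fopen("/sys/fs/cgroup/test/memory.stat", "r");

		if (!f)
			return 1;
		while (fgets(line, sizeof(line), f)) {
			/* e.g. "vmalloc 135168" (33 pages * 4096 bytes). */
			if (!strncmp(line, "vmalloc ", 8))
				fputs(line, stdout);
		}
		fclose(f);
		return 0;
	}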
Subject: Re: [PATCH v4] memcg: add per-memcg vmalloc stat
From: Michal Hocko
Date: 2022-01-05 8:43 UTC
To: Shakeel Butt
Cc: Johannes Weiner, Roman Gushchin, Muchun Song, Andrew Morton,
    linux-mm, linux-kernel
On Tue 04-01-22 14:23:41, Shakeel Butt wrote:
> The kvmalloc* allocation functions can fall back to vmalloc allocations,
> and they do so more often on long-running machines. In addition, the
> kernel has kvmalloc* call sites that pass __GFP_ACCOUNT. So, over time,
> memory.stat on long-running machines stops giving a complete picture of
> which type of memory is charged to the memcg. Add a per-memcg vmalloc
> stat to close that gap.
>
> Signed-off-by: Shakeel Butt <shakeelb@google.com>
> Reviewed-by: Muchun Song <songmuchun@bytedance.com>
> Acked-by: Roman Gushchin <guro@fb.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Thanks!
> [...]
--
Michal Hocko
SUSE Labs