linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 02/10] mm: shrinker: Add a .to_text() method for shrinkers
       [not found] <20240824180454.3160385-1-kent.overstreet@linux.dev>
@ 2024-08-24 18:04 ` Kent Overstreet
  2024-08-24 18:04 ` [PATCH 03/10] mm: shrinker: Add new stats for .to_text() Kent Overstreet
  2024-08-24 18:04 ` [PATCH 04/10] mm: Centralize & improve oom reporting in show_mem.c Kent Overstreet
  2 siblings, 0 replies; 9+ messages in thread
From: Kent Overstreet @ 2024-08-24 18:04 UTC (permalink / raw)
  To: david, linux-fsdevel, linux-mm
  Cc: Kent Overstreet, Andrew Morton, Qi Zheng, Roman Gushchin, linux-mm

This adds a new callback method to shrinkers which they can use to
describe anything relevant to memory reclaim about their internal state,
for example object dirtiness.

This patch also adds shrinkers_to_text(), which reports on the top 10
shrinkers - by object count - in sorted order, to be used in OOM
reporting.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Qi Zheng <zhengqi.arch@bytedance.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: linux-mm@kvack.org
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 include/linux/shrinker.h |  7 +++-
 mm/shrinker.c            | 73 +++++++++++++++++++++++++++++++++++++++-
 2 files changed, 78 insertions(+), 2 deletions(-)

diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
index 1a00be90d93a..6193612617a1 100644
--- a/include/linux/shrinker.h
+++ b/include/linux/shrinker.h
@@ -24,6 +24,8 @@ struct shrinker_info {
 	struct shrinker_info_unit *unit[];
 };
 
+struct seq_buf;
+
 /*
  * This struct is used to pass information from page reclaim to the shrinkers.
  * We consolidate the values for easier extension later.
@@ -80,10 +82,12 @@ struct shrink_control {
  * @flags determine the shrinker abilities, like numa awareness
  */
 struct shrinker {
+	const char *name;
 	unsigned long (*count_objects)(struct shrinker *,
 				       struct shrink_control *sc);
 	unsigned long (*scan_objects)(struct shrinker *,
 				      struct shrink_control *sc);
+	void (*to_text)(struct seq_buf *, struct shrinker *);
 
 	long batch;	/* reclaim batch size, 0 = default */
 	int seeks;	/* seeks to recreate an obj */
@@ -110,7 +114,6 @@ struct shrinker {
 #endif
 #ifdef CONFIG_SHRINKER_DEBUG
 	int debugfs_id;
-	const char *name;
 	struct dentry *debugfs_entry;
 #endif
 	/* objs pending delete, per node */
@@ -135,6 +138,8 @@ __printf(2, 3)
 struct shrinker *shrinker_alloc(unsigned int flags, const char *fmt, ...);
 void shrinker_register(struct shrinker *shrinker);
 void shrinker_free(struct shrinker *shrinker);
+void shrinker_to_text(struct seq_buf *, struct shrinker *);
+void shrinkers_to_text(struct seq_buf *);
 
 static inline bool shrinker_try_get(struct shrinker *shrinker)
 {
diff --git a/mm/shrinker.c b/mm/shrinker.c
index dc5d2a6fcfc4..ad52c269bb48 100644
--- a/mm/shrinker.c
+++ b/mm/shrinker.c
@@ -1,8 +1,9 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/memcontrol.h>
+#include <linux/rculist.h>
 #include <linux/rwsem.h>
+#include <linux/seq_buf.h>
 #include <linux/shrinker.h>
-#include <linux/rculist.h>
 #include <trace/events/vmscan.h>
 
 #include "internal.h"
@@ -807,3 +808,73 @@ void shrinker_free(struct shrinker *shrinker)
 	call_rcu(&shrinker->rcu, shrinker_free_rcu_cb);
 }
 EXPORT_SYMBOL_GPL(shrinker_free);
+
+void shrinker_to_text(struct seq_buf *out, struct shrinker *shrinker)
+{
+	struct shrink_control sc = { .gfp_mask = GFP_KERNEL, };
+
+	seq_buf_puts(out, shrinker->name);
+	seq_buf_printf(out, " objects: %lu\n", shrinker->count_objects(shrinker, &sc));
+
+	if (shrinker->to_text) {
+		shrinker->to_text(out, shrinker);
+		seq_buf_puts(out, "\n");
+	}
+}
+
+/**
+ * shrinkers_to_text - Report on shrinkers with highest usage
+ *
+ * This reports on the top 10 shrinkers, by object counts, in sorted order:
+ * intended to be used for OOM reporting.
+ */
+void shrinkers_to_text(struct seq_buf *out)
+{
+	struct shrinker *shrinker;
+	struct shrinker_by_mem {
+		struct shrinker	*shrinker;
+		unsigned long	mem;
+	} shrinkers_by_mem[10];
+	int i, nr = 0;
+
+	if (!mutex_trylock(&shrinker_mutex)) {
+		seq_buf_puts(out, "(couldn't take shrinker lock)");
+		return;
+	}
+
+	list_for_each_entry(shrinker, &shrinker_list, list) {
+		struct shrink_control sc = { .gfp_mask = GFP_KERNEL, };
+		unsigned long mem = shrinker->count_objects(shrinker, &sc);
+
+		if (!mem || mem == SHRINK_STOP || mem == SHRINK_EMPTY)
+			continue;
+
+		for (i = 0; i < nr; i++)
+			if (mem < shrinkers_by_mem[i].mem)
+				break;
+
+		if (nr < ARRAY_SIZE(shrinkers_by_mem)) {
+			memmove(&shrinkers_by_mem[i + 1],
+				&shrinkers_by_mem[i],
+				sizeof(shrinkers_by_mem[0]) * (nr - i));
+			nr++;
+		} else if (i) {
+			i--;
+			memmove(&shrinkers_by_mem[0],
+				&shrinkers_by_mem[1],
+				sizeof(shrinkers_by_mem[0]) * i);
+		} else {
+			continue;
+		}
+
+		shrinkers_by_mem[i] = (struct shrinker_by_mem) {
+			.shrinker = shrinker,
+			.mem = mem,
+		};
+	}
+
+	for (i = nr - 1; i >= 0; --i)
+		shrinker_to_text(out, shrinkers_by_mem[i].shrinker);
+
+	mutex_unlock(&shrinker_mutex);
+}
-- 
2.45.2



^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH 03/10] mm: shrinker: Add new stats for .to_text()
       [not found] <20240824180454.3160385-1-kent.overstreet@linux.dev>
  2024-08-24 18:04 ` [PATCH 02/10] mm: shrinker: Add a .to_text() method for shrinkers Kent Overstreet
@ 2024-08-24 18:04 ` Kent Overstreet
  2024-08-24 18:04 ` [PATCH 04/10] mm: Centralize & improve oom reporting in show_mem.c Kent Overstreet
  2 siblings, 0 replies; 9+ messages in thread
From: Kent Overstreet @ 2024-08-24 18:04 UTC (permalink / raw)
  To: david, linux-fsdevel, linux-mm
  Cc: Kent Overstreet, Andrew Morton, Qi Zheng, Roman Gushchin, linux-mm

Add a few new shrinker stats.

number of objects requested to free, number of objects freed:

Shrinkers won't necessarily free all objects requested for a variety of
reasons, but if the two counts are wildly different something is likely
amiss.

.scan_objects runtime:

If one shrinker is taking an excessive amount of time to free
objects, that will block kswapd from running other shrinkers.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Qi Zheng <zhengqi.arch@bytedance.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: linux-mm@kvack.org
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 include/linux/shrinker.h |  6 ++++++
 mm/shrinker.c            | 23 ++++++++++++++++++++++-
 2 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
index 6193612617a1..106622ddac77 100644
--- a/include/linux/shrinker.h
+++ b/include/linux/shrinker.h
@@ -118,6 +118,12 @@ struct shrinker {
 #endif
 	/* objs pending delete, per node */
 	atomic_long_t *nr_deferred;
+
+	atomic_long_t	objects_requested_to_free;
+	atomic_long_t	objects_freed;
+	unsigned long	last_freed;	/* timestamp, in jiffies */
+	unsigned long	last_scanned;	/* timestamp, in jiffies */
+	atomic64_t	ns_run;
 };
 #define DEFAULT_SEEKS 2 /* A good number if you don't know better. */
 
diff --git a/mm/shrinker.c b/mm/shrinker.c
index ad52c269bb48..feaa8122afc9 100644
--- a/mm/shrinker.c
+++ b/mm/shrinker.c
@@ -430,13 +430,24 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
 	       total_scan >= freeable) {
 		unsigned long ret;
 		unsigned long nr_to_scan = min(batch_size, total_scan);
+		u64 start_time = ktime_get_ns();
+
+		atomic_long_add(nr_to_scan, &shrinker->objects_requested_to_free);
 
 		shrinkctl->nr_to_scan = nr_to_scan;
 		shrinkctl->nr_scanned = nr_to_scan;
 		ret = shrinker->scan_objects(shrinker, shrinkctl);
+
+		atomic64_add(ktime_get_ns() - start_time, &shrinker->ns_run);
 		if (ret == SHRINK_STOP)
 			break;
 		freed += ret;
+		unsigned long now = jiffies;
+		if (ret) {
+			atomic_long_add(ret, &shrinker->objects_freed);
+			shrinker->last_freed = now;
+		}
+		shrinker->last_scanned = now;
 
 		count_vm_events(SLABS_SCANNED, shrinkctl->nr_scanned);
 		total_scan -= shrinkctl->nr_scanned;
@@ -812,9 +823,19 @@ EXPORT_SYMBOL_GPL(shrinker_free);
 void shrinker_to_text(struct seq_buf *out, struct shrinker *shrinker)
 {
 	struct shrink_control sc = { .gfp_mask = GFP_KERNEL, };
+	unsigned long nr_freed = atomic_long_read(&shrinker->objects_freed);
 
 	seq_buf_puts(out, shrinker->name);
-	seq_buf_printf(out, " objects: %lu\n", shrinker->count_objects(shrinker, &sc));
+	seq_buf_putc(out, '\n');
+
+	seq_buf_printf(out, "objects:             %lu\n", shrinker->count_objects(shrinker, &sc));
+	seq_buf_printf(out, "requested to free:   %lu\n", atomic_long_read(&shrinker->objects_requested_to_free));
+	seq_buf_printf(out, "objects freed:       %lu\n", nr_freed);
+	seq_buf_printf(out, "last scanned:        %li sec ago\n", (jiffies - shrinker->last_scanned) / HZ);
+	seq_buf_printf(out, "last freed:          %li sec ago\n", (jiffies - shrinker->last_freed) / HZ);
+	seq_buf_printf(out, "ns per object freed: %llu\n", nr_freed
+		       ? div64_ul(atomic64_read(&shrinker->ns_run), nr_freed)
+		       : 0);
 
 	if (shrinker->to_text) {
 		shrinker->to_text(out, shrinker);
-- 
2.45.2



^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH 04/10] mm: Centralize & improve oom reporting in show_mem.c
       [not found] <20240824180454.3160385-1-kent.overstreet@linux.dev>
  2024-08-24 18:04 ` [PATCH 02/10] mm: shrinker: Add a .to_text() method for shrinkers Kent Overstreet
  2024-08-24 18:04 ` [PATCH 03/10] mm: shrinker: Add new stats for .to_text() Kent Overstreet
@ 2024-08-24 18:04 ` Kent Overstreet
  2 siblings, 0 replies; 9+ messages in thread
From: Kent Overstreet @ 2024-08-24 18:04 UTC (permalink / raw)
  To: david, linux-fsdevel, linux-mm
  Cc: Kent Overstreet, Andrew Morton, Qi Zheng, Roman Gushchin, linux-mm

This patch:
 - Changes show_mem() to always report on slab usage
 - Instead of reporting on all slabs, we only report on top 10 slabs,
   and in sorted order
 - Also reports on shrinkers, with the new shrinkers_to_text().
   Shrinkers need to be included in OOM/allocation failure reporting
   because they're responsible for memory reclaim - if a shrinker isn't
   giving up its memory, we need to know which one and why.

More OOM reporting can be moved to show_mem.c and improved; this patch
is only a start.

New example output on OOM/memory allocation failure:

00177 Mem-Info:
00177 active_anon:13706 inactive_anon:32266 isolated_anon:16
00177  active_file:1653 inactive_file:1822 isolated_file:0
00177  unevictable:0 dirty:0 writeback:0
00177  slab_reclaimable:6242 slab_unreclaimable:11168
00177  mapped:3824 shmem:3 pagetables:1266 bounce:0
00177  kernel_misc_reclaimable:0
00177  free:4362 free_pcp:35 free_cma:0
00177 Node 0 active_anon:54824kB inactive_anon:129064kB active_file:6612kB inactive_file:7288kB unevictable:0kB isolated(anon):64kB isolated(file):0kB mapped:15296kB dirty:0kB writeback:0kB shmem:12kB writeback_tmp:0kB kernel_stack:3392kB pagetables:5064kB all_unreclaimable? no
00177 DMA free:2232kB boost:0kB min:88kB low:108kB high:128kB reserved_highatomic:0KB active_anon:2924kB inactive_anon:6596kB active_file:428kB inactive_file:384kB unevictable:0kB writepending:0kB present:15992kB managed:15360kB mlocked:0kB bounce:0kB free_pcp:0kB local_pcp:0kB free_cma:0kB
00177 lowmem_reserve[]: 0 426 426 426
00177 DMA32 free:15092kB boost:5836kB min:8432kB low:9080kB high:9728kB reserved_highatomic:0KB active_anon:52196kB inactive_anon:122392kB active_file:6176kB inactive_file:7068kB unevictable:0kB writepending:0kB present:507760kB managed:441816kB mlocked:0kB bounce:0kB free_pcp:72kB local_pcp:0kB free_cma:0kB
00177 lowmem_reserve[]: 0 0 0 0
00177 DMA: 284*4kB (UM) 53*8kB (UM) 21*16kB (U) 11*32kB (U) 0*64kB 0*128kB 0*256kB 0*512kB 0*1024kB 0*2048kB 0*4096kB = 2248kB
00177 DMA32: 2765*4kB (UME) 375*8kB (UME) 57*16kB (UM) 5*32kB (U) 0*64kB 0*128kB 0*256kB 0*512kB 0*1024kB 0*2048kB 0*4096kB = 15132kB
00177 4656 total pagecache pages
00177 1031 pages in swap cache
00177 Swap cache stats: add 6572399, delete 6572173, find 488603/3286476
00177 Free swap  = 509112kB
00177 Total swap = 2097148kB
00177 130938 pages RAM
00177 0 pages HighMem/MovableOnly
00177 16644 pages reserved
00177 Unreclaimable slab info:
00177 9p-fcall-cache    total: 8.25 MiB active: 8.25 MiB
00177 kernfs_node_cache total: 2.15 MiB active: 2.15 MiB
00177 kmalloc-64        total: 2.08 MiB active: 2.07 MiB
00177 task_struct       total: 1.95 MiB active: 1.95 MiB
00177 kmalloc-4k        total: 1.50 MiB active: 1.50 MiB
00177 signal_cache      total: 1.34 MiB active: 1.34 MiB
00177 kmalloc-2k        total: 1.16 MiB active: 1.16 MiB
00177 bch_inode_info    total: 1.02 MiB active: 922 KiB
00177 perf_event        total: 1.02 MiB active: 1.02 MiB
00177 biovec-max        total: 992 KiB active: 960 KiB
00177 Shrinkers:
00177 super_cache_scan: objects: 127
00177 super_cache_scan: objects: 106
00177 jbd2_journal_shrink_scan: objects: 32
00177 ext4_es_scan: objects: 32
00177 bch2_btree_cache_scan: objects: 8
00177   nr nodes:          24
00177   nr dirty:          0
00177   cannibalize lock:  0000000000000000
00177
00177 super_cache_scan: objects: 8
00177 super_cache_scan: objects: 1

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Qi Zheng <zhengqi.arch@bytedance.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: linux-mm@kvack.org
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 mm/oom_kill.c    | 23 ---------------------
 mm/show_mem.c    | 43 +++++++++++++++++++++++++++++++++++++++
 mm/slab.h        |  6 ++++--
 mm/slab_common.c | 52 +++++++++++++++++++++++++++++++++++++++---------
 4 files changed, 90 insertions(+), 34 deletions(-)

diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 4d7a0004df2c..dc56239ff057 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -169,27 +169,6 @@ static bool oom_unkillable_task(struct task_struct *p)
 	return false;
 }
 
-/*
- * Check whether unreclaimable slab amount is greater than
- * all user memory(LRU pages).
- * dump_unreclaimable_slab() could help in the case that
- * oom due to too much unreclaimable slab used by kernel.
-*/
-static bool should_dump_unreclaim_slab(void)
-{
-	unsigned long nr_lru;
-
-	nr_lru = global_node_page_state(NR_ACTIVE_ANON) +
-		 global_node_page_state(NR_INACTIVE_ANON) +
-		 global_node_page_state(NR_ACTIVE_FILE) +
-		 global_node_page_state(NR_INACTIVE_FILE) +
-		 global_node_page_state(NR_ISOLATED_ANON) +
-		 global_node_page_state(NR_ISOLATED_FILE) +
-		 global_node_page_state(NR_UNEVICTABLE);
-
-	return (global_node_page_state_pages(NR_SLAB_UNRECLAIMABLE_B) > nr_lru);
-}
-
 /**
  * oom_badness - heuristic function to determine which candidate task to kill
  * @p: task struct of which task we should calculate
@@ -464,8 +443,6 @@ static void dump_header(struct oom_control *oc)
 		mem_cgroup_print_oom_meminfo(oc->memcg);
 	else {
 		__show_mem(SHOW_MEM_FILTER_NODES, oc->nodemask, gfp_zone(oc->gfp_mask));
-		if (should_dump_unreclaim_slab())
-			dump_unreclaimable_slab();
 	}
 	if (sysctl_oom_dump_tasks)
 		dump_tasks(oc);
diff --git a/mm/show_mem.c b/mm/show_mem.c
index bdb439551eef..a8ea4c41ced5 100644
--- a/mm/show_mem.c
+++ b/mm/show_mem.c
@@ -7,15 +7,18 @@
 
 #include <linux/blkdev.h>
 #include <linux/cma.h>
+#include <linux/console.h>
 #include <linux/cpuset.h>
 #include <linux/highmem.h>
 #include <linux/hugetlb.h>
 #include <linux/mm.h>
 #include <linux/mmzone.h>
+#include <linux/seq_buf.h>
 #include <linux/swap.h>
 #include <linux/vmstat.h>
 
 #include "internal.h"
+#include "slab.h"
 #include "swap.h"
 
 atomic_long_t _totalram_pages __read_mostly;
@@ -397,10 +400,31 @@ static void show_free_areas(unsigned int filter, nodemask_t *nodemask, int max_z
 	show_swap_cache_info();
 }
 
+static void print_string_as_lines(const char *prefix, const char *lines)
+{
+	if (!lines) {
+		printk("%s (null)\n", prefix);
+		return;
+	}
+
+	bool locked = console_trylock();
+
+	while (1) {
+		const char *p = strchrnul(lines, '\n');
+		printk("%s%.*s\n", prefix, (int) (p - lines), lines);
+		if (!*p)
+			break;
+		lines = p + 1;
+	}
+	if (locked)
+		console_unlock();
+}
+
 void __show_mem(unsigned int filter, nodemask_t *nodemask, int max_zone_idx)
 {
 	unsigned long total = 0, reserved = 0, highmem = 0;
 	struct zone *zone;
+	char *buf;
 
 	printk("Mem-Info:\n");
 	show_free_areas(filter, nodemask, max_zone_idx);
@@ -449,4 +473,23 @@ void __show_mem(unsigned int filter, nodemask_t *nodemask, int max_zone_idx)
 		}
 	}
 #endif
+
+	const unsigned buf_size = 8192;
+	buf = kmalloc(buf_size, GFP_ATOMIC);
+	if (buf) {
+		struct seq_buf s;
+
+		printk("Unreclaimable slab info:\n");
+		seq_buf_init(&s, buf, buf_size);
+		dump_unreclaimable_slab(&s);
+		print_string_as_lines(KERN_NOTICE, seq_buf_str(&s));
+
+		printk("Shrinkers:\n");
+		seq_buf_init(&s, buf, buf_size);
+		shrinkers_to_text(&s);
+		print_string_as_lines(KERN_NOTICE, seq_buf_str(&s));
+		/* previous output doesn't get flushed without this - why? */
+
+		kfree(buf);
+	}
 }
diff --git a/mm/slab.h b/mm/slab.h
index dcdb56b8e7f5..b523b3e3d9d3 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -611,10 +611,12 @@ static inline size_t slab_ksize(const struct kmem_cache *s)
 	return s->size;
 }
 
+struct seq_buf;
+
 #ifdef CONFIG_SLUB_DEBUG
-void dump_unreclaimable_slab(void);
+void dump_unreclaimable_slab(struct seq_buf *);
 #else
-static inline void dump_unreclaimable_slab(void)
+static inline void dump_unreclaimable_slab(struct seq_buf *out)
 {
 }
 #endif
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 40b582a014b8..bd50a57161cf 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -27,6 +27,7 @@
 #include <asm/tlbflush.h>
 #include <asm/page.h>
 #include <linux/memcontrol.h>
+#include <linux/seq_buf.h>
 #include <linux/stackdepot.h>
 
 #include "internal.h"
@@ -1181,10 +1182,15 @@ static int slab_show(struct seq_file *m, void *p)
 	return 0;
 }
 
-void dump_unreclaimable_slab(void)
+void dump_unreclaimable_slab(struct seq_buf *out)
 {
 	struct kmem_cache *s;
 	struct slabinfo sinfo;
+	struct slab_by_mem {
+		struct kmem_cache *s;
+		size_t total, active;
+	} slabs_by_mem[10], n;
+	int i, nr = 0;
 
 	/*
 	 * Here acquiring slab_mutex is risky since we don't prefer to get
@@ -1194,24 +1200,52 @@ void dump_unreclaimable_slab(void)
 	 * without acquiring the mutex.
 	 */
 	if (!mutex_trylock(&slab_mutex)) {
-		pr_warn("excessive unreclaimable slab but cannot dump stats\n");
+		seq_buf_puts(out, "excessive unreclaimable slab but cannot dump stats\n");
 		return;
 	}
 
-	pr_info("Unreclaimable slab info:\n");
-	pr_info("Name                      Used          Total\n");
-
 	list_for_each_entry(s, &slab_caches, list) {
 		if (s->flags & SLAB_RECLAIM_ACCOUNT)
 			continue;
 
 		get_slabinfo(s, &sinfo);
 
-		if (sinfo.num_objs > 0)
-			pr_info("%-17s %10luKB %10luKB\n", s->name,
-				(sinfo.active_objs * s->size) / 1024,
-				(sinfo.num_objs * s->size) / 1024);
+		if (!sinfo.num_objs)
+			continue;
+
+		n.s = s;
+		n.total = sinfo.num_objs * s->size;
+		n.active = sinfo.active_objs * s->size;
+
+		for (i = 0; i < nr; i++)
+			if (n.total < slabs_by_mem[i].total)
+				break;
+
+		if (nr < ARRAY_SIZE(slabs_by_mem)) {
+			memmove(&slabs_by_mem[i + 1],
+				&slabs_by_mem[i],
+				sizeof(slabs_by_mem[0]) * (nr - i));
+			nr++;
+		} else if (i) {
+			i--;
+			memmove(&slabs_by_mem[0],
+				&slabs_by_mem[1],
+				sizeof(slabs_by_mem[0]) * i);
+		} else {
+			continue;
+		}
+
+		slabs_by_mem[i] = n;
 	}
+
+	for (i = nr - 1; i >= 0; --i) {
+		seq_buf_printf(out, "%-17s total: ", slabs_by_mem[i].s->name);
+		seq_buf_human_readable_u64(out, slabs_by_mem[i].total, STRING_UNITS_2);
+		seq_buf_printf(out, " active: ");
+		seq_buf_human_readable_u64(out, slabs_by_mem[i].active, STRING_UNITS_2);
+		seq_buf_putc(out, '\n');
+	}
+
 	mutex_unlock(&slab_mutex);
 }
 
-- 
2.45.2



^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 02/10] mm: shrinker: Add a .to_text() method for shrinkers
  2024-08-24 19:10 ` [PATCH 02/10] mm: shrinker: Add a .to_text() method for shrinkers Kent Overstreet
  2024-08-26  3:01   ` Qi Zheng
@ 2024-08-28  3:32   ` Dave Chinner
  1 sibling, 0 replies; 9+ messages in thread
From: Dave Chinner @ 2024-08-28  3:32 UTC (permalink / raw)
  To: Kent Overstreet
  Cc: linux-fsdevel, linux-mm, Andrew Morton, Qi Zheng, Roman Gushchin

On Sat, Aug 24, 2024 at 03:10:09PM -0400, Kent Overstreet wrote:
> This adds a new callback method to shrinkers which they can use to
> describe anything relevant to memory reclaim about their internal state,
> for example object dirtyness.
....

> +	if (!mutex_trylock(&shrinker_mutex)) {
> +		seq_buf_puts(out, "(couldn't take shrinker lock)");
> +		return;
> +	}

Please don't use the shrinker_mutex like this. There can be tens of
thousands of entries in the shrinker list (because memcgs) and
holding the shrinker_mutex for long running traversals like this is
known to cause latency problems for memcg reaping. If we are at
ENOMEM, the last thing we want to be doing is preventing memcgs from
being reaped.

> +	list_for_each_entry(shrinker, &shrinker_list, list) {
> +		struct shrink_control sc = { .gfp_mask = GFP_KERNEL, };

This iteration and counting setup is neither node nor memcg aware.
For node aware shrinkers, this will only count the items freeable
on node 0, and ignore all the other memory in the system. For memcg
systems, it will also only scan the root memcg and so miss counting
any memory in memcg owned caches.

IOWs, the shrinker iteration mechanism needs to iterate both by NUMA
node and by memcg. On large machines with multiple nodes and hosting
thousands of memcgs, a total shrinker state iteration has to walk
a -lot- of structures.

An example of this is drop_slab() - called from
/proc/sys/vm/drop_caches(). It does this to iterate all the
shrinkers for all the nodes and memcgs in the system:

static unsigned long drop_slab_node(int nid)
{
        unsigned long freed = 0;
        struct mem_cgroup *memcg = NULL;

        memcg = mem_cgroup_iter(NULL, NULL, NULL);
        do {
                freed += shrink_slab(GFP_KERNEL, nid, memcg, 0);
        } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)) != NULL);

        return freed;
}

void drop_slab(void)
{
        int nid;
        int shift = 0;
        unsigned long freed;

        do {
                freed = 0;
                for_each_online_node(nid) {
                        if (fatal_signal_pending(current))
                                return;

                        freed += drop_slab_node(nid);
                }
        } while ((freed >> shift++) > 1);
}

Hence any iteration for finding the 10 largest shrinkable caches in
the system needs to do something similar. Only, it needs to iterate
memcgs first and then aggregate object counts across all nodes for
shrinkers that are NUMA aware.

Because it needs direct access to the shrinkers, it will need to use
the RCU lock + refcount method of traversal because that's the only
safe way to go from memcg to shrinker instance. IOWs, it
needs to mirror the code in shrink_slab/shrink_slab_memcg to obtain
a safe reference to the relevant shrinker so it can call
->count_objects() and store a refcounted pointer to the shrinker(s)
that will get printed out after the scan is done....

Once the shrinker iteration is sorted out, I'll look further at the
rest of the code in this patch...

-Dave.
-- 
Dave Chinner
david@fromorbit.com


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 02/10] mm: shrinker: Add a .to_text() method for shrinkers
  2024-08-26  3:10       ` Qi Zheng
@ 2024-08-26  3:32         ` Kent Overstreet
  0 siblings, 0 replies; 9+ messages in thread
From: Kent Overstreet @ 2024-08-26  3:32 UTC (permalink / raw)
  To: Qi Zheng; +Cc: david, linux-fsdevel, linux-mm, Andrew Morton, Roman Gushchin

On Mon, Aug 26, 2024 at 11:10:08AM GMT, Qi Zheng wrote:
> The shrinker list may be very long, and can you guarantee that shrinkers
> other than bcachefs won't do anything blocking? ;)

It's not worth making this code more complicated.

If you'd written a clean simple iterator for RCU iteration it might have
been different.


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 02/10] mm: shrinker: Add a .to_text() method for shrinkers
  2024-08-26  3:04     ` Kent Overstreet
@ 2024-08-26  3:10       ` Qi Zheng
  2024-08-26  3:32         ` Kent Overstreet
  0 siblings, 1 reply; 9+ messages in thread
From: Qi Zheng @ 2024-08-26  3:10 UTC (permalink / raw)
  To: Kent Overstreet
  Cc: david, linux-fsdevel, linux-mm, Andrew Morton, Roman Gushchin



On 2024/8/26 11:04, Kent Overstreet wrote:
> On Mon, Aug 26, 2024 at 11:01:50AM GMT, Qi Zheng wrote:
>>
>>
>> On 2024/8/25 03:10, Kent Overstreet wrote:
>>> This adds a new callback method to shrinkers which they can use to
>>> describe anything relevant to memory reclaim about their internal state,
>>> for example object dirtyness.
>>>
>>> This patch also adds shrinkers_to_text(), which reports on the top 10
>>> shrinkers - by object count - in sorted order, to be used in OOM
>>> reporting.
>>>
>>> Cc: Andrew Morton <akpm@linux-foundation.org>
>>> Cc: Qi Zheng <zhengqi.arch@bytedance.com>
>>> Cc: Roman Gushchin <roman.gushchin@linux.dev>
>>> Cc: linux-mm@kvack.org
>>> Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
>>> ---
>>>    include/linux/shrinker.h |  7 +++-
>>>    mm/shrinker.c            | 73 +++++++++++++++++++++++++++++++++++++++-
>>>    2 files changed, 78 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
>>> index 1a00be90d93a..6193612617a1 100644
>>> --- a/include/linux/shrinker.h
>>> +++ b/include/linux/shrinker.h
>>> @@ -24,6 +24,8 @@ struct shrinker_info {
>>>    	struct shrinker_info_unit *unit[];
>>>    };
>>> +struct seq_buf;
>>> +
>>>    /*
>>>     * This struct is used to pass information from page reclaim to the shrinkers.
>>>     * We consolidate the values for easier extension later.
>>> @@ -80,10 +82,12 @@ struct shrink_control {
>>>     * @flags determine the shrinker abilities, like numa awareness
>>>     */
>>>    struct shrinker {
>>> +	const char *name;
>>>    	unsigned long (*count_objects)(struct shrinker *,
>>>    				       struct shrink_control *sc);
>>>    	unsigned long (*scan_objects)(struct shrinker *,
>>>    				      struct shrink_control *sc);
>>> +	void (*to_text)(struct seq_buf *, struct shrinker *);
>>>    	long batch;	/* reclaim batch size, 0 = default */
>>>    	int seeks;	/* seeks to recreate an obj */
>>> @@ -110,7 +114,6 @@ struct shrinker {
>>>    #endif
>>>    #ifdef CONFIG_SHRINKER_DEBUG
>>>    	int debugfs_id;
>>> -	const char *name;
>>>    	struct dentry *debugfs_entry;
>>>    #endif
>>>    	/* objs pending delete, per node */
>>> @@ -135,6 +138,8 @@ __printf(2, 3)
>>>    struct shrinker *shrinker_alloc(unsigned int flags, const char *fmt, ...);
>>>    void shrinker_register(struct shrinker *shrinker);
>>>    void shrinker_free(struct shrinker *shrinker);
>>> +void shrinker_to_text(struct seq_buf *, struct shrinker *);
>>> +void shrinkers_to_text(struct seq_buf *);
>>>    static inline bool shrinker_try_get(struct shrinker *shrinker)
>>>    {
>>> diff --git a/mm/shrinker.c b/mm/shrinker.c
>>> index dc5d2a6fcfc4..ad52c269bb48 100644
>>> --- a/mm/shrinker.c
>>> +++ b/mm/shrinker.c
>>> @@ -1,8 +1,9 @@
>>>    // SPDX-License-Identifier: GPL-2.0
>>>    #include <linux/memcontrol.h>
>>> +#include <linux/rculist.h>
>>>    #include <linux/rwsem.h>
>>> +#include <linux/seq_buf.h>
>>>    #include <linux/shrinker.h>
>>> -#include <linux/rculist.h>
>>>    #include <trace/events/vmscan.h>
>>>    #include "internal.h"
>>> @@ -807,3 +808,73 @@ void shrinker_free(struct shrinker *shrinker)
>>>    	call_rcu(&shrinker->rcu, shrinker_free_rcu_cb);
>>>    }
>>>    EXPORT_SYMBOL_GPL(shrinker_free);
>>> +
>>> +void shrinker_to_text(struct seq_buf *out, struct shrinker *shrinker)
>>> +{
>>> +	struct shrink_control sc = { .gfp_mask = GFP_KERNEL, };
>>> +
>>> +	seq_buf_puts(out, shrinker->name);
>>> +	seq_buf_printf(out, " objects: %lu\n", shrinker->count_objects(shrinker, &sc));
>>> +
>>> +	if (shrinker->to_text) {
>>> +		shrinker->to_text(out, shrinker);
>>> +		seq_buf_puts(out, "\n");
>>> +	}
>>> +}
>>> +
>>> +/**
>>> + * shrinkers_to_text - Report on shrinkers with highest usage
>>> + *
>>> + * This reports on the top 10 shrinkers, by object counts, in sorted order:
>>> + * intended to be used for OOM reporting.
>>> + */
>>> +void shrinkers_to_text(struct seq_buf *out)
>>> +{
>>> +	struct shrinker *shrinker;
>>> +	struct shrinker_by_mem {
>>> +		struct shrinker	*shrinker;
>>> +		unsigned long	mem;
>>> +	} shrinkers_by_mem[10];
>>> +	int i, nr = 0;
>>> +
>>> +	if (!mutex_trylock(&shrinker_mutex)) {
>>> +		seq_buf_puts(out, "(couldn't take shrinker lock)");
>>> +		return;
>>> +	}
>>
>> I remember I pointed out that the RCU + refcount method should be used
>> here. Otherwise you will block other shrinkers from
>> registering/unregistering, etc.
> 
> The more complex iteration isn't needed here - this is a slowpath
> function and we're not doing anything blocking inside it.

The shrinker list may be very long, and can you guarantee that shrinkers
other than bcachefs won't do anything blocking? ;)


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 02/10] mm: shrinker: Add a .to_text() method for shrinkers
  2024-08-26  3:01   ` Qi Zheng
@ 2024-08-26  3:04     ` Kent Overstreet
  2024-08-26  3:10       ` Qi Zheng
  0 siblings, 1 reply; 9+ messages in thread
From: Kent Overstreet @ 2024-08-26  3:04 UTC (permalink / raw)
  To: Qi Zheng; +Cc: david, linux-fsdevel, linux-mm, Andrew Morton, Roman Gushchin

On Mon, Aug 26, 2024 at 11:01:50AM GMT, Qi Zheng wrote:
> 
> 
> On 2024/8/25 03:10, Kent Overstreet wrote:
> > This adds a new callback method to shrinkers which they can use to
> > describe anything relevant to memory reclaim about their internal state,
> > for example object dirtiness.
> > 
> > This patch also adds shrinkers_to_text(), which reports on the top 10
> > shrinkers - by object count - in sorted order, to be used in OOM
> > reporting.
> > 
> > Cc: Andrew Morton <akpm@linux-foundation.org>
> > Cc: Qi Zheng <zhengqi.arch@bytedance.com>
> > Cc: Roman Gushchin <roman.gushchin@linux.dev>
> > Cc: linux-mm@kvack.org
> > Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
> > ---
> >   include/linux/shrinker.h |  7 +++-
> >   mm/shrinker.c            | 73 +++++++++++++++++++++++++++++++++++++++-
> >   2 files changed, 78 insertions(+), 2 deletions(-)
> > 
> > diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
> > index 1a00be90d93a..6193612617a1 100644
> > --- a/include/linux/shrinker.h
> > +++ b/include/linux/shrinker.h
> > @@ -24,6 +24,8 @@ struct shrinker_info {
> >   	struct shrinker_info_unit *unit[];
> >   };
> > +struct seq_buf;
> > +
> >   /*
> >    * This struct is used to pass information from page reclaim to the shrinkers.
> >    * We consolidate the values for easier extension later.
> > @@ -80,10 +82,12 @@ struct shrink_control {
> >    * @flags determine the shrinker abilities, like numa awareness
> >    */
> >   struct shrinker {
> > +	const char *name;
> >   	unsigned long (*count_objects)(struct shrinker *,
> >   				       struct shrink_control *sc);
> >   	unsigned long (*scan_objects)(struct shrinker *,
> >   				      struct shrink_control *sc);
> > +	void (*to_text)(struct seq_buf *, struct shrinker *);
> >   	long batch;	/* reclaim batch size, 0 = default */
> >   	int seeks;	/* seeks to recreate an obj */
> > @@ -110,7 +114,6 @@ struct shrinker {
> >   #endif
> >   #ifdef CONFIG_SHRINKER_DEBUG
> >   	int debugfs_id;
> > -	const char *name;
> >   	struct dentry *debugfs_entry;
> >   #endif
> >   	/* objs pending delete, per node */
> > @@ -135,6 +138,8 @@ __printf(2, 3)
> >   struct shrinker *shrinker_alloc(unsigned int flags, const char *fmt, ...);
> >   void shrinker_register(struct shrinker *shrinker);
> >   void shrinker_free(struct shrinker *shrinker);
> > +void shrinker_to_text(struct seq_buf *, struct shrinker *);
> > +void shrinkers_to_text(struct seq_buf *);
> >   static inline bool shrinker_try_get(struct shrinker *shrinker)
> >   {
> > diff --git a/mm/shrinker.c b/mm/shrinker.c
> > index dc5d2a6fcfc4..ad52c269bb48 100644
> > --- a/mm/shrinker.c
> > +++ b/mm/shrinker.c
> > @@ -1,8 +1,9 @@
> >   // SPDX-License-Identifier: GPL-2.0
> >   #include <linux/memcontrol.h>
> > +#include <linux/rculist.h>
> >   #include <linux/rwsem.h>
> > +#include <linux/seq_buf.h>
> >   #include <linux/shrinker.h>
> > -#include <linux/rculist.h>
> >   #include <trace/events/vmscan.h>
> >   #include "internal.h"
> > @@ -807,3 +808,73 @@ void shrinker_free(struct shrinker *shrinker)
> >   	call_rcu(&shrinker->rcu, shrinker_free_rcu_cb);
> >   }
> >   EXPORT_SYMBOL_GPL(shrinker_free);
> > +
> > +void shrinker_to_text(struct seq_buf *out, struct shrinker *shrinker)
> > +{
> > +	struct shrink_control sc = { .gfp_mask = GFP_KERNEL, };
> > +
> > +	seq_buf_puts(out, shrinker->name);
> > +	seq_buf_printf(out, " objects: %lu\n", shrinker->count_objects(shrinker, &sc));
> > +
> > +	if (shrinker->to_text) {
> > +		shrinker->to_text(out, shrinker);
> > +		seq_buf_puts(out, "\n");
> > +	}
> > +}
> > +
> > +/**
> > + * shrinkers_to_text - Report on shrinkers with highest usage
> > + *
> > + * This reports on the top 10 shrinkers, by object counts, in sorted order:
> > + * intended to be used for OOM reporting.
> > + */
> > +void shrinkers_to_text(struct seq_buf *out)
> > +{
> > +	struct shrinker *shrinker;
> > +	struct shrinker_by_mem {
> > +		struct shrinker	*shrinker;
> > +		unsigned long	mem;
> > +	} shrinkers_by_mem[10];
> > +	int i, nr = 0;
> > +
> > +	if (!mutex_trylock(&shrinker_mutex)) {
> > +		seq_buf_puts(out, "(couldn't take shrinker lock)");
> > +		return;
> > +	}
> 
> I remember I pointed out that the RCU + refcount method should be used
> here. Otherwise you will block other shrinkers from
> registering/unregistering, etc.

The more complex iteration isn't needed here - this is a slowpath
function and we're not doing anything blocking inside it.


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 02/10] mm: shrinker: Add a .to_text() method for shrinkers
  2024-08-24 19:10 ` [PATCH 02/10] mm: shrinker: Add a .to_text() method for shrinkers Kent Overstreet
@ 2024-08-26  3:01   ` Qi Zheng
  2024-08-26  3:04     ` Kent Overstreet
  2024-08-28  3:32   ` Dave Chinner
  1 sibling, 1 reply; 9+ messages in thread
From: Qi Zheng @ 2024-08-26  3:01 UTC (permalink / raw)
  To: Kent Overstreet
  Cc: david, linux-fsdevel, linux-mm, Andrew Morton, Qi Zheng, Roman Gushchin



On 2024/8/25 03:10, Kent Overstreet wrote:
> This adds a new callback method to shrinkers which they can use to
> describe anything relevant to memory reclaim about their internal state,
> for example object dirtiness.
> 
> This patch also adds shrinkers_to_text(), which reports on the top 10
> shrinkers - by object count - in sorted order, to be used in OOM
> reporting.
> 
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Cc: Qi Zheng <zhengqi.arch@bytedance.com>
> Cc: Roman Gushchin <roman.gushchin@linux.dev>
> Cc: linux-mm@kvack.org
> Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
> ---
>   include/linux/shrinker.h |  7 +++-
>   mm/shrinker.c            | 73 +++++++++++++++++++++++++++++++++++++++-
>   2 files changed, 78 insertions(+), 2 deletions(-)
> 
> diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
> index 1a00be90d93a..6193612617a1 100644
> --- a/include/linux/shrinker.h
> +++ b/include/linux/shrinker.h
> @@ -24,6 +24,8 @@ struct shrinker_info {
>   	struct shrinker_info_unit *unit[];
>   };
>   
> +struct seq_buf;
> +
>   /*
>    * This struct is used to pass information from page reclaim to the shrinkers.
>    * We consolidate the values for easier extension later.
> @@ -80,10 +82,12 @@ struct shrink_control {
>    * @flags determine the shrinker abilities, like numa awareness
>    */
>   struct shrinker {
> +	const char *name;
>   	unsigned long (*count_objects)(struct shrinker *,
>   				       struct shrink_control *sc);
>   	unsigned long (*scan_objects)(struct shrinker *,
>   				      struct shrink_control *sc);
> +	void (*to_text)(struct seq_buf *, struct shrinker *);
>   
>   	long batch;	/* reclaim batch size, 0 = default */
>   	int seeks;	/* seeks to recreate an obj */
> @@ -110,7 +114,6 @@ struct shrinker {
>   #endif
>   #ifdef CONFIG_SHRINKER_DEBUG
>   	int debugfs_id;
> -	const char *name;
>   	struct dentry *debugfs_entry;
>   #endif
>   	/* objs pending delete, per node */
> @@ -135,6 +138,8 @@ __printf(2, 3)
>   struct shrinker *shrinker_alloc(unsigned int flags, const char *fmt, ...);
>   void shrinker_register(struct shrinker *shrinker);
>   void shrinker_free(struct shrinker *shrinker);
> +void shrinker_to_text(struct seq_buf *, struct shrinker *);
> +void shrinkers_to_text(struct seq_buf *);
>   
>   static inline bool shrinker_try_get(struct shrinker *shrinker)
>   {
> diff --git a/mm/shrinker.c b/mm/shrinker.c
> index dc5d2a6fcfc4..ad52c269bb48 100644
> --- a/mm/shrinker.c
> +++ b/mm/shrinker.c
> @@ -1,8 +1,9 @@
>   // SPDX-License-Identifier: GPL-2.0
>   #include <linux/memcontrol.h>
> +#include <linux/rculist.h>
>   #include <linux/rwsem.h>
> +#include <linux/seq_buf.h>
>   #include <linux/shrinker.h>
> -#include <linux/rculist.h>
>   #include <trace/events/vmscan.h>
>   
>   #include "internal.h"
> @@ -807,3 +808,73 @@ void shrinker_free(struct shrinker *shrinker)
>   	call_rcu(&shrinker->rcu, shrinker_free_rcu_cb);
>   }
>   EXPORT_SYMBOL_GPL(shrinker_free);
> +
> +void shrinker_to_text(struct seq_buf *out, struct shrinker *shrinker)
> +{
> +	struct shrink_control sc = { .gfp_mask = GFP_KERNEL, };
> +
> +	seq_buf_puts(out, shrinker->name);
> +	seq_buf_printf(out, " objects: %lu\n", shrinker->count_objects(shrinker, &sc));
> +
> +	if (shrinker->to_text) {
> +		shrinker->to_text(out, shrinker);
> +		seq_buf_puts(out, "\n");
> +	}
> +}
> +
> +/**
> + * shrinkers_to_text - Report on shrinkers with highest usage
> + *
> + * This reports on the top 10 shrinkers, by object counts, in sorted order:
> + * intended to be used for OOM reporting.
> + */
> +void shrinkers_to_text(struct seq_buf *out)
> +{
> +	struct shrinker *shrinker;
> +	struct shrinker_by_mem {
> +		struct shrinker	*shrinker;
> +		unsigned long	mem;
> +	} shrinkers_by_mem[10];
> +	int i, nr = 0;
> +
> +	if (!mutex_trylock(&shrinker_mutex)) {
> +		seq_buf_puts(out, "(couldn't take shrinker lock)");
> +		return;
> +	}

I remember I pointed out that the RCU + refcount method should be used
here. Otherwise you will block other shrinkers from 
registering/unregistering, etc.

> +
> +	list_for_each_entry(shrinker, &shrinker_list, list) {
> +		struct shrink_control sc = { .gfp_mask = GFP_KERNEL, };
> +		unsigned long mem = shrinker->count_objects(shrinker, &sc);
> +
> +		if (!mem || mem == SHRINK_STOP || mem == SHRINK_EMPTY)
> +			continue;
> +
> +		for (i = 0; i < nr; i++)
> +			if (mem < shrinkers_by_mem[i].mem)
> +				break;
> +
> +		if (nr < ARRAY_SIZE(shrinkers_by_mem)) {
> +			memmove(&shrinkers_by_mem[i + 1],
> +				&shrinkers_by_mem[i],
> +				sizeof(shrinkers_by_mem[0]) * (nr - i));
> +			nr++;
> +		} else if (i) {
> +			i--;
> +			memmove(&shrinkers_by_mem[0],
> +				&shrinkers_by_mem[1],
> +				sizeof(shrinkers_by_mem[0]) * i);
> +		} else {
> +			continue;
> +		}
> +
> +		shrinkers_by_mem[i] = (struct shrinker_by_mem) {
> +			.shrinker = shrinker,
> +			.mem = mem,
> +		};
> +	}
> +
> +	for (i = nr - 1; i >= 0; --i)
> +		shrinker_to_text(out, shrinkers_by_mem[i].shrinker);
> +
> +	mutex_unlock(&shrinker_mutex);
> +}


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH 02/10] mm: shrinker: Add a .to_text() method for shrinkers
  2024-08-24 19:10 [PATCH 00/10] shrinker debugging, .to_text() report (resend) Kent Overstreet
@ 2024-08-24 19:10 ` Kent Overstreet
  2024-08-26  3:01   ` Qi Zheng
  2024-08-28  3:32   ` Dave Chinner
  0 siblings, 2 replies; 9+ messages in thread
From: Kent Overstreet @ 2024-08-24 19:10 UTC (permalink / raw)
  To: david, linux-fsdevel, linux-mm
  Cc: Kent Overstreet, Andrew Morton, Qi Zheng, Roman Gushchin

This adds a new callback method to shrinkers which they can use to
describe anything relevant to memory reclaim about their internal state,
for example object dirtiness.

This patch also adds shrinkers_to_text(), which reports on the top 10
shrinkers - by object count - in sorted order, to be used in OOM
reporting.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Qi Zheng <zhengqi.arch@bytedance.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: linux-mm@kvack.org
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 include/linux/shrinker.h |  7 +++-
 mm/shrinker.c            | 73 +++++++++++++++++++++++++++++++++++++++-
 2 files changed, 78 insertions(+), 2 deletions(-)

diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
index 1a00be90d93a..6193612617a1 100644
--- a/include/linux/shrinker.h
+++ b/include/linux/shrinker.h
@@ -24,6 +24,8 @@ struct shrinker_info {
 	struct shrinker_info_unit *unit[];
 };
 
+struct seq_buf;
+
 /*
  * This struct is used to pass information from page reclaim to the shrinkers.
  * We consolidate the values for easier extension later.
@@ -80,10 +82,12 @@ struct shrink_control {
  * @flags determine the shrinker abilities, like numa awareness
  */
 struct shrinker {
+	const char *name;
 	unsigned long (*count_objects)(struct shrinker *,
 				       struct shrink_control *sc);
 	unsigned long (*scan_objects)(struct shrinker *,
 				      struct shrink_control *sc);
+	void (*to_text)(struct seq_buf *, struct shrinker *);
 
 	long batch;	/* reclaim batch size, 0 = default */
 	int seeks;	/* seeks to recreate an obj */
@@ -110,7 +114,6 @@ struct shrinker {
 #endif
 #ifdef CONFIG_SHRINKER_DEBUG
 	int debugfs_id;
-	const char *name;
 	struct dentry *debugfs_entry;
 #endif
 	/* objs pending delete, per node */
@@ -135,6 +138,8 @@ __printf(2, 3)
 struct shrinker *shrinker_alloc(unsigned int flags, const char *fmt, ...);
 void shrinker_register(struct shrinker *shrinker);
 void shrinker_free(struct shrinker *shrinker);
+void shrinker_to_text(struct seq_buf *, struct shrinker *);
+void shrinkers_to_text(struct seq_buf *);
 
 static inline bool shrinker_try_get(struct shrinker *shrinker)
 {
diff --git a/mm/shrinker.c b/mm/shrinker.c
index dc5d2a6fcfc4..ad52c269bb48 100644
--- a/mm/shrinker.c
+++ b/mm/shrinker.c
@@ -1,8 +1,9 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/memcontrol.h>
+#include <linux/rculist.h>
 #include <linux/rwsem.h>
+#include <linux/seq_buf.h>
 #include <linux/shrinker.h>
-#include <linux/rculist.h>
 #include <trace/events/vmscan.h>
 
 #include "internal.h"
@@ -807,3 +808,73 @@ void shrinker_free(struct shrinker *shrinker)
 	call_rcu(&shrinker->rcu, shrinker_free_rcu_cb);
 }
 EXPORT_SYMBOL_GPL(shrinker_free);
+
+void shrinker_to_text(struct seq_buf *out, struct shrinker *shrinker)
+{
+	struct shrink_control sc = { .gfp_mask = GFP_KERNEL, };
+
+	seq_buf_puts(out, shrinker->name);
+	seq_buf_printf(out, " objects: %lu\n", shrinker->count_objects(shrinker, &sc));
+
+	if (shrinker->to_text) {
+		shrinker->to_text(out, shrinker);
+		seq_buf_puts(out, "\n");
+	}
+}
+
+/**
+ * shrinkers_to_text - Report on shrinkers with highest usage
+ *
+ * This reports on the top 10 shrinkers, by object counts, in sorted order:
+ * intended to be used for OOM reporting.
+ */
+void shrinkers_to_text(struct seq_buf *out)
+{
+	struct shrinker *shrinker;
+	struct shrinker_by_mem {
+		struct shrinker	*shrinker;
+		unsigned long	mem;
+	} shrinkers_by_mem[10];
+	int i, nr = 0;
+
+	if (!mutex_trylock(&shrinker_mutex)) {
+		seq_buf_puts(out, "(couldn't take shrinker lock)");
+		return;
+	}
+
+	list_for_each_entry(shrinker, &shrinker_list, list) {
+		struct shrink_control sc = { .gfp_mask = GFP_KERNEL, };
+		unsigned long mem = shrinker->count_objects(shrinker, &sc);
+
+		if (!mem || mem == SHRINK_STOP || mem == SHRINK_EMPTY)
+			continue;
+
+		for (i = 0; i < nr; i++)
+			if (mem < shrinkers_by_mem[i].mem)
+				break;
+
+		if (nr < ARRAY_SIZE(shrinkers_by_mem)) {
+			memmove(&shrinkers_by_mem[i + 1],
+				&shrinkers_by_mem[i],
+				sizeof(shrinkers_by_mem[0]) * (nr - i));
+			nr++;
+		} else if (i) {
+			i--;
+			memmove(&shrinkers_by_mem[0],
+				&shrinkers_by_mem[1],
+				sizeof(shrinkers_by_mem[0]) * i);
+		} else {
+			continue;
+		}
+
+		shrinkers_by_mem[i] = (struct shrinker_by_mem) {
+			.shrinker = shrinker,
+			.mem = mem,
+		};
+	}
+
+	for (i = nr - 1; i >= 0; --i)
+		shrinker_to_text(out, shrinkers_by_mem[i].shrinker);
+
+	mutex_unlock(&shrinker_mutex);
+}
-- 
2.45.2



^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2024-08-28  3:32 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <20240824180454.3160385-1-kent.overstreet@linux.dev>
2024-08-24 18:04 ` [PATCH 02/10] mm: shrinker: Add a .to_text() method for shrinkers Kent Overstreet
2024-08-24 18:04 ` [PATCH 03/10] mm: shrinker: Add new stats for .to_text() Kent Overstreet
2024-08-24 18:04 ` [PATCH 04/10] mm: Centralize & improve oom reporting in show_mem.c Kent Overstreet
2024-08-24 19:10 [PATCH 00/10] shrinker debugging, .to_text() report (resend) Kent Overstreet
2024-08-24 19:10 ` [PATCH 02/10] mm: shrinker: Add a .to_text() method for shrinkers Kent Overstreet
2024-08-26  3:01   ` Qi Zheng
2024-08-26  3:04     ` Kent Overstreet
2024-08-26  3:10       ` Qi Zheng
2024-08-26  3:32         ` Kent Overstreet
2024-08-28  3:32   ` Dave Chinner

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox