linux-mm.kvack.org archive mirror
* [RFC PATCH 1/2] mm: vmscan: modify the semantics of scan_control.may_unmap to UNMAP_ANON and UNMAP_FILE
@ 2024-08-29 10:20 Zhongkun He
  2024-08-29 10:20 ` [RFC PATCH 2/2] mm: memcg: add disable_unmap_file arg to memory.reclaim Zhongkun He
  0 siblings, 1 reply; 2+ messages in thread
From: Zhongkun He @ 2024-08-29 10:20 UTC (permalink / raw)
  To: akpm, hannes, mhocko
  Cc: roman.gushchin, shakeel.butt, muchun.song, lizefan.x, linux-mm,
	linux-kernel, cgroups, Zhongkun He

This is a preparation patch for adding the disable_unmap_file argument to
memory.reclaim.

So far scan_control.may_unmap has only two values, true or false, which
indicate whether mapped pages may be unmapped for reclaim in the reclaim
path. It cannot distinguish between mapped anonymous pages and mapped file
pages, so user space cannot make a more precise choice when proactively
reclaiming memory. In practice, mapped file pages are crucial to a
program's operation: they usually contain important executable code, data,
shared libraries, and so on. It is therefore worth distinguishing between
the two, as sketched below.
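
For illustration only (not part of the patch), here is a minimal
stand-alone sketch of the decision the new flags encode;
may_reclaim_mapped() and folio_is_anon are hypothetical names standing in
for the switch added to shrink_folio_list() and for folio_test_anon():

#include <stdbool.h>

#define UNMAP_ANON		1
#define UNMAP_FILE		2
#define UNMAP_ANON_AND_FILE	(UNMAP_ANON | UNMAP_FILE)

/* May a mapped folio be reclaimed under the given may_unmap value? */
static bool may_reclaim_mapped(unsigned int may_unmap, bool folio_is_anon)
{
	switch (may_unmap) {
	case UNMAP_ANON_AND_FILE:
		return true;		/* both mapped anon and mapped file */
	case UNMAP_ANON:
		return folio_is_anon;	/* mapped anon pages only */
	case UNMAP_FILE:
		return !folio_is_anon;	/* mapped file pages only */
	default:
		return false;		/* 0: never unmap, as before */
	}
}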

Signed-off-by: Zhongkun He <hezhongkun.hzk@bytedance.com>
---
 mm/vmscan.c | 61 +++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 47 insertions(+), 14 deletions(-)

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 283e3f9d652b..50ac714cba2f 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -107,8 +107,11 @@ struct scan_control {
 	/* Writepage batching in laptop mode; RECLAIM_WRITE */
 	unsigned int may_writepage:1;
 
+	#define UNMAP_ANON 1
+	#define UNMAP_FILE 2
+	#define UNMAP_ANON_AND_FILE (UNMAP_ANON | UNMAP_FILE)
 	/* Can mapped folios be reclaimed? */
-	unsigned int may_unmap:1;
+	unsigned int may_unmap:2;
 
 	/* Can folios be swapped as part of reclaim? */
 	unsigned int may_swap:1;
@@ -1083,8 +1086,23 @@ static unsigned int shrink_folio_list(struct list_head *folio_list,
 		if (unlikely(!folio_evictable(folio)))
 			goto activate_locked;
 
-		if (!sc->may_unmap && folio_mapped(folio))
-			goto keep_locked;
+		if (folio_mapped(folio)) {
+			switch (sc->may_unmap) {
+			/* The most likely case. */
+			case UNMAP_ANON_AND_FILE:
+				break;
+			case UNMAP_ANON:
+				if (!folio_test_anon(folio))
+					goto keep_locked;
+				break;
+			case UNMAP_FILE:
+				if (folio_test_anon(folio))
+					goto keep_locked;
+				break;
+			default:
+				goto keep_locked;
+			}
+		}
 
 		/* folio_update_gen() tried to promote this page? */
 		if (lru_gen_enabled() && !ignore_references &&
@@ -1563,7 +1581,7 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
 {
 	struct scan_control sc = {
 		.gfp_mask = GFP_KERNEL,
-		.may_unmap = 1,
+		.may_unmap = UNMAP_ANON_AND_FILE,
 	};
 	struct reclaim_stat stat;
 	unsigned int nr_reclaimed;
@@ -1688,8 +1706,23 @@ static unsigned long isolate_lru_folios(unsigned long nr_to_scan,
 
 		if (!folio_test_lru(folio))
 			goto move;
-		if (!sc->may_unmap && folio_mapped(folio))
-			goto move;
+
+		if (folio_mapped(folio)) {
+			switch (sc->may_unmap) {
+			case UNMAP_ANON_AND_FILE:
+				break;
+			case UNMAP_ANON:
+				if (!folio_test_anon(folio))
+					goto move;
+				break;
+			case UNMAP_FILE:
+				if (folio_test_anon(folio))
+					goto move;
+				break;
+			default:
+				goto move;
+			}
+		}
 
 		/*
 		 * Be careful not to clear the lru flag until after we're
@@ -2135,7 +2168,7 @@ static unsigned int reclaim_folio_list(struct list_head *folio_list,
 	struct scan_control sc = {
 		.gfp_mask = GFP_KERNEL,
 		.may_writepage = 1,
-		.may_unmap = 1,
+		.may_unmap = UNMAP_ANON_AND_FILE,
 		.may_swap = 1,
 		.no_demotion = 1,
 	};
@@ -5467,7 +5500,7 @@ static ssize_t lru_gen_seq_write(struct file *file, const char __user *src,
 	int err = -EINVAL;
 	struct scan_control sc = {
 		.may_writepage = true,
-		.may_unmap = true,
+		.may_unmap = UNMAP_ANON_AND_FILE,
 		.may_swap = true,
 		.reclaim_idx = MAX_NR_ZONES - 1,
 		.gfp_mask = GFP_KERNEL,
@@ -6482,7 +6515,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 		.nodemask = nodemask,
 		.priority = DEF_PRIORITY,
 		.may_writepage = !laptop_mode,
-		.may_unmap = 1,
+		.may_unmap = UNMAP_ANON_AND_FILE,
 		.may_swap = 1,
 	};
 
@@ -6526,7 +6559,7 @@ unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg,
 		.nr_to_reclaim = SWAP_CLUSTER_MAX,
 		.target_mem_cgroup = memcg,
 		.may_writepage = !laptop_mode,
-		.may_unmap = 1,
+		.may_unmap = UNMAP_ANON_AND_FILE,
 		.reclaim_idx = MAX_NR_ZONES - 1,
 		.may_swap = !noswap,
 	};
@@ -6572,7 +6605,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
 		.target_mem_cgroup = memcg,
 		.priority = DEF_PRIORITY,
 		.may_writepage = !laptop_mode,
-		.may_unmap = 1,
+		.may_unmap = UNMAP_ANON_AND_FILE,
 		.may_swap = !!(reclaim_options & MEMCG_RECLAIM_MAY_SWAP),
 		.proactive = !!(reclaim_options & MEMCG_RECLAIM_PROACTIVE),
 	};
@@ -6837,7 +6870,7 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int highest_zoneidx)
 	struct scan_control sc = {
 		.gfp_mask = GFP_KERNEL,
 		.order = order,
-		.may_unmap = 1,
+		.may_unmap = UNMAP_ANON_AND_FILE,
 	};
 
 	set_task_reclaim_state(current, &sc.reclaim_state);
@@ -7304,7 +7337,7 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
 		.reclaim_idx = MAX_NR_ZONES - 1,
 		.priority = DEF_PRIORITY,
 		.may_writepage = 1,
-		.may_unmap = 1,
+		.may_unmap = UNMAP_ANON_AND_FILE,
 		.may_swap = 1,
 		.hibernation_mode = 1,
 	};
@@ -7462,7 +7495,7 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
 		.order = order,
 		.priority = NODE_RECLAIM_PRIORITY,
 		.may_writepage = !!(node_reclaim_mode & RECLAIM_WRITE),
-		.may_unmap = !!(node_reclaim_mode & RECLAIM_UNMAP),
+		.may_unmap = (node_reclaim_mode & RECLAIM_UNMAP) ? UNMAP_ANON_AND_FILE : 0,
 		.may_swap = 1,
 		.reclaim_idx = gfp_zone(gfp_mask),
 	};
-- 
2.20.1




* [RFC PATCH 2/2] mm: memcg: add disable_unmap_file arg to memory.reclaim
  2024-08-29 10:20 [RFC PATCH 1/2] mm: vmscan: modify the semantics of scan_control.may_unmap to UNMAP_ANON and UNMAP_FILE Zhongkun He
@ 2024-08-29 10:20 ` Zhongkun He
  0 siblings, 0 replies; 2+ messages in thread
From: Zhongkun He @ 2024-08-29 10:20 UTC (permalink / raw)
  To: akpm, hannes, mhocko
  Cc: roman.gushchin, shakeel.butt, muchun.song, lizefan.x, linux-mm,
	linux-kernel, cgroups, Zhongkun He

Allow proactive memory reclaimers to pass an additional
disable_unmap_file argument to memory.reclaim. This makes the
reclaim attempt skip mapped file pages.

For example:

echo "2M disable_unmap_file" > /sys/fs/cgroup/test/memory.reclaim

will perform reclaim on the test cgroup while skipping mapped file pages.

memory.reclaim is a useful interface: it lets user space perform
proactive memory reclaim, which improves memory utilization. In real
usage we found that when anonymous pages are plentiful, mapped file
pages are still reclaimed even though they make up only a small
proportion of memory. This tends to increase refaults and task latency,
because mapped file pages usually hold important executable code, data,
and shared libraries. In our measurements, skipping this memory reduces
the workload's latency.

Here is an example where anonymous pages are plentiful but mapped file
pages are still reclaimed:

cat memory.stat | grep -wE 'anon|file|file_mapped'
anon 3406462976
file 332967936
file_mapped 300302336

echo "1g swappiness=200" > memory.reclaim
cat memory.stat | grep -wE 'anon|file|file_mapped'
anon 2613276672
file 52523008
file_mapped 30982144

echo "1g swappiness=200" > memory.reclaim
cat memory.stat | grep -wE 'anon|file|file_mapped'
anon 1552130048
file 39759872
file_mapped 20299776

With this patch, mapped file pages are skipped.

echo "1g swappiness=200 disable_unmap_file" > memory.reclaim
cat memory.stat | grep -wE 'anon|file|file_mapped'
anon 480059392
file 37978112
file_mapped 20299776

IMO, it is difficult for the kernel to balance the priorities of the
various page types; there are too many scenarios to consider. For
proactive memory reclaim driven from user space, however, the caller can
make this simple decision itself. A usage sketch follows.
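
A minimal usage sketch (the cgroup path, batch size, and iteration count
are illustrative assumptions, not part of the patch) of a user-space
proactive reclaimer that prefers anonymous memory and skips mapped file
pages:

#!/bin/sh
# Example only: proactively reclaim from the "test" cgroup in batches,
# preferring anonymous memory (swappiness=200) and skipping mapped file
# pages (disable_unmap_file). A failed write means the full amount could
# not be reclaimed, so stop.
CG=/sys/fs/cgroup/test
for i in 1 2 3 4; do
	echo "256M swappiness=200 disable_unmap_file" > "$CG/memory.reclaim" || break
done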

Signed-off-by: Zhongkun He <hezhongkun.hzk@bytedance.com>
---
 include/linux/swap.h | 1 +
 mm/memcontrol.c      | 9 +++++++--
 mm/vmscan.c          | 4 ++++
 3 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/include/linux/swap.h b/include/linux/swap.h
index ca533b478c21..49df8f3748e8 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -409,6 +409,7 @@ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 
 #define MEMCG_RECLAIM_MAY_SWAP (1 << 1)
 #define MEMCG_RECLAIM_PROACTIVE (1 << 2)
+#define MEMCG_RECLAIM_DIS_UNMAP_FILE (1 << 3)
 #define MIN_SWAPPINESS 0
 #define MAX_SWAPPINESS 200
 extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 35431035e782..7b0181553b0c 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4282,11 +4282,13 @@ static ssize_t memory_oom_group_write(struct kernfs_open_file *of,
 
 enum {
 	MEMORY_RECLAIM_SWAPPINESS = 0,
+	MEMORY_RECLAIM_DISABLE_UNMAP_FILE,
 	MEMORY_RECLAIM_NULL,
 };
 
 static const match_table_t tokens = {
 	{ MEMORY_RECLAIM_SWAPPINESS, "swappiness=%d"},
+	{ MEMORY_RECLAIM_DISABLE_UNMAP_FILE, "disable_unmap_file"},
 	{ MEMORY_RECLAIM_NULL, NULL },
 };
 
@@ -4297,7 +4299,7 @@ static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
 	unsigned int nr_retries = MAX_RECLAIM_RETRIES;
 	unsigned long nr_to_reclaim, nr_reclaimed = 0;
 	int swappiness = -1;
-	unsigned int reclaim_options;
+	unsigned int reclaim_options = 0;
 	char *old_buf, *start;
 	substring_t args[MAX_OPT_ARGS];
 
@@ -4320,12 +4322,15 @@ static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
 			if (swappiness < MIN_SWAPPINESS || swappiness > MAX_SWAPPINESS)
 				return -EINVAL;
 			break;
+		case MEMORY_RECLAIM_DISABLE_UNMAP_FILE:
+			reclaim_options = MEMCG_RECLAIM_DIS_UNMAP_FILE;
+			break;
 		default:
 			return -EINVAL;
 		}
 	}
 
-	reclaim_options	= MEMCG_RECLAIM_MAY_SWAP | MEMCG_RECLAIM_PROACTIVE;
+	reclaim_options	|= MEMCG_RECLAIM_MAY_SWAP | MEMCG_RECLAIM_PROACTIVE;
 	while (nr_reclaimed < nr_to_reclaim) {
 		/* Will converge on zero, but reclaim enforces a minimum */
 		unsigned long batch_size = (nr_to_reclaim - nr_reclaimed) / 4;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 50ac714cba2f..1b58126a8246 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -6609,6 +6609,10 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
 		.may_swap = !!(reclaim_options & MEMCG_RECLAIM_MAY_SWAP),
 		.proactive = !!(reclaim_options & MEMCG_RECLAIM_PROACTIVE),
 	};
+
+	if (reclaim_options & MEMCG_RECLAIM_DIS_UNMAP_FILE)
+		sc.may_unmap &= ~UNMAP_FILE;
+
 	/*
 	 * Traverse the ZONELIST_FALLBACK zonelist of the current node to put
 	 * equal pressure on all the nodes. This is based on the assumption that
-- 
2.20.1



