linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Mina Almasry <almasrymina@google.com>
To: Huang Ying <ying.huang@intel.com>,
	Yang Shi <yang.shi@linux.alibaba.com>,
	 Yosry Ahmed <yosryahmed@google.com>,
	Tim Chen <tim.c.chen@linux.intel.com>,
	weixugc@google.com,  shakeelb@google.com, gthelen@google.com,
	fvdl@google.com,  Johannes Weiner <hannes@cmpxchg.org>,
	Michal Hocko <mhocko@kernel.org>,
	 Roman Gushchin <roman.gushchin@linux.dev>,
	Muchun Song <songmuchun@bytedance.com>,
	 Andrew Morton <akpm@linux-foundation.org>
Cc: Mina Almasry <almasrymina@google.com>,
	linux-kernel@vger.kernel.org,  cgroups@vger.kernel.org,
	linux-mm@kvack.org
Subject: [RFC PATCH v1 4/4] mm: Add nodes= arg to memory.demote
Date: Tue, 22 Nov 2022 12:38:48 -0800	[thread overview]
Message-ID: <20221122203850.2765015-4-almasrymina@google.com> (raw)
In-Reply-To: <20221122203850.2765015-1-almasrymina@google.com>

The nodes= arg instructs the kernel to scan only the given nodes for
demotion. As an example use case, consider a 3-tier memory system:

nodes 0,1 -> top tier
nodes 2,3 -> second tier
nodes 4,5 -> third tier

echo "1m nodes=2,3" > memory.demote

This instructs the kernel to attempt to demote 1m of memory from the second
tier to the third, which can be desirable according to the userspace policy
if the second tier is filling up and there is available memory on the
third tier.

echo "1m" > memory.demote

Instructs the kernel to attempt to demote 1m of memory (regardless of
which tier the memory is currently on).

echo "1m nodes=0,1" > memory.demote

Instructs the kernel to demote memory from the top tier nodes, which can
be desirable according to the userspace policy if there is pressure on
the top tiers.

Signed-off-by: Mina Almasry <almasrymina@google.com>
---
 include/linux/swap.h |  3 ++-
 mm/memcontrol.c      | 64 ++++++++++++++++++++++++++++++++++++--------
 mm/vmscan.c          |  4 ++-
 3 files changed, 58 insertions(+), 13 deletions(-)

diff --git a/include/linux/swap.h b/include/linux/swap.h
index f768171c2dc2..e195ee5f8efb 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -425,7 +425,8 @@ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
 						  unsigned long nr_pages,
 						  gfp_t gfp_mask,
-						  unsigned int reclaim_options);
+						  unsigned int reclaim_options,
+						  nodemask_t nodemask);
 extern unsigned long mem_cgroup_shrink_node(struct mem_cgroup *mem,
 						gfp_t gfp_mask, bool noswap,
 						pg_data_t *pgdat,
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 427c79e467eb..cce446348358 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -63,6 +63,7 @@
 #include <linux/resume_user_mode.h>
 #include <linux/psi.h>
 #include <linux/seq_buf.h>
+#include <linux/parser.h>
 #include "internal.h"
 #include <net/sock.h>
 #include <net/ip.h>
@@ -2392,7 +2393,8 @@ static unsigned long reclaim_high(struct mem_cgroup *memcg,
 		psi_memstall_enter(&pflags);
 		nr_reclaimed += try_to_free_mem_cgroup_pages(memcg, nr_pages,
 							gfp_mask,
-							MEMCG_RECLAIM_DEFAULT);
+							MEMCG_RECLAIM_DEFAULT,
+							NODE_MASK_ALL);
 		psi_memstall_leave(&pflags);
 	} while ((memcg = parent_mem_cgroup(memcg)) &&
 		 !mem_cgroup_is_root(memcg));
@@ -2683,7 +2685,8 @@ static int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask,

 	psi_memstall_enter(&pflags);
 	nr_reclaimed = try_to_free_mem_cgroup_pages(mem_over_limit, nr_pages,
-						    gfp_mask, reclaim_options);
+						    gfp_mask, reclaim_options,
+						    NODE_MASK_ALL);
 	psi_memstall_leave(&pflags);

 	if (mem_cgroup_margin(mem_over_limit) >= nr_pages)
@@ -3504,7 +3507,8 @@ static int mem_cgroup_resize_max(struct mem_cgroup *memcg,

 		if (!try_to_free_mem_cgroup_pages(memcg, 1, GFP_KERNEL,
 					memsw ? MEMCG_RECLAIM_NO_SWAP :
-						MEMCG_RECLAIM_DEFAULT)) {
+						MEMCG_RECLAIM_DEFAULT,
+						NODE_MASK_ALL)) {
 			ret = -EBUSY;
 			break;
 		}
@@ -3615,7 +3619,8 @@ static int mem_cgroup_force_empty(struct mem_cgroup *memcg)
 			return -EINTR;

 		if (!try_to_free_mem_cgroup_pages(memcg, 1, GFP_KERNEL,
-						  MEMCG_RECLAIM_DEFAULT))
+						  MEMCG_RECLAIM_DEFAULT,
+						  NODE_MASK_ALL))
 			nr_retries--;
 	}

@@ -6408,7 +6413,8 @@ static ssize_t memory_high_write(struct kernfs_open_file *of,
 		}

 		reclaimed = try_to_free_mem_cgroup_pages(memcg, nr_pages - high,
-					GFP_KERNEL, MEMCG_RECLAIM_DEFAULT);
+					GFP_KERNEL, MEMCG_RECLAIM_DEFAULT,
+					NODE_MASK_ALL);

 		if (!reclaimed && !nr_retries--)
 			break;
@@ -6457,7 +6463,8 @@ static ssize_t memory_max_write(struct kernfs_open_file *of,

 		if (nr_reclaims) {
 			if (!try_to_free_mem_cgroup_pages(memcg, nr_pages - max,
-					GFP_KERNEL, MEMCG_RECLAIM_DEFAULT))
+					GFP_KERNEL, MEMCG_RECLAIM_DEFAULT,
+					NODE_MASK_ALL))
 				nr_reclaims--;
 			continue;
 		}
@@ -6612,7 +6619,8 @@ static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,

 		reclaimed = try_to_free_mem_cgroup_pages(memcg,
 						nr_to_reclaim - nr_reclaimed,
-						GFP_KERNEL, reclaim_options);
+						GFP_KERNEL, reclaim_options,
+						NODE_MASK_ALL);

 		if (!reclaimed && !nr_retries--)
 			return -EAGAIN;
@@ -6623,6 +6631,16 @@ static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
 	return nbytes;
 }

+enum {
+	MEMORY_DEMOTE_NODES = 0,
+	MEMORY_DEMOTE_NULL,
+};
+
+static const match_table_t if_tokens = {
+	{ MEMORY_DEMOTE_NODES, "nodes=%s" },
+	{ MEMORY_DEMOTE_NULL, NULL },
+};
+
 static ssize_t memory_demote(struct kernfs_open_file *of, char *buf,
 			     size_t nbytes, loff_t off)
 {
@@ -6631,11 +6649,35 @@ static ssize_t memory_demote(struct kernfs_open_file *of, char *buf,
 	unsigned long nr_to_demote, nr_demoted = 0;
 	unsigned int reclaim_options = MEMCG_RECLAIM_ONLY_DEMOTE;
 	int err;
+	char *old_buf, *start;
+	substring_t args[MAX_OPT_ARGS];
+	int token;
+	char value[256];
+	nodemask_t nodemask = NODE_MASK_ALL;

 	buf = strstrip(buf);
-	err = page_counter_memparse(buf, "", &nr_to_demote);
-	if (err)
-		return err;
+	old_buf = buf;
+	nr_to_demote = memparse(buf, &buf) / PAGE_SIZE;
+	if (buf == old_buf)
+		return -EINVAL;
+
+	buf = strstrip(buf);
+
+	while ((start = strsep(&buf, " ")) != NULL) {
+		if (!strlen(start))
+			continue;
+		token = match_token(start, if_tokens, args);
+		match_strlcpy(value, args, sizeof(value));
+		switch (token) {
+		case MEMORY_DEMOTE_NODES:
+			err = nodelist_parse(value, nodemask);
+			if (err < 0)
+				return -EINVAL;
+			break;
+		default:
+			return -EINVAL;
+		}
+	}

 	while (nr_demoted < nr_to_demote) {
 		unsigned long demoted;
@@ -6645,7 +6687,7 @@ static ssize_t memory_demote(struct kernfs_open_file *of, char *buf,

 		demoted = try_to_free_mem_cgroup_pages(
 			memcg, nr_to_demote - nr_demoted, GFP_KERNEL,
-			reclaim_options);
+			reclaim_options, nodemask);

 		if (!demoted && !nr_retries--)
 			return -EAGAIN;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index d7e509b3f07f..df5ade259b3a 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -6719,7 +6719,8 @@ unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg,
 unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
 					   unsigned long nr_pages,
 					   gfp_t gfp_mask,
-					   unsigned int reclaim_options)
+					   unsigned int reclaim_options,
+					   nodemask_t nodemask)
 {
 	unsigned long nr_reclaimed;
 	unsigned int noreclaim_flag;
@@ -6734,6 +6735,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
 		.may_unmap = 1,
 		.may_swap = !!(reclaim_options & MEMCG_RECLAIM_MAY_SWAP),
 		.proactive = !!(reclaim_options & MEMCG_RECLAIM_PROACTIVE),
+		.nodemask = &nodemask,
 	};
 	/*
 	 * Traverse the ZONELIST_FALLBACK zonelist of the current node to put
--
2.38.1.584.g0f3c55d4c2-goog


  parent reply	other threads:[~2022-11-22 20:39 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-11-22 20:38 [RFC PATCH V1] mm: Disable demotion from proactive reclaim Mina Almasry
2022-11-22 20:38 ` [RFC PATCH v1] mm: Add memory.demote for proactive demotion only Mina Almasry
2022-11-22 20:38 ` [RFC PATCH v1 3/4] mm: Fix demotion-only scanning anon pages Mina Almasry
2022-11-24  5:27   ` Huang, Ying
2022-11-22 20:38 ` Mina Almasry [this message]
2022-11-23 18:00 ` [RFC PATCH V1] mm: Disable demotion from proactive reclaim Johannes Weiner
2022-11-23 21:20   ` Mina Almasry
2022-11-23 21:35     ` Yosry Ahmed
2022-11-23 22:30       ` Johannes Weiner
2022-11-23 23:47         ` Yosry Ahmed
2022-11-23 21:58     ` Johannes Weiner
2022-11-23 22:37       ` Mina Almasry
2022-11-24  5:51       ` Huang, Ying
2022-11-28 22:24         ` Yang Shi
2022-11-29  0:53           ` Huang, Ying
2022-11-29 17:27             ` Yang Shi
2022-11-30  5:31               ` Huang, Ying
2022-11-30 18:49                 ` Yang Shi
2022-12-01  1:51                   ` Huang, Ying
2022-12-01 22:45                     ` Yang Shi
2022-12-02  1:57                       ` Huang, Ying
2022-11-29 18:08         ` Johannes Weiner
2022-11-30  3:55           ` Huang, Ying
2022-12-01 20:40             ` Mina Almasry
2022-12-02  2:01               ` Huang, Ying
2022-12-02  2:06                 ` Mina Almasry
2022-11-30  2:14         ` Mina Almasry
2022-11-30  5:39           ` Huang, Ying
2022-11-30  6:06             ` Mina Almasry

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20221122203850.2765015-4-almasrymina@google.com \
    --to=almasrymina@google.com \
    --cc=akpm@linux-foundation.org \
    --cc=cgroups@vger.kernel.org \
    --cc=fvdl@google.com \
    --cc=gthelen@google.com \
    --cc=hannes@cmpxchg.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mhocko@kernel.org \
    --cc=roman.gushchin@linux.dev \
    --cc=shakeelb@google.com \
    --cc=songmuchun@bytedance.com \
    --cc=tim.c.chen@linux.intel.com \
    --cc=weixugc@google.com \
    --cc=yang.shi@linux.alibaba.com \
    --cc=ying.huang@intel.com \
    --cc=yosryahmed@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox