From: Huan Yang <link@vivo.com>
To: Johannes Weiner <hannes@cmpxchg.org>,
Michal Hocko <mhocko@kernel.org>,
Roman Gushchin <roman.gushchin@linux.dev>,
Shakeel Butt <shakeel.butt@linux.dev>,
Muchun Song <muchun.song@linux.dev>,
Andrew Morton <akpm@linux-foundation.org>,
"Matthew Wilcox (Oracle)" <willy@infradead.org>,
David Hildenbrand <david@redhat.com>,
Ryan Roberts <ryan.roberts@arm.com>, Chris Li <chrisl@kernel.org>,
Dan Schatzberg <schatzberg.dan@gmail.com>,
Huan Yang <link@vivo.com>, Kairui Song <kasong@tencent.com>,
cgroups@vger.kernel.org, linux-mm@kvack.org,
linux-kernel@vger.kernel.org,
Christian Brauner <brauner@kernel.org>
Cc: opensource.kernel@vivo.com
Subject: [RFC PATCH 2/4] mm: memcg: pmc support change attribute
Date: Tue, 2 Jul 2024 16:44:05 +0800 [thread overview]
Message-ID: <20240702084423.1717904-3-link@vivo.com> (raw)
In-Reply-To: <20240702084423.1717904-1-link@vivo.com>
PMC has the following attributes:
watermark: pages can only be cached while the zone's free pages are above
high + watermark.
limit: the maximum amount of memory it can cache.
This patch lets the user change each attribute through `memory.cache`.
To change an attribute, write `keys=attribute=value` into the memcg's
`memory.cache`, provided the cache is enabled.
For example:
echo keys=watermark=157286400,limit=209715200 > memory.cache
This makes the memcg cache pages only while free pages are above high+150MB,
and lets it cache up to a maximum of 200MB.
Signed-off-by: Huan Yang <link@vivo.com>
---
mm/memcontrol.c | 152 +++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 151 insertions(+), 1 deletion(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 404fcb96bf68..9db5bbe63b34 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -7253,29 +7253,168 @@ static int mem_cgroup_cache_show(struct seq_file *m, void *v)
return 0;
}
+/*
+ * Sentinel meaning "this option was not supplied". It is stored in and
+ * compared against unsigned int slots, where -1 converts to the all-ones
+ * value; a bytewise memset() with this value produces the same pattern.
+ */
+#define STATUS_UNSET_DEFAULT_VALUE -1
+/*
+ * Keys accepted inside "keys=...". Each value doubles as the index of that
+ * key's slot in the opts[] array, which is sized by NR_PMC_KEY_OPTS.
+ * OPT_KEY_ERR is the token of the terminating fc_tokens entry and has no slot.
+ */
+enum {
+ OPT_KEY_NID,
+ OPT_KEY_WATERMARK,
+ OPT_KEY_HOLD_LIMIT,
+ OPT_KEY_ERR,
+ NR_PMC_KEY_OPTS = OPT_KEY_ERR
+};
+
+/*
+ * match_token() patterns for the keys accepted inside "keys=...".
+ * watermark= and limit= take a byte count; the parser converts it to pages
+ * by shifting right by PAGE_SHIFT.
+ */
+static const match_table_t fc_tokens = {
+ { OPT_KEY_NID, "nid=%d" },
+ { OPT_KEY_WATERMARK, "watermark=%u" },
+ { OPT_KEY_HOLD_LIMIT, "limit=%u" },
+ { OPT_KEY_ERR, NULL}
+};
+
+/*
+ * Copy the attributes that were explicitly supplied in @opts into @p.
+ *
+ * @p:    per-node cache whose attributes are updated.
+ * @opts: option slots indexed by OPT_KEY_*; a slot still holding
+ *        STATUS_UNSET_DEFAULT_VALUE leaves the matching attribute untouched.
+ *
+ * opts[OPT_KEY_NID] is not consulted here.
+ */
+static void
+__apply_status_for_mem_cgroup_cache(struct mem_cgroup_per_node_cache *p,
+				    unsigned int opts[])
+{
+	const unsigned int watermark = opts[OPT_KEY_WATERMARK];
+	const unsigned int hold_limit = opts[OPT_KEY_HOLD_LIMIT];
+
+	if (watermark != STATUS_UNSET_DEFAULT_VALUE)
+		p->allow_watermark = watermark;
+
+	if (hold_limit != STATUS_UNSET_DEFAULT_VALUE)
+		p->hold_limit = hold_limit;
+}
+
+/*
+ * Apply the parsed attribute values in @opts to @memcg's per-node caches.
+ *
+ * @memcg: memcg whose per-node caches are updated.
+ * @opts:  option slots indexed by OPT_KEY_*. If opts[OPT_KEY_NID] was
+ *         supplied only that node is updated, otherwise every node that has
+ *         a cache is updated.
+ *
+ * Return: 0 on success, -EINVAL if an explicit node id is not a possible
+ * node or that node has no cache.
+ */
+static __always_inline int
+mem_cgroup_apply_cache_status(struct mem_cgroup *memcg,
+			      unsigned int opts[])
+{
+	struct mem_cgroup_per_node_cache *p;
+	unsigned int nid = opts[OPT_KEY_NID];
+
+	if (nid != STATUS_UNSET_DEFAULT_VALUE) {
+		/*
+		 * The node id comes straight from user space. Reject ids
+		 * that are not possible nodes before indexing nodeinfo[]:
+		 * NOTE(review): nodeinfo[] is presumably only populated for
+		 * possible nodes, so any other id would hit a NULL or
+		 * out-of-range slot - confirm against mem_cgroup_alloc().
+		 */
+		if (nid >= MAX_NUMNODES || !node_possible(nid) ||
+		    !memcg->nodeinfo[nid])
+			return -EINVAL;
+
+		p = memcg->nodeinfo[nid]->cachep;
+		if (unlikely(!p))
+			return -EINVAL;
+		__apply_status_for_mem_cgroup_cache(p, opts);
+		return 0;
+	}
+
+	/* No node given: update every node, skipping those without a cache. */
+	for_each_node(nid) {
+		p = memcg->nodeinfo[nid]->cachep;
+		if (!p)
+			continue;
+		__apply_status_for_mem_cgroup_cache(p, opts);
+	}
+
+	return 0;
+}
+
+/* Smallest accepted watermark= value, in pages (10MB worth of pages). */
+#define MIN_WATERMARK_LIMIT ((10 << 20) >> PAGE_SHIFT)
+/* Largest accepted limit= value, in pages (500MB worth of pages). */
+#define MAX_CACHE_LIMIT_NR ((500 << 20) >> PAGE_SHIFT)
+
+/*
+ * Parse a comma separated "nid=x,watermark=bytes,limit=bytes" list and apply
+ * it to @memcg's per-node caches.
+ *
+ * @buf:   NUL-terminated key list; modified in place by strsep().
+ * @memcg: target memcg; its cache must already be enabled.
+ *
+ * Every key is validated before anything is applied, so a rejected write
+ * changes nothing. Empty items are skipped; if a key is repeated the last
+ * occurrence wins. watermark= and limit= are given in bytes and stored in
+ * pages.
+ *
+ * Return: 0 on success, -EINVAL if the cache is disabled, @buf is NULL, a
+ * key is unknown, a value is malformed or out of range, or the values could
+ * not be applied.
+ */
+static int __mem_cgroup_cache_control_key(char *buf,
+					  struct mem_cgroup *memcg)
+{
+	char *p;
+	unsigned int opts[NR_PMC_KEY_OPTS];
+	int i;
+
+	if (!READ_ONCE(memcg->cache_enabled))
+		return -EINVAL;
+
+	if (!buf)
+		return -EINVAL;
+
+	/* Mark every slot as "not supplied" without relying on byte patterns. */
+	for (i = 0; i < NR_PMC_KEY_OPTS; ++i)
+		opts[i] = STATUS_UNSET_DEFAULT_VALUE;
+
+	while ((p = strsep(&buf, ",")) != NULL) {
+		int token;
+		u32 v;
+		substring_t args[MAX_OPT_ARGS];
+
+		p = strstrip(p);
+
+		if (!*p)
+			continue;
+
+		token = match_token(p, fc_tokens, args);
+		switch (token) {
+		case OPT_KEY_NID:
+			/* Only ids of possible nodes may index nodeinfo[]. */
+			if (match_uint(&args[0], &v) || v >= MAX_NUMNODES ||
+			    !node_possible(v))
+				return -EINVAL;
+			opts[OPT_KEY_NID] = v;
+			break;
+		case OPT_KEY_WATERMARK:
+			if (match_uint(&args[0], &v))
+				return -EINVAL;
+			v >>= PAGE_SHIFT;
+			if (v < MIN_WATERMARK_LIMIT)
+				return -EINVAL;
+			opts[OPT_KEY_WATERMARK] = v;
+			break;
+		case OPT_KEY_HOLD_LIMIT:
+			if (match_uint(&args[0], &v))
+				return -EINVAL;
+			v >>= PAGE_SHIFT;
+			if (v > MAX_CACHE_LIMIT_NR)
+				return -EINVAL;
+			opts[OPT_KEY_HOLD_LIMIT] = v;
+			break;
+		case OPT_KEY_ERR:
+		default:
+			/*
+			 * Reject unknown keys instead of silently dropping
+			 * them, matching how the top-level control parser
+			 * treats unknown options.
+			 */
+			return -EINVAL;
+		}
+	}
+
+	if (mem_cgroup_apply_cache_status(memcg, opts))
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * Top-level options accepted by memory.cache. OPT_CTRL_ERR is the token of
+ * the terminating ctrl_tokens entry; the control handler answers it with
+ * -EINVAL.
+ */
enum {
OPT_CTRL_ENABLE,
+ OPT_CTRL_KEYS,
OPT_CTRL_ERR,
OPR_CTRL_NR = OPT_CTRL_ERR,
};
+/*
+ * match_token() patterns for the top-level memory.cache options. The string
+ * captured by "keys=%s" is later handed to __mem_cgroup_cache_control_key().
+ */
static const match_table_t ctrl_tokens = {
{ OPT_CTRL_ENABLE, "enable=%s" },
+ { OPT_CTRL_KEYS, "keys=%s" },
{ OPT_CTRL_ERR, NULL } };
/**
* This function can control target memcg's cache. include enable\keys set.
* To enable\disable this cache, by `echo enable=[y|n] > memory.cace`
* in target memcg.
+ * To set keys, `echo keys=key=value[,key=value...] > memory.cache`. Supported keys:
+ * 1. nid=x: only change the cache attributes of node x. If omitted, all nodes.
+ * 2. watermark=bytes: pages are only cached while the zone's free pages are
+ * above its high watermark plus this value. Must be at least 10MB.
+ * 3. limit=bytes: maximum amount of memory the cache may hold, at most 500MB.
+ * Enable and keys may be given together, separated by a space, so attributes
+ * can be set right after enabling. Keys cannot be set while the cache is disabled.
*/
static ssize_t mem_cgroup_cache_control(struct kernfs_open_file *of, char *buf,
size_t nbytes, loff_t off)
{
bool enable;
- bool opt_enable_set = false;
+ bool opt_enable_set = false, opt_key_set = false;
int err = 0;
char *sub;
struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
+ char keybuf[256];
buf = strstrip(buf);
if (!strlen(buf))
@@ -7300,6 +7439,14 @@ static ssize_t mem_cgroup_cache_control(struct kernfs_open_file *of, char *buf,
return -EINVAL;
opt_enable_set = true;
break;
+ case OPT_CTRL_KEYS:
+ if (match_strlcpy(tbuf, &args[0], sizeof(tbuf)) >=
+ sizeof(tbuf))
+ return -EINVAL;
+
+ memcpy(keybuf, tbuf, sizeof(keybuf));
+ opt_key_set = true;
+ break;
case OPT_CTRL_ERR:
default:
return -EINVAL;
@@ -7315,6 +7462,9 @@ static ssize_t mem_cgroup_cache_control(struct kernfs_open_file *of, char *buf,
}
}
+ if (opt_key_set)
+ err = __mem_cgroup_cache_control_key(keybuf, memcg);
+
return err ? err : nbytes;
}
--
2.45.2
next prev parent reply other threads:[~2024-07-02 8:46 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-07-02 8:44 [RFC PATCH 0/4] Introduce PMC(PER-MEMCG-CACHE) Huan Yang
2024-07-02 8:44 ` [RFC PATCH 1/4] mm: memcg: pmc framework Huan Yang
2024-07-02 8:44 ` Huan Yang [this message]
2024-07-02 8:44 ` [RFC PATCH 3/4] mm: memcg: pmc: support reaper Huan Yang
2024-07-02 8:44 ` [RFC PATCH 4/4] mm: memcg: pmc: support oom release Huan Yang
2024-07-02 19:27 ` [RFC PATCH 0/4] Introduce PMC(PER-MEMCG-CACHE) Roman Gushchin
2024-07-03 2:23 ` Huan Yang
2024-07-03 17:27 ` Shakeel Butt
2024-07-04 2:49 ` Huan Yang
2024-07-03 22:59 ` T.J. Mercier
2024-07-04 2:29 ` Huan Yang
2024-07-09 0:11 ` T.J. Mercier
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240702084423.1717904-3-link@vivo.com \
--to=link@vivo.com \
--cc=akpm@linux-foundation.org \
--cc=brauner@kernel.org \
--cc=cgroups@vger.kernel.org \
--cc=chrisl@kernel.org \
--cc=david@redhat.com \
--cc=hannes@cmpxchg.org \
--cc=kasong@tencent.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mhocko@kernel.org \
--cc=muchun.song@linux.dev \
--cc=opensource.kernel@vivo.com \
--cc=roman.gushchin@linux.dev \
--cc=ryan.roberts@arm.com \
--cc=schatzberg.dan@gmail.com \
--cc=shakeel.butt@linux.dev \
--cc=willy@infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox