From: Huan Yang <11133793@vivo.com>
To: Dan Schatzberg <schatzberg.dan@gmail.com>,
Johannes Weiner <hannes@cmpxchg.org>,
Roman Gushchin <roman.gushchin@linux.dev>,
Yosry Ahmed <yosryahmed@google.com>, Huan Yang <link@vivo.com>
Cc: linux-kernel@vger.kernel.org, cgroups@vger.kernel.org,
linux-mm@kvack.org, Michal Hocko <mhocko@kernel.org>,
Shakeel Butt <shakeelb@google.com>,
Muchun Song <muchun.song@linux.dev>,
Andrew Morton <akpm@linux-foundation.org>,
David Hildenbrand <david@redhat.com>,
Matthew Wilcox <willy@infradead.org>,
Huang Ying <ying.huang@intel.com>,
Kefeng Wang <wangkefeng.wang@huawei.com>,
Peter Xu <peterx@redhat.com>,
"Vishal Moola (Oracle)" <vishal.moola@gmail.com>,
Yue Zhao <findns94@gmail.com>, Hugh Dickins <hughd@google.com>
Subject: Re: [PATCH 1/1] mm: add swapiness= arg to memory.reclaim
Date: Fri, 1 Dec 2023 09:56:06 +0800 [thread overview]
Message-ID: <ec8abbff-8e17-43b3-a210-fa615e71217d@vivo.com> (raw)
In-Reply-To: <20231130153658.527556-2-schatzberg.dan@gmail.com>
On 2023/11/30 23:36, Dan Schatzberg wrote:
>
> Allow proactive reclaimers to submit an additional swappiness=<val>
> argument to memory.reclaim. This overrides the global or per-memcg
> swappiness setting for that reclaim attempt.
>
> For example:
>
> echo "2M swappiness=0" > /sys/fs/cgroup/memory.reclaim
>
> will perform reclaim on the rootcg with a swappiness setting of 0 (no
> swap) regardless of the vm.swappiness sysctl setting.
>
> Signed-off-by: Dan Schatzberg<schatzberg.dan@gmail.com>
> ---
> include/linux/swap.h | 3 ++-
> mm/memcontrol.c | 55 +++++++++++++++++++++++++++++++++++---------
> mm/vmscan.c | 13 +++++++++--
> 3 files changed, 57 insertions(+), 14 deletions(-)
>
> diff --git a/include/linux/swap.h b/include/linux/swap.h
> index f6dd6575b905..c6e309199f10 100644
> --- a/include/linux/swap.h
> +++ b/include/linux/swap.h
> @@ -410,7 +410,8 @@ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
> extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
> unsigned long nr_pages,
> gfp_t gfp_mask,
> - unsigned int reclaim_options);
> + unsigned int reclaim_options,
> + int *swappiness);
> extern unsigned long mem_cgroup_shrink_node(struct mem_cgroup *mem,
> gfp_t gfp_mask, bool noswap,
> pg_data_t *pgdat,
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 1c1061df9cd1..ba1c89455ab0 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -63,6 +63,7 @@
> #include <linux/resume_user_mode.h>
> #include <linux/psi.h>
> #include <linux/seq_buf.h>
> +#include <linux/parser.h>
> #include <linux/sched/isolation.h>
> #include "internal.h"
> #include <net/sock.h>
> @@ -2449,7 +2450,7 @@ static unsigned long reclaim_high(struct mem_cgroup *memcg,
> psi_memstall_enter(&pflags);
> nr_reclaimed += try_to_free_mem_cgroup_pages(memcg, nr_pages,
> gfp_mask,
> - MEMCG_RECLAIM_MAY_SWAP);
> + MEMCG_RECLAIM_MAY_SWAP, NULL);
> psi_memstall_leave(&pflags);
> } while ((memcg = parent_mem_cgroup(memcg)) &&
> !mem_cgroup_is_root(memcg));
> @@ -2740,7 +2741,7 @@ static int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask,
>
> psi_memstall_enter(&pflags);
> nr_reclaimed = try_to_free_mem_cgroup_pages(mem_over_limit, nr_pages,
> - gfp_mask, reclaim_options);
> + gfp_mask, reclaim_options, NULL);
> psi_memstall_leave(&pflags);
>
> if (mem_cgroup_margin(mem_over_limit) >= nr_pages)
> @@ -3660,7 +3661,7 @@ static int mem_cgroup_resize_max(struct mem_cgroup *memcg,
> }
>
> if (!try_to_free_mem_cgroup_pages(memcg, 1, GFP_KERNEL,
> - memsw ? 0 : MEMCG_RECLAIM_MAY_SWAP)) {
> + memsw ? 0 : MEMCG_RECLAIM_MAY_SWAP, NULL)) {
> ret = -EBUSY;
> break;
> }
> @@ -3774,7 +3775,7 @@ static int mem_cgroup_force_empty(struct mem_cgroup *memcg)
> return -EINTR;
>
> if (!try_to_free_mem_cgroup_pages(memcg, 1, GFP_KERNEL,
> - MEMCG_RECLAIM_MAY_SWAP))
> + MEMCG_RECLAIM_MAY_SWAP, NULL))
> nr_retries--;
> }
>
> @@ -6720,7 +6721,7 @@ static ssize_t memory_high_write(struct kernfs_open_file *of,
> }
>
> reclaimed = try_to_free_mem_cgroup_pages(memcg, nr_pages - high,
> - GFP_KERNEL, MEMCG_RECLAIM_MAY_SWAP);
> + GFP_KERNEL, MEMCG_RECLAIM_MAY_SWAP, NULL);
>
> if (!reclaimed && !nr_retries--)
> break;
> @@ -6769,7 +6770,7 @@ static ssize_t memory_max_write(struct kernfs_open_file *of,
>
> if (nr_reclaims) {
> if (!try_to_free_mem_cgroup_pages(memcg, nr_pages - max,
> - GFP_KERNEL, MEMCG_RECLAIM_MAY_SWAP))
> + GFP_KERNEL, MEMCG_RECLAIM_MAY_SWAP, NULL))
> nr_reclaims--;
> continue;
> }
> @@ -6895,6 +6896,16 @@ static ssize_t memory_oom_group_write(struct kernfs_open_file *of,
> return nbytes;
> }
>
> +enum {
> + MEMORY_RECLAIM_SWAPPINESS = 0,
> + MEMORY_RECLAIM_NULL,
> +};
> +
> +static const match_table_t if_tokens = {
> + { MEMORY_RECLAIM_SWAPPINESS, "swappiness=%d"},
> + { MEMORY_RECLAIM_NULL, NULL },
> +};
> +
> static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
> size_t nbytes, loff_t off)
> {
> @@ -6902,12 +6913,33 @@ static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
> unsigned int nr_retries = MAX_RECLAIM_RETRIES;
> unsigned long nr_to_reclaim, nr_reclaimed = 0;
> unsigned int reclaim_options;
> - int err;
> + char *old_buf, *start;
> + substring_t args[MAX_OPT_ARGS];
> + int swappiness = -1;
>
> buf = strstrip(buf);
> - err = page_counter_memparse(buf, "", &nr_to_reclaim);
> - if (err)
> - return err;
> +
> + old_buf = buf;
> + nr_to_reclaim = memparse(buf, &buf) / PAGE_SIZE;
> + if (buf == old_buf)
> + return -EINVAL;
> +
> + buf = strstrip(buf);
> +
> + while ((start = strsep(&buf, " ")) != NULL) {
> + if (!strlen(start))
> + continue;
> + switch (match_token(start, if_tokens, args)) {
> + case MEMORY_RECLAIM_SWAPPINESS:
> + if (match_int(&args[0], &swappiness))
> + return -EINVAL;
> + if (swappiness < 0 || swappiness > 200)
> + return -EINVAL;
> + break;
> + default:
> + return -EINVAL;
> + }
> + }
>
> reclaim_options = MEMCG_RECLAIM_MAY_SWAP | MEMCG_RECLAIM_PROACTIVE;
> while (nr_reclaimed < nr_to_reclaim) {
> @@ -6926,7 +6958,8 @@ static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
>
> reclaimed = try_to_free_mem_cgroup_pages(memcg,
> min(nr_to_reclaim - nr_reclaimed, SWAP_CLUSTER_MAX),
> - GFP_KERNEL, reclaim_options);
> + GFP_KERNEL, reclaim_options,
> + swappiness == -1 ? NULL : &swappiness);
>
> if (!reclaimed && !nr_retries--)
> return -EAGAIN;
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index 506f8220c5fe..546704ea01e1 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -136,6 +136,9 @@ struct scan_control {
> /* Always discard instead of demoting to lower tier memory */
> unsigned int no_demotion:1;
>
> + /* Swappiness value for reclaim, if NULL use memcg/global value */
> + int *swappiness;
> +
> /* Allocation order */
> s8 order;
>
> @@ -2327,7 +2330,8 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
> struct pglist_data *pgdat = lruvec_pgdat(lruvec);
> struct mem_cgroup *memcg = lruvec_memcg(lruvec);
> unsigned long anon_cost, file_cost, total_cost;
> - int swappiness = mem_cgroup_swappiness(memcg);
> + int swappiness = sc->swappiness ?
> + *sc->swappiness : mem_cgroup_swappiness(memcg);
Should we use "unlikely" here to hint that a non-NULL sc->swappiness is
the unexpected case, since the only current user is proactive reclaim?
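Something like this, just as an untested sketch of what I mean:

	int swappiness = unlikely(sc->swappiness) ?
				*sc->swappiness : mem_cgroup_swappiness(memcg);
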
> u64 fraction[ANON_AND_FILE];
> u64 denominator = 0; /* gcc */
> enum scan_balance scan_balance;
> @@ -2608,6 +2612,9 @@ static int get_swappiness(struct lruvec *lruvec, struct scan_control *sc)
> mem_cgroup_get_nr_swap_pages(memcg) < MIN_LRU_BATCH)
> return 0;
>
> + if (sc->swappiness)
> + return *sc->swappiness;
Same here; the unlikely() hint could apply as well.
> +
> return mem_cgroup_swappiness(memcg);
> }
>
> @@ -6433,7 +6440,8 @@ unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg,
> unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
> unsigned long nr_pages,
> gfp_t gfp_mask,
> - unsigned int reclaim_options)
> + unsigned int reclaim_options,
> + int *swappiness)
> {
> unsigned long nr_reclaimed;
> unsigned int noreclaim_flag;
> @@ -6448,6 +6456,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
> .may_unmap = 1,
> .may_swap = !!(reclaim_options & MEMCG_RECLAIM_MAY_SWAP),
> .proactive = !!(reclaim_options & MEMCG_RECLAIM_PROACTIVE),
> + .swappiness = swappiness,
> };
> /*
> * Traverse the ZONELIST_FALLBACK zonelist of the current node to put
> --
> 2.34.1
My previous patch attempted to ensure fully deterministic semantics
under extreme swappiness.
For example, when swappiness is set to 200, only anonymous pages should
be reclaimed.
But in MGLRU, isolate_folios() will fall back to the other type when
nothing was scanned for the requested one, so file pages can still be
reclaimed. (We do not want it to attempt this fallback.)
What do you think about these extreme swappiness scenarios?
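For reference, the fallback I mean is roughly the loop below (a
simplified sketch from memory of isolate_folios() in mm/vmscan.c, not
the exact upstream code):

	for (i = !swappiness; i < ANON_AND_FILE; i++) {
		scanned = scan_folios(lruvec, sc, type, tier, list);
		if (scanned)
			break;

		/*
		 * Nothing isolated for the preferred type, so fall back
		 * to the other one -- even with swappiness == 200 this
		 * can end up reclaiming file pages.
		 */
		type = !type;
		tier = -1;
	}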
Thread overview: 29+ messages
2023-11-30 15:36 [PATCH 0/1] Add swappiness argument " Dan Schatzberg
2023-11-30 15:36 ` [PATCH 1/1] mm: add swapiness= arg " Dan Schatzberg
2023-11-30 21:33 ` Andrew Morton
2023-11-30 21:46 ` Dan Schatzberg
2023-12-01 1:56 ` Huan Yang [this message]
2023-12-01 2:05 ` Yosry Ahmed
2023-12-01 2:13 ` Huan Yang
2023-12-01 2:17 ` Yosry Ahmed
2023-12-01 2:24 ` Huan Yang
2023-11-30 15:57 ` [PATCH 0/1] Add swappiness argument " Michal Hocko
2023-11-30 16:56 ` Johannes Weiner
2023-11-30 18:49 ` Shakeel Butt
2023-11-30 19:47 ` Dan Schatzberg
2023-11-30 20:30 ` Shakeel Butt
2023-11-30 21:37 ` Dan Schatzberg
2023-11-30 21:52 ` Shakeel Butt
2023-12-01 9:33 ` Michal Hocko
2023-12-01 15:49 ` Dan Schatzberg
2023-12-01 17:09 ` Johannes Weiner
2023-12-04 15:23 ` Michal Hocko
2023-12-05 16:19 ` Johannes Weiner
2023-12-07 18:57 ` Michal Koutný
2023-11-30 18:44 ` Shakeel Butt
2023-11-30 18:54 ` Matthew Wilcox
2023-11-30 19:39 ` Johannes Weiner
2023-11-30 19:49 ` Johannes Weiner
2023-11-30 19:50 ` Dan Schatzberg
2023-12-06 16:28 [PATCH V2 " Dan Schatzberg
2023-12-06 16:28 ` [PATCH 1/1] mm: add swapiness= arg " Dan Schatzberg
2023-12-07 19:00 ` Michal Koutný