From: Michal Hocko <mhocko@kernel.org>
To: qiwuchen55@gmail.com
Cc: chris@chrisdown.name, akpm@linux-foundation.org,
willy@infradead.org, linux-mm@kvack.org,
chenqiwu <chenqiwu@xiaomi.com>
Subject: Re: [PATCH v2] mm/vmscan: return target_mem_cgroup from cgroup_reclaim()
Date: Fri, 20 Mar 2020 08:43:16 +0100 [thread overview]
Message-ID: <20200320074316.GB24409@dhcp22.suse.cz> (raw)
In-Reply-To: <1584672705-1344-1-git-send-email-qiwuchen55@gmail.com>
On Fri 20-03-20 10:51:45, qiwuchen55@gmail.com wrote:
> From: chenqiwu <chenqiwu@xiaomi.com>
>
> Previously the code splits apart the action of checking whether
> we are in cgroup reclaim from getting the target memory cgroup
> for that reclaim. This split is confusing and seems unnecessary,
> since cgroup_reclaim itself only checks if sc->target_mem_cgroup
> is NULL or not, so merge the two use cases into one by returning
> the target_mem_cgroup itself from cgroup_reclaim.
>
> As a result, sc->target_mem_cgroup is just used when CONFIG_MEMCG
> is set, so wrap it with ifdef CONFIG_MEMCG in struct scan_control
> will save some space for those who build their kernels with memory
> cgroups disabled.
Sorry, I haven't noticed you have posted this new version when replying
to the previous version [1]. Anyway this is the comparison of the
generated code
text data bss dec hex filename
40661 24060 12 64733 fcdd mm/vmscan.memcg.after.o
40661 24060 12 64733 fcdd mm/vmscan.v2.memcg.after.o
40556 24060 12 64628 fc74 mm/vmscan.memcg.before.o
36240 23076 8 59324 e7bc mm/vmscan.nomemcg.after.o
36240 23076 8 59324 e7bc mm/vmscan.v2.nomemcg.after.o
36283 23076 8 59367 e7e7 mm/vmscan.nomemcg.before.o
Again with gcc version 9.2.1 20200306 (Debian 9.2.1-31). This doesn't
show the benefit of the scan_control shrinking for !MEMCG because that
one is allocated on the stack.
I have already said that I am not a great fan of the patch mostly
because I do not see a clear justification. The resulting naming is not
great either. But if other people think this improves readability I will
not stand in the way. Both savings and increase depending on the config
are marginal for the reclaim path which is far from hot.
[1] http://lkml.kernel.org/r/20200320072820.GA24409@dhcp22.suse.cz
> Signed-off-by: chenqiwu <chenqiwu@xiaomi.com>
> ---
> mm/vmscan.c | 41 +++++++++++++++++++++++------------------
> 1 file changed, 23 insertions(+), 18 deletions(-)
>
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index dca623d..eb9155e 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -73,11 +73,13 @@ struct scan_control {
> */
> nodemask_t *nodemask;
>
> +#ifdef CONFIG_MEMCG
> /*
> * The memory cgroup that hit its limit and as a result is the
> * primary target of this reclaim invocation.
> */
> struct mem_cgroup *target_mem_cgroup;
> +#endif
>
> /* Can active pages be deactivated as part of reclaim? */
> #define DEACTIVATE_ANON 1
> @@ -238,7 +240,7 @@ static void unregister_memcg_shrinker(struct shrinker *shrinker)
> up_write(&shrinker_rwsem);
> }
>
> -static bool cgroup_reclaim(struct scan_control *sc)
> +static struct mem_cgroup *cgroup_reclaim(struct scan_control *sc)
> {
> return sc->target_mem_cgroup;
> }
> @@ -276,9 +278,9 @@ static void unregister_memcg_shrinker(struct shrinker *shrinker)
> {
> }
>
> -static bool cgroup_reclaim(struct scan_control *sc)
> +static struct mem_cgroup *cgroup_reclaim(struct scan_control *sc)
> {
> - return false;
> + return NULL;
> }
>
> static bool writeback_throttling_sane(struct scan_control *sc)
> @@ -984,7 +986,7 @@ static enum page_references page_check_references(struct page *page,
> int referenced_ptes, referenced_page;
> unsigned long vm_flags;
>
> - referenced_ptes = page_referenced(page, 1, sc->target_mem_cgroup,
> + referenced_ptes = page_referenced(page, 1, cgroup_reclaim(sc),
> &vm_flags);
> referenced_page = TestClearPageReferenced(page);
>
> @@ -1422,7 +1424,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
> count_vm_event(PGLAZYFREED);
> count_memcg_page_event(page, PGLAZYFREED);
> } else if (!mapping || !__remove_mapping(mapping, page, true,
> - sc->target_mem_cgroup))
> + cgroup_reclaim(sc)))
> goto keep_locked;
>
> unlock_page(page);
> @@ -1907,6 +1909,7 @@ static int current_may_throttle(void)
> enum vm_event_item item;
> struct pglist_data *pgdat = lruvec_pgdat(lruvec);
> struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
> + struct mem_cgroup *target_memcg = cgroup_reclaim(sc);
> bool stalled = false;
>
> while (unlikely(too_many_isolated(pgdat, file, sc))) {
> @@ -1933,7 +1936,7 @@ static int current_may_throttle(void)
> reclaim_stat->recent_scanned[file] += nr_taken;
>
> item = current_is_kswapd() ? PGSCAN_KSWAPD : PGSCAN_DIRECT;
> - if (!cgroup_reclaim(sc))
> + if (!target_memcg)
> __count_vm_events(item, nr_scanned);
> __count_memcg_events(lruvec_memcg(lruvec), item, nr_scanned);
> spin_unlock_irq(&pgdat->lru_lock);
> @@ -1947,7 +1950,7 @@ static int current_may_throttle(void)
> spin_lock_irq(&pgdat->lru_lock);
>
> item = current_is_kswapd() ? PGSTEAL_KSWAPD : PGSTEAL_DIRECT;
> - if (!cgroup_reclaim(sc))
> + if (!target_memcg)
> __count_vm_events(item, nr_reclaimed);
> __count_memcg_events(lruvec_memcg(lruvec), item, nr_reclaimed);
> reclaim_stat->recent_rotated[0] += stat.nr_activate[0];
> @@ -2041,7 +2044,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
> }
> }
>
> - if (page_referenced(page, 0, sc->target_mem_cgroup,
> + if (page_referenced(page, 0, cgroup_reclaim(sc),
> &vm_flags)) {
> nr_rotated += hpage_nr_pages(page);
> /*
> @@ -2625,7 +2628,7 @@ static inline bool should_continue_reclaim(struct pglist_data *pgdat,
>
> static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc)
> {
> - struct mem_cgroup *target_memcg = sc->target_mem_cgroup;
> + struct mem_cgroup *target_memcg = cgroup_reclaim(sc);
> struct mem_cgroup *memcg;
>
> memcg = mem_cgroup_iter(target_memcg, NULL, NULL);
> @@ -2686,10 +2689,11 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc)
> struct reclaim_state *reclaim_state = current->reclaim_state;
> unsigned long nr_reclaimed, nr_scanned;
> struct lruvec *target_lruvec;
> + struct mem_cgroup *target_memcg = cgroup_reclaim(sc);
> bool reclaimable = false;
> unsigned long file;
>
> - target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat);
> + target_lruvec = mem_cgroup_lruvec(target_memcg, pgdat);
>
> again:
> memset(&sc->nr, 0, sizeof(sc->nr));
> @@ -2744,7 +2748,7 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc)
> * thrashing file LRU becomes infinitely more attractive than
> * anon pages. Try to detect this based on file LRU size.
> */
> - if (!cgroup_reclaim(sc)) {
> + if (!target_memcg) {
> unsigned long total_high_wmark = 0;
> unsigned long free, anon;
> int z;
> @@ -2782,7 +2786,7 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc)
> }
>
> /* Record the subtree's reclaim efficiency */
> - vmpressure(sc->gfp_mask, sc->target_mem_cgroup, true,
> + vmpressure(sc->gfp_mask, target_memcg, true,
> sc->nr_scanned - nr_scanned,
> sc->nr_reclaimed - nr_reclaimed);
>
> @@ -2833,7 +2837,7 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc)
> * stalling in wait_iff_congested().
> */
> if ((current_is_kswapd() ||
> - (cgroup_reclaim(sc) && writeback_throttling_sane(sc))) &&
> + (target_memcg && writeback_throttling_sane(sc))) &&
> sc->nr.dirty && sc->nr.dirty == sc->nr.congested)
> set_bit(LRUVEC_CONGESTED, &target_lruvec->flags);
>
> @@ -3020,14 +3024,15 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
> pg_data_t *last_pgdat;
> struct zoneref *z;
> struct zone *zone;
> + struct mem_cgroup *target_memcg = cgroup_reclaim(sc);
> retry:
> delayacct_freepages_start();
>
> - if (!cgroup_reclaim(sc))
> + if (!target_memcg)
> __count_zid_vm_events(ALLOCSTALL, sc->reclaim_idx, 1);
>
> do {
> - vmpressure_prio(sc->gfp_mask, sc->target_mem_cgroup,
> + vmpressure_prio(sc->gfp_mask, target_memcg,
> sc->priority);
> sc->nr_scanned = 0;
> shrink_zones(zonelist, sc);
> @@ -3053,12 +3058,12 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
> continue;
> last_pgdat = zone->zone_pgdat;
>
> - snapshot_refaults(sc->target_mem_cgroup, zone->zone_pgdat);
> + snapshot_refaults(target_memcg, zone->zone_pgdat);
>
> - if (cgroup_reclaim(sc)) {
> + if (target_memcg) {
> struct lruvec *lruvec;
>
> - lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup,
> + lruvec = mem_cgroup_lruvec(target_memcg,
> zone->zone_pgdat);
> clear_bit(LRUVEC_CONGESTED, &lruvec->flags);
> }
> --
> 1.9.1
--
Michal Hocko
SUSE Labs
prev parent reply other threads:[~2020-03-20 7:43 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-03-20 2:51 qiwuchen55
2020-03-20 7:43 ` Michal Hocko [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200320074316.GB24409@dhcp22.suse.cz \
--to=mhocko@kernel.org \
--cc=akpm@linux-foundation.org \
--cc=chenqiwu@xiaomi.com \
--cc=chris@chrisdown.name \
--cc=linux-mm@kvack.org \
--cc=qiwuchen55@gmail.com \
--cc=willy@infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox