From: Roman Gushchin <roman.gushchin@linux.dev>
To: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Andrew Morton <akpm@linux-foundation.org>,
Johannes Weiner <hannes@cmpxchg.org>,
Michal Hocko <mhocko@kernel.org>,
Muchun Song <muchun.song@linux.dev>,
linux-mm@kvack.org, linux-kernel@vger.kernel.org
Subject: Re: [PATCH v2 2/7] memcg: dynamically allocate lruvec_stats
Date: Mon, 29 Apr 2024 08:50:11 -0700 [thread overview]
Message-ID: <Zi_BsznrtoC1N_lq@P9FQF9L96D> (raw)
In-Reply-To: <20240427003733.3898961-3-shakeel.butt@linux.dev>
On Fri, Apr 26, 2024 at 05:37:28PM -0700, Shakeel Butt wrote:
> To decouple the dependency of lruvec_stats on NR_VM_NODE_STAT_ITEMS, we
> need to dynamically allocate lruvec_stats in the mem_cgroup_per_node
> structure. Also move the definition of lruvec_stats_percpu and
> lruvec_stats and related functions to the memcontrol.c to facilitate
> later patches. No functional changes in the patch.
>
> Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>
> ---
> include/linux/memcontrol.h | 62 +++------------------------
> mm/memcontrol.c | 87 ++++++++++++++++++++++++++++++++------
> 2 files changed, 81 insertions(+), 68 deletions(-)
>
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index 9aba0d0462ca..ab8a6e884375 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -83,6 +83,8 @@ enum mem_cgroup_events_target {
>
> struct memcg_vmstats_percpu;
> struct memcg_vmstats;
> +struct lruvec_stats_percpu;
> +struct lruvec_stats;
>
> struct mem_cgroup_reclaim_iter {
> struct mem_cgroup *position;
> @@ -90,25 +92,6 @@ struct mem_cgroup_reclaim_iter {
> unsigned int generation;
> };
>
> -struct lruvec_stats_percpu {
> - /* Local (CPU and cgroup) state */
> - long state[NR_VM_NODE_STAT_ITEMS];
> -
> - /* Delta calculation for lockless upward propagation */
> - long state_prev[NR_VM_NODE_STAT_ITEMS];
> -};
> -
> -struct lruvec_stats {
> - /* Aggregated (CPU and subtree) state */
> - long state[NR_VM_NODE_STAT_ITEMS];
> -
> - /* Non-hierarchical (CPU aggregated) state */
> - long state_local[NR_VM_NODE_STAT_ITEMS];
> -
> - /* Pending child counts during tree propagation */
> - long state_pending[NR_VM_NODE_STAT_ITEMS];
> -};
> -
> /*
> * per-node information in memory controller.
> */
> @@ -116,7 +99,7 @@ struct mem_cgroup_per_node {
> struct lruvec lruvec;
>
> struct lruvec_stats_percpu __percpu *lruvec_stats_percpu;
> - struct lruvec_stats lruvec_stats;
> + struct lruvec_stats *lruvec_stats;
>
> unsigned long lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS];
>
> @@ -1037,42 +1020,9 @@ static inline void mod_memcg_page_state(struct page *page,
> }
>
> unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx);
> -
> -static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
> - enum node_stat_item idx)
> -{
> - struct mem_cgroup_per_node *pn;
> - long x;
> -
> - if (mem_cgroup_disabled())
> - return node_page_state(lruvec_pgdat(lruvec), idx);
> -
> - pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
> - x = READ_ONCE(pn->lruvec_stats.state[idx]);
> -#ifdef CONFIG_SMP
> - if (x < 0)
> - x = 0;
> -#endif
> - return x;
> -}
> -
> -static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec,
> - enum node_stat_item idx)
> -{
> - struct mem_cgroup_per_node *pn;
> - long x = 0;
> -
> - if (mem_cgroup_disabled())
> - return node_page_state(lruvec_pgdat(lruvec), idx);
> -
> - pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
> - x = READ_ONCE(pn->lruvec_stats.state_local[idx]);
> -#ifdef CONFIG_SMP
> - if (x < 0)
> - x = 0;
> -#endif
> - return x;
> -}
> +unsigned long lruvec_page_state(struct lruvec *lruvec, enum node_stat_item idx);
> +unsigned long lruvec_page_state_local(struct lruvec *lruvec,
> + enum node_stat_item idx);
>
> void mem_cgroup_flush_stats(struct mem_cgroup *memcg);
> void mem_cgroup_flush_stats_ratelimited(struct mem_cgroup *memcg);
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 53769d06053f..5e337ed6c6bf 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -576,6 +576,60 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_node *mctz)
> return mz;
> }
>
> +struct lruvec_stats_percpu {
> + /* Local (CPU and cgroup) state */
> + long state[NR_VM_NODE_STAT_ITEMS];
> +
> + /* Delta calculation for lockless upward propagation */
> + long state_prev[NR_VM_NODE_STAT_ITEMS];
> +};
> +
> +struct lruvec_stats {
> + /* Aggregated (CPU and subtree) state */
> + long state[NR_VM_NODE_STAT_ITEMS];
> +
> + /* Non-hierarchical (CPU aggregated) state */
> + long state_local[NR_VM_NODE_STAT_ITEMS];
> +
> + /* Pending child counts during tree propagation */
> + long state_pending[NR_VM_NODE_STAT_ITEMS];
> +};
> +
> +unsigned long lruvec_page_state(struct lruvec *lruvec, enum node_stat_item idx)
> +{
> + struct mem_cgroup_per_node *pn;
> + long x;
> +
> + if (mem_cgroup_disabled())
> + return node_page_state(lruvec_pgdat(lruvec), idx);
> +
> + pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
> + x = READ_ONCE(pn->lruvec_stats->state[idx]);
> +#ifdef CONFIG_SMP
> + if (x < 0)
> + x = 0;
> +#endif
> + return x;
> +}
> +
> +unsigned long lruvec_page_state_local(struct lruvec *lruvec,
> + enum node_stat_item idx)
> +{
> + struct mem_cgroup_per_node *pn;
> + long x = 0;
> +
> + if (mem_cgroup_disabled())
> + return node_page_state(lruvec_pgdat(lruvec), idx);
> +
> + pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
> + x = READ_ONCE(pn->lruvec_stats->state_local[idx]);
> +#ifdef CONFIG_SMP
> + if (x < 0)
> + x = 0;
> +#endif
Not directly related to your change, but do we still need it? And if yes,
do we really care about !CONFIG_SMP case enough to justify these #ifdefs?
> + return x;
> +}
> +
> /* Subset of vm_event_item to report for memcg event stats */
> static const unsigned int memcg_vm_event_stat[] = {
> PGPGIN,
> @@ -5492,18 +5546,25 @@ static int alloc_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
> if (!pn)
> return 1;
>
> + pn->lruvec_stats = kzalloc_node(sizeof(struct lruvec_stats), GFP_KERNEL,
> + node);
Why not GFP_KERNEL_ACCOUNT?
> + if (!pn->lruvec_stats)
> + goto fail;
> +
> pn->lruvec_stats_percpu = alloc_percpu_gfp(struct lruvec_stats_percpu,
> GFP_KERNEL_ACCOUNT);
> - if (!pn->lruvec_stats_percpu) {
> - kfree(pn);
> - return 1;
> - }
> + if (!pn->lruvec_stats_percpu)
> + goto fail;
>
> lruvec_init(&pn->lruvec);
> pn->memcg = memcg;
>
> memcg->nodeinfo[node] = pn;
> return 0;
> +fail:
> + kfree(pn->lruvec_stats);
> + kfree(pn);
> + return 1;
> }
>
> static void free_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
> @@ -5514,6 +5575,7 @@ static void free_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
> return;
>
> free_percpu(pn->lruvec_stats_percpu);
> + kfree(pn->lruvec_stats);
> kfree(pn);
> }
>
> @@ -5866,18 +5928,19 @@ static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu)
>
> for_each_node_state(nid, N_MEMORY) {
> struct mem_cgroup_per_node *pn = memcg->nodeinfo[nid];
> - struct mem_cgroup_per_node *ppn = NULL;
> + struct lruvec_stats *lstats = pn->lruvec_stats;
> + struct lruvec_stats *plstats = NULL;
> struct lruvec_stats_percpu *lstatc;
>
> if (parent)
> - ppn = parent->nodeinfo[nid];
> + plstats = parent->nodeinfo[nid]->lruvec_stats;
>
> lstatc = per_cpu_ptr(pn->lruvec_stats_percpu, cpu);
>
> for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
> - delta = pn->lruvec_stats.state_pending[i];
> + delta = lstats->state_pending[i];
> if (delta)
> - pn->lruvec_stats.state_pending[i] = 0;
> + lstats->state_pending[i] = 0;
>
> delta_cpu = 0;
> v = READ_ONCE(lstatc->state[i]);
> @@ -5888,12 +5951,12 @@ static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu)
> }
>
> if (delta_cpu)
> - pn->lruvec_stats.state_local[i] += delta_cpu;
> + lstats->state_local[i] += delta_cpu;
>
> if (delta) {
> - pn->lruvec_stats.state[i] += delta;
> - if (ppn)
> - ppn->lruvec_stats.state_pending[i] += delta;
> + lstats->state[i] += delta;
> + if (plstats)
> + plstats->state_pending[i] += delta;
> }
> }
> }
> --
> 2.43.0
>
next prev parent reply other threads:[~2024-04-29 15:50 UTC|newest]
Thread overview: 32+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-04-27 0:37 [PATCH v2 0/7] memcg: reduce memory consumption by memcg stats Shakeel Butt
2024-04-27 0:37 ` [PATCH v2 1/7] memcg: reduce memory size of mem_cgroup_events_index Shakeel Butt
2024-04-27 0:42 ` Yosry Ahmed
2024-04-27 1:15 ` Shakeel Butt
2024-04-29 15:36 ` Roman Gushchin
2024-04-27 0:37 ` [PATCH v2 2/7] memcg: dynamically allocate lruvec_stats Shakeel Butt
2024-04-29 15:50 ` Roman Gushchin [this message]
2024-04-29 19:46 ` Shakeel Butt
2024-04-29 21:02 ` Roman Gushchin
2024-04-29 21:59 ` Shakeel Butt
2024-04-27 0:37 ` [PATCH v2 3/7] memcg: reduce memory for the lruvec and memcg stats Shakeel Butt
2024-04-27 0:51 ` Yosry Ahmed
2024-04-27 1:16 ` Shakeel Butt
2024-04-27 1:18 ` Yosry Ahmed
2024-04-29 16:00 ` Roman Gushchin
2024-04-29 20:00 ` Shakeel Butt
2024-04-29 17:35 ` T.J. Mercier
2024-04-29 20:13 ` Shakeel Butt
2024-04-29 22:23 ` T.J. Mercier
2024-04-27 0:37 ` [PATCH v2 4/7] memcg: cleanup __mod_memcg_lruvec_state Shakeel Butt
2024-04-27 0:53 ` Yosry Ahmed
2024-04-29 15:45 ` Roman Gushchin
2024-04-27 0:37 ` [PATCH v2 5/7] memcg: pr_warn_once for unexpected events and stats Shakeel Butt
2024-04-27 0:58 ` Yosry Ahmed
2024-04-27 1:18 ` Shakeel Butt
2024-04-27 14:22 ` Johannes Weiner
2024-04-29 19:54 ` Shakeel Butt
2024-04-29 16:06 ` Roman Gushchin
2024-04-29 19:56 ` Shakeel Butt
2024-04-27 0:37 ` [PATCH v2 6/7] memcg: use proper type for mod_memcg_state Shakeel Butt
2024-04-27 0:37 ` [PATCH v2 7/7] mm: cleanup WORKINGSET_NODES in workingset Shakeel Butt
2024-04-29 16:07 ` Roman Gushchin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=Zi_BsznrtoC1N_lq@P9FQF9L96D \
--to=roman.gushchin@linux.dev \
--cc=akpm@linux-foundation.org \
--cc=hannes@cmpxchg.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mhocko@kernel.org \
--cc=muchun.song@linux.dev \
--cc=shakeel.butt@linux.dev \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox