* [PATCH 0/7] memcg: further decouple v1 code from v2
@ 2024-08-15 5:04 Shakeel Butt
2024-08-15 5:04 ` [PATCH 1/7] memcg: move v1 only percpu stats in separate struct Shakeel Butt
` (6 more replies)
0 siblings, 7 replies; 9+ messages in thread
From: Shakeel Butt @ 2024-08-15 5:04 UTC (permalink / raw)
To: Andrew Morton
Cc: Johannes Weiner, Michal Hocko, Roman Gushchin, Muchun Song,
T . J . Mercier, linux-mm, linux-kernel, Meta kernel team,
cgroups
Some of the v1 code is still in the v2 code base due to v1 fields in the
struct memcg_vmstats_percpu. This series decouples those fields from the
v2 struct and moves all the related code into the v1 only code base.
Shakeel Butt (7):
memcg: move v1 only percpu stats in separate struct
memcg: move mem_cgroup_event_ratelimit to v1 code
memcg: move mem_cgroup_charge_statistics to v1 code
memcg: move v1 events and statistics code to v1 file
memcg: make v1 only functions static
memcg: allocate v1 event percpu only on v1 deployment
memcg: make PGPGIN and PGPGOUT v1 only
include/linux/memcontrol.h | 3 +
mm/memcontrol-v1.c | 110 +++++++++++++++++++++++++++++++++++--
mm/memcontrol-v1.h | 24 ++++++--
mm/memcontrol.c | 87 ++++-------------------------
4 files changed, 139 insertions(+), 85 deletions(-)
--
2.43.5
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH 1/7] memcg: move v1 only percpu stats in separate struct
2024-08-15 5:04 [PATCH 0/7] memcg: further decouple v1 code from v2 Shakeel Butt
@ 2024-08-15 5:04 ` Shakeel Butt
2024-08-15 19:34 ` Roman Gushchin
2024-08-15 5:04 ` [PATCH 2/7] memcg: move mem_cgroup_event_ratelimit to v1 code Shakeel Butt
` (5 subsequent siblings)
6 siblings, 1 reply; 9+ messages in thread
From: Shakeel Butt @ 2024-08-15 5:04 UTC (permalink / raw)
To: Andrew Morton
Cc: Johannes Weiner, Michal Hocko, Roman Gushchin, Muchun Song,
T . J . Mercier, linux-mm, linux-kernel, Meta kernel team,
cgroups
At the moment struct memcg_vmstats_percpu contains two v1 only fields
which consume memory even when CONFIG_MEMCG_V1 is not enabled. In
addition there are v1 only functions accessing them which are in the main
memcontrol source file and cannot be moved to the v1 only source file due
to these fields. Let's move these fields into their own struct. Later
patches will move the functions accessing them to v1 source file and
only allocate these fields when CONFIG_MEMCG_V1 is enabled.
Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>
---
include/linux/memcontrol.h | 2 ++
mm/memcontrol-v1.h | 19 +++++++++++++++++++
mm/memcontrol.c | 18 +++++++++---------
3 files changed, 30 insertions(+), 9 deletions(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 90ecd2dbca06..e21a1541adeb 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -70,6 +70,7 @@ struct mem_cgroup_id {
};
struct memcg_vmstats_percpu;
+struct memcg1_events_percpu;
struct memcg_vmstats;
struct lruvec_stats_percpu;
struct lruvec_stats;
@@ -254,6 +255,7 @@ struct mem_cgroup {
struct list_head objcg_list;
struct memcg_vmstats_percpu __percpu *vmstats_percpu;
+ struct memcg1_events_percpu __percpu *events_percpu;
#ifdef CONFIG_CGROUP_WRITEBACK
struct list_head cgwb_list;
diff --git a/mm/memcontrol-v1.h b/mm/memcontrol-v1.h
index 56d7eaa98274..8feccecf8e2a 100644
--- a/mm/memcontrol-v1.h
+++ b/mm/memcontrol-v1.h
@@ -56,6 +56,12 @@ enum mem_cgroup_events_target {
MEM_CGROUP_NTARGETS,
};
+/* Cgroup1: threshold notifications & softlimit tree updates */
+struct memcg1_events_percpu {
+ unsigned long nr_page_events;
+ unsigned long targets[MEM_CGROUP_NTARGETS];
+};
+
bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
enum mem_cgroup_events_target target);
unsigned long mem_cgroup_usage(struct mem_cgroup *memcg, bool swap);
@@ -69,6 +75,19 @@ unsigned long memcg_page_state_output(struct mem_cgroup *memcg, int item);
unsigned long memcg_page_state_local_output(struct mem_cgroup *memcg, int item);
int memory_stat_show(struct seq_file *m, void *v);
+static inline bool memcg1_alloc_events(struct mem_cgroup *memcg)
+{
+ memcg->events_percpu = alloc_percpu_gfp(struct memcg1_events_percpu,
+ GFP_KERNEL_ACCOUNT);
+ return !!memcg->events_percpu;
+}
+
+static inline void memcg1_free_events(struct mem_cgroup *memcg)
+{
+ if (memcg->events_percpu)
+ free_percpu(memcg->events_percpu);
+}
+
/* Cgroup v1-specific declarations */
#ifdef CONFIG_MEMCG_V1
void memcg1_memcg_init(struct mem_cgroup *memcg);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index dacf4fec4541..66d60bb56f91 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -485,10 +485,6 @@ struct memcg_vmstats_percpu {
/* Delta calculation for lockless upward propagation */
long state_prev[MEMCG_VMSTAT_SIZE];
unsigned long events_prev[NR_MEMCG_EVENTS];
-
- /* Cgroup1: threshold notifications & softlimit tree updates */
- unsigned long nr_page_events;
- unsigned long targets[MEM_CGROUP_NTARGETS];
} ____cacheline_aligned;
struct memcg_vmstats {
@@ -865,7 +861,7 @@ void mem_cgroup_charge_statistics(struct mem_cgroup *memcg, int nr_pages)
nr_pages = -nr_pages; /* for event */
}
- __this_cpu_add(memcg->vmstats_percpu->nr_page_events, nr_pages);
+ __this_cpu_add(memcg->events_percpu->nr_page_events, nr_pages);
}
bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
@@ -873,8 +869,8 @@ bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
{
unsigned long val, next;
- val = __this_cpu_read(memcg->vmstats_percpu->nr_page_events);
- next = __this_cpu_read(memcg->vmstats_percpu->targets[target]);
+ val = __this_cpu_read(memcg->events_percpu->nr_page_events);
+ next = __this_cpu_read(memcg->events_percpu->targets[target]);
/* from time_after() in jiffies.h */
if ((long)(next - val) < 0) {
switch (target) {
@@ -887,7 +883,7 @@ bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
default:
break;
}
- __this_cpu_write(memcg->vmstats_percpu->targets[target], next);
+ __this_cpu_write(memcg->events_percpu->targets[target], next);
return true;
}
return false;
@@ -3510,6 +3506,7 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
for_each_node(node)
free_mem_cgroup_per_node_info(memcg, node);
+ memcg1_free_events(memcg);
kfree(memcg->vmstats);
free_percpu(memcg->vmstats_percpu);
kfree(memcg);
@@ -3549,6 +3546,9 @@ static struct mem_cgroup *mem_cgroup_alloc(struct mem_cgroup *parent)
if (!memcg->vmstats_percpu)
goto fail;
+ if (!memcg1_alloc_events(memcg))
+ goto fail;
+
for_each_possible_cpu(cpu) {
if (parent)
pstatc = per_cpu_ptr(parent->vmstats_percpu, cpu);
@@ -4664,7 +4664,7 @@ static void uncharge_batch(const struct uncharge_gather *ug)
local_irq_save(flags);
__count_memcg_events(ug->memcg, PGPGOUT, ug->pgpgout);
- __this_cpu_add(ug->memcg->vmstats_percpu->nr_page_events, ug->nr_memory);
+ __this_cpu_add(ug->memcg->events_percpu->nr_page_events, ug->nr_memory);
memcg1_check_events(ug->memcg, ug->nid);
local_irq_restore(flags);
--
2.43.5
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH 2/7] memcg: move mem_cgroup_event_ratelimit to v1 code
2024-08-15 5:04 [PATCH 0/7] memcg: further decouple v1 code from v2 Shakeel Butt
2024-08-15 5:04 ` [PATCH 1/7] memcg: move v1 only percpu stats in separate struct Shakeel Butt
@ 2024-08-15 5:04 ` Shakeel Butt
2024-08-15 5:04 ` [PATCH 3/7] memcg: move mem_cgroup_charge_statistics " Shakeel Butt
` (4 subsequent siblings)
6 siblings, 0 replies; 9+ messages in thread
From: Shakeel Butt @ 2024-08-15 5:04 UTC (permalink / raw)
To: Andrew Morton
Cc: Johannes Weiner, Michal Hocko, Roman Gushchin, Muchun Song,
T . J . Mercier, linux-mm, linux-kernel, Meta kernel team,
cgroups
There are no callers of mem_cgroup_event_ratelimit() in the v2 code.
Move it to v1 only code and rename it to memcg1_event_ratelimit().
Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>
---
mm/memcontrol-v1.c | 32 ++++++++++++++++++++++++++++++--
mm/memcontrol-v1.h | 2 --
mm/memcontrol.c | 28 ----------------------------
3 files changed, 30 insertions(+), 32 deletions(-)
diff --git a/mm/memcontrol-v1.c b/mm/memcontrol-v1.c
index 52aecdae2c28..0ce1807ba468 100644
--- a/mm/memcontrol-v1.c
+++ b/mm/memcontrol-v1.c
@@ -1439,6 +1439,34 @@ static void mem_cgroup_threshold(struct mem_cgroup *memcg)
}
}
+#define THRESHOLDS_EVENTS_TARGET 128
+#define SOFTLIMIT_EVENTS_TARGET 1024
+
+static bool memcg1_event_ratelimit(struct mem_cgroup *memcg,
+ enum mem_cgroup_events_target target)
+{
+ unsigned long val, next;
+
+ val = __this_cpu_read(memcg->events_percpu->nr_page_events);
+ next = __this_cpu_read(memcg->events_percpu->targets[target]);
+ /* from time_after() in jiffies.h */
+ if ((long)(next - val) < 0) {
+ switch (target) {
+ case MEM_CGROUP_TARGET_THRESH:
+ next = val + THRESHOLDS_EVENTS_TARGET;
+ break;
+ case MEM_CGROUP_TARGET_SOFTLIMIT:
+ next = val + SOFTLIMIT_EVENTS_TARGET;
+ break;
+ default:
+ break;
+ }
+ __this_cpu_write(memcg->events_percpu->targets[target], next);
+ return true;
+ }
+ return false;
+}
+
/*
* Check events in order.
*
@@ -1449,11 +1477,11 @@ void memcg1_check_events(struct mem_cgroup *memcg, int nid)
return;
/* threshold event is triggered in finer grain than soft limit */
- if (unlikely(mem_cgroup_event_ratelimit(memcg,
+ if (unlikely(memcg1_event_ratelimit(memcg,
MEM_CGROUP_TARGET_THRESH))) {
bool do_softlimit;
- do_softlimit = mem_cgroup_event_ratelimit(memcg,
+ do_softlimit = memcg1_event_ratelimit(memcg,
MEM_CGROUP_TARGET_SOFTLIMIT);
mem_cgroup_threshold(memcg);
if (unlikely(do_softlimit))
diff --git a/mm/memcontrol-v1.h b/mm/memcontrol-v1.h
index 8feccecf8e2a..fb7d439f19de 100644
--- a/mm/memcontrol-v1.h
+++ b/mm/memcontrol-v1.h
@@ -62,8 +62,6 @@ struct memcg1_events_percpu {
unsigned long targets[MEM_CGROUP_NTARGETS];
};
-bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
- enum mem_cgroup_events_target target);
unsigned long mem_cgroup_usage(struct mem_cgroup *memcg, bool swap);
void drain_all_stock(struct mem_cgroup *root_memcg);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 66d60bb56f91..7ea511119567 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -95,9 +95,6 @@ static bool cgroup_memory_nobpf __ro_after_init;
static DECLARE_WAIT_QUEUE_HEAD(memcg_cgwb_frn_waitq);
#endif
-#define THRESHOLDS_EVENTS_TARGET 128
-#define SOFTLIMIT_EVENTS_TARGET 1024
-
static inline bool task_is_dying(void)
{
return tsk_is_oom_victim(current) || fatal_signal_pending(current) ||
@@ -864,31 +861,6 @@ void mem_cgroup_charge_statistics(struct mem_cgroup *memcg, int nr_pages)
__this_cpu_add(memcg->events_percpu->nr_page_events, nr_pages);
}
-bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
- enum mem_cgroup_events_target target)
-{
- unsigned long val, next;
-
- val = __this_cpu_read(memcg->events_percpu->nr_page_events);
- next = __this_cpu_read(memcg->events_percpu->targets[target]);
- /* from time_after() in jiffies.h */
- if ((long)(next - val) < 0) {
- switch (target) {
- case MEM_CGROUP_TARGET_THRESH:
- next = val + THRESHOLDS_EVENTS_TARGET;
- break;
- case MEM_CGROUP_TARGET_SOFTLIMIT:
- next = val + SOFTLIMIT_EVENTS_TARGET;
- break;
- default:
- break;
- }
- __this_cpu_write(memcg->events_percpu->targets[target], next);
- return true;
- }
- return false;
-}
-
struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
{
/*
--
2.43.5
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH 3/7] memcg: move mem_cgroup_charge_statistics to v1 code
2024-08-15 5:04 [PATCH 0/7] memcg: further decouple v1 code from v2 Shakeel Butt
2024-08-15 5:04 ` [PATCH 1/7] memcg: move v1 only percpu stats in separate struct Shakeel Butt
2024-08-15 5:04 ` [PATCH 2/7] memcg: move mem_cgroup_event_ratelimit to v1 code Shakeel Butt
@ 2024-08-15 5:04 ` Shakeel Butt
2024-08-15 5:04 ` [PATCH 4/7] memcg: move v1 events and statistics code to v1 file Shakeel Butt
` (3 subsequent siblings)
6 siblings, 0 replies; 9+ messages in thread
From: Shakeel Butt @ 2024-08-15 5:04 UTC (permalink / raw)
To: Andrew Morton
Cc: Johannes Weiner, Michal Hocko, Roman Gushchin, Muchun Song,
T . J . Mercier, linux-mm, linux-kernel, Meta kernel team,
cgroups
There are no callers of mem_cgroup_charge_statistics() in the v2 code
base, so move it to the v1 only code and rename it to
memcg1_charge_statistics().
Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>
---
mm/memcontrol-v1.c | 17 +++++++++++++++--
mm/memcontrol-v1.h | 3 ++-
mm/memcontrol.c | 19 +++----------------
3 files changed, 20 insertions(+), 19 deletions(-)
diff --git a/mm/memcontrol-v1.c b/mm/memcontrol-v1.c
index 0ce1807ba468..73587e6417c5 100644
--- a/mm/memcontrol-v1.c
+++ b/mm/memcontrol-v1.c
@@ -853,9 +853,9 @@ static int mem_cgroup_move_account(struct folio *folio,
nid = folio_nid(folio);
local_irq_disable();
- mem_cgroup_charge_statistics(to, nr_pages);
+ memcg1_charge_statistics(to, nr_pages);
memcg1_check_events(to, nid);
- mem_cgroup_charge_statistics(from, -nr_pages);
+ memcg1_charge_statistics(from, -nr_pages);
memcg1_check_events(from, nid);
local_irq_enable();
out:
@@ -1439,6 +1439,19 @@ static void mem_cgroup_threshold(struct mem_cgroup *memcg)
}
}
+void memcg1_charge_statistics(struct mem_cgroup *memcg, int nr_pages)
+{
+ /* pagein of a big page is an event. So, ignore page size */
+ if (nr_pages > 0)
+ __count_memcg_events(memcg, PGPGIN, 1);
+ else {
+ __count_memcg_events(memcg, PGPGOUT, 1);
+ nr_pages = -nr_pages; /* for event */
+ }
+
+ __this_cpu_add(memcg->events_percpu->nr_page_events, nr_pages);
+}
+
#define THRESHOLDS_EVENTS_TARGET 128
#define SOFTLIMIT_EVENTS_TARGET 1024
diff --git a/mm/memcontrol-v1.h b/mm/memcontrol-v1.h
index fb7d439f19de..ef72d0b7c5c6 100644
--- a/mm/memcontrol-v1.h
+++ b/mm/memcontrol-v1.h
@@ -7,7 +7,6 @@
/* Cgroup v1 and v2 common declarations */
-void mem_cgroup_charge_statistics(struct mem_cgroup *memcg, int nr_pages);
int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask,
unsigned int nr_pages);
@@ -116,6 +115,7 @@ bool memcg1_oom_prepare(struct mem_cgroup *memcg, bool *locked);
void memcg1_oom_finish(struct mem_cgroup *memcg, bool locked);
void memcg1_oom_recover(struct mem_cgroup *memcg);
+void memcg1_charge_statistics(struct mem_cgroup *memcg, int nr_pages);
void memcg1_check_events(struct mem_cgroup *memcg, int nid);
void memcg1_stat_format(struct mem_cgroup *memcg, struct seq_buf *s);
@@ -147,6 +147,7 @@ static inline bool memcg1_oom_prepare(struct mem_cgroup *memcg, bool *locked) {
static inline void memcg1_oom_finish(struct mem_cgroup *memcg, bool locked) {}
static inline void memcg1_oom_recover(struct mem_cgroup *memcg) {}
+static inline void memcg1_charge_statistics(struct mem_cgroup *memcg, int nr_pages) {}
static inline void memcg1_check_events(struct mem_cgroup *memcg, int nid) {}
static inline void memcg1_stat_format(struct mem_cgroup *memcg, struct seq_buf *s) {}
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 7ea511119567..f8db9924d5dc 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -848,19 +848,6 @@ unsigned long memcg_events_local(struct mem_cgroup *memcg, int event)
return READ_ONCE(memcg->vmstats->events_local[i]);
}
-void mem_cgroup_charge_statistics(struct mem_cgroup *memcg, int nr_pages)
-{
- /* pagein of a big page is an event. So, ignore page size */
- if (nr_pages > 0)
- __count_memcg_events(memcg, PGPGIN, 1);
- else {
- __count_memcg_events(memcg, PGPGOUT, 1);
- nr_pages = -nr_pages; /* for event */
- }
-
- __this_cpu_add(memcg->events_percpu->nr_page_events, nr_pages);
-}
-
struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
{
/*
@@ -2398,7 +2385,7 @@ void mem_cgroup_commit_charge(struct folio *folio, struct mem_cgroup *memcg)
commit_charge(folio, memcg);
local_irq_disable();
- mem_cgroup_charge_statistics(memcg, folio_nr_pages(folio));
+ memcg1_charge_statistics(memcg, folio_nr_pages(folio));
memcg1_check_events(memcg, folio_nid(folio));
local_irq_enable();
}
@@ -4775,7 +4762,7 @@ void mem_cgroup_replace_folio(struct folio *old, struct folio *new)
commit_charge(new, memcg);
local_irq_save(flags);
- mem_cgroup_charge_statistics(memcg, nr_pages);
+ memcg1_charge_statistics(memcg, nr_pages);
memcg1_check_events(memcg, folio_nid(new));
local_irq_restore(flags);
}
@@ -5020,7 +5007,7 @@ void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry)
* only synchronisation we have for updating the per-CPU variables.
*/
memcg_stats_lock();
- mem_cgroup_charge_statistics(memcg, -nr_entries);
+ memcg1_charge_statistics(memcg, -nr_entries);
memcg_stats_unlock();
memcg1_check_events(memcg, folio_nid(folio));
--
2.43.5
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH 4/7] memcg: move v1 events and statistics code to v1 file
2024-08-15 5:04 [PATCH 0/7] memcg: further decouple v1 code from v2 Shakeel Butt
` (2 preceding siblings ...)
2024-08-15 5:04 ` [PATCH 3/7] memcg: move mem_cgroup_charge_statistics " Shakeel Butt
@ 2024-08-15 5:04 ` Shakeel Butt
2024-08-15 5:04 ` [PATCH 5/7] memcg: make v1 only functions static Shakeel Butt
` (2 subsequent siblings)
6 siblings, 0 replies; 9+ messages in thread
From: Shakeel Butt @ 2024-08-15 5:04 UTC (permalink / raw)
To: Andrew Morton
Cc: Johannes Weiner, Michal Hocko, Roman Gushchin, Muchun Song,
T . J . Mercier, linux-mm, linux-kernel, Meta kernel team,
cgroups
Currently the common code path for charge commit, swapout and batched
uncharge are executing v1 only code which is completely useless for the
v2 deployments where CONFIG_MEMCG_V1 is disabled. In addition, it is
mucking with IRQs which might be slow on some architectures. Let's move
all of this code to the v1 only code base and remove it from v2 only
deployments.
Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>
---
mm/memcontrol-v1.c | 37 +++++++++++++++++++++++++++++++++++++
mm/memcontrol-v1.h | 14 ++++++++++++++
mm/memcontrol.c | 33 ++++-----------------------------
3 files changed, 55 insertions(+), 29 deletions(-)
diff --git a/mm/memcontrol-v1.c b/mm/memcontrol-v1.c
index 73587e6417c5..ffb7246b3f35 100644
--- a/mm/memcontrol-v1.c
+++ b/mm/memcontrol-v1.c
@@ -1502,6 +1502,43 @@ void memcg1_check_events(struct mem_cgroup *memcg, int nid)
}
}
+void memcg1_commit_charge(struct folio *folio, struct mem_cgroup *memcg)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ memcg1_charge_statistics(memcg, folio_nr_pages(folio));
+ memcg1_check_events(memcg, folio_nid(folio));
+ local_irq_restore(flags);
+}
+
+void memcg1_swapout(struct folio *folio, struct mem_cgroup *memcg)
+{
+ /*
+ * Interrupts should be disabled here because the caller holds the
+ * i_pages lock which is taken with interrupts-off. It is
+ * important here to have the interrupts disabled because it is the
+ * only synchronisation we have for updating the per-CPU variables.
+ */
+ preempt_disable_nested();
+ VM_WARN_ON_IRQS_ENABLED();
+ memcg1_charge_statistics(memcg, -folio_nr_pages(folio));
+ preempt_enable_nested();
+ memcg1_check_events(memcg, folio_nid(folio));
+}
+
+void memcg1_uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout,
+ unsigned long nr_memory, int nid)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ __count_memcg_events(memcg, PGPGOUT, pgpgout);
+ __this_cpu_add(memcg->events_percpu->nr_page_events, nr_memory);
+ memcg1_check_events(memcg, nid);
+ local_irq_restore(flags);
+}
+
static int compare_thresholds(const void *a, const void *b)
{
const struct mem_cgroup_threshold *_a = a;
diff --git a/mm/memcontrol-v1.h b/mm/memcontrol-v1.h
index ef72d0b7c5c6..376d021a2bf4 100644
--- a/mm/memcontrol-v1.h
+++ b/mm/memcontrol-v1.h
@@ -118,6 +118,11 @@ void memcg1_oom_recover(struct mem_cgroup *memcg);
void memcg1_charge_statistics(struct mem_cgroup *memcg, int nr_pages);
void memcg1_check_events(struct mem_cgroup *memcg, int nid);
+void memcg1_commit_charge(struct folio *folio, struct mem_cgroup *memcg);
+void memcg1_swapout(struct folio *folio, struct mem_cgroup *memcg);
+void memcg1_uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout,
+ unsigned long nr_memory, int nid);
+
void memcg1_stat_format(struct mem_cgroup *memcg, struct seq_buf *s);
void memcg1_account_kmem(struct mem_cgroup *memcg, int nr_pages);
@@ -150,6 +155,15 @@ static inline void memcg1_oom_recover(struct mem_cgroup *memcg) {}
static inline void memcg1_charge_statistics(struct mem_cgroup *memcg, int nr_pages) {}
static inline void memcg1_check_events(struct mem_cgroup *memcg, int nid) {}
+static inline void memcg1_commit_charge(struct folio *folio,
+ struct mem_cgroup *memcg) {}
+
+static inline void memcg1_swapout(struct folio *folio, struct mem_cgroup *memcg) {}
+
+static inline void memcg1_uncharge_batch(struct mem_cgroup *memcg,
+ unsigned long pgpgout,
+ unsigned long nr_memory, int nid) {}
+
static inline void memcg1_stat_format(struct mem_cgroup *memcg, struct seq_buf *s) {}
static inline void memcg1_account_kmem(struct mem_cgroup *memcg, int nr_pages) {}
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f8db9924d5dc..c4b06f26ccfd 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2383,11 +2383,7 @@ void mem_cgroup_commit_charge(struct folio *folio, struct mem_cgroup *memcg)
{
css_get(&memcg->css);
commit_charge(folio, memcg);
-
- local_irq_disable();
- memcg1_charge_statistics(memcg, folio_nr_pages(folio));
- memcg1_check_events(memcg, folio_nid(folio));
- local_irq_enable();
+ memcg1_commit_charge(folio, memcg);
}
static inline void __mod_objcg_mlstate(struct obj_cgroup *objcg,
@@ -4608,8 +4604,6 @@ static inline void uncharge_gather_clear(struct uncharge_gather *ug)
static void uncharge_batch(const struct uncharge_gather *ug)
{
- unsigned long flags;
-
if (ug->nr_memory) {
page_counter_uncharge(&ug->memcg->memory, ug->nr_memory);
if (do_memsw_account())
@@ -4621,11 +4615,7 @@ static void uncharge_batch(const struct uncharge_gather *ug)
memcg1_oom_recover(ug->memcg);
}
- local_irq_save(flags);
- __count_memcg_events(ug->memcg, PGPGOUT, ug->pgpgout);
- __this_cpu_add(ug->memcg->events_percpu->nr_page_events, ug->nr_memory);
- memcg1_check_events(ug->memcg, ug->nid);
- local_irq_restore(flags);
+ memcg1_uncharge_batch(ug->memcg, ug->pgpgout, ug->nr_memory, ug->nid);
/* drop reference from uncharge_folio */
css_put(&ug->memcg->css);
@@ -4732,7 +4722,6 @@ void mem_cgroup_replace_folio(struct folio *old, struct folio *new)
{
struct mem_cgroup *memcg;
long nr_pages = folio_nr_pages(new);
- unsigned long flags;
VM_BUG_ON_FOLIO(!folio_test_locked(old), old);
VM_BUG_ON_FOLIO(!folio_test_locked(new), new);
@@ -4760,11 +4749,7 @@ void mem_cgroup_replace_folio(struct folio *old, struct folio *new)
css_get(&memcg->css);
commit_charge(new, memcg);
-
- local_irq_save(flags);
- memcg1_charge_statistics(memcg, nr_pages);
- memcg1_check_events(memcg, folio_nid(new));
- local_irq_restore(flags);
+ memcg1_commit_charge(new, memcg);
}
/**
@@ -5000,17 +4985,7 @@ void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry)
page_counter_uncharge(&memcg->memsw, nr_entries);
}
- /*
- * Interrupts should be disabled here because the caller holds the
- * i_pages lock which is taken with interrupts-off. It is
- * important here to have the interrupts disabled because it is the
- * only synchronisation we have for updating the per-CPU variables.
- */
- memcg_stats_lock();
- memcg1_charge_statistics(memcg, -nr_entries);
- memcg_stats_unlock();
- memcg1_check_events(memcg, folio_nid(folio));
-
+ memcg1_swapout(folio, memcg);
css_put(&memcg->css);
}
--
2.43.5
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH 5/7] memcg: make v1 only functions static
2024-08-15 5:04 [PATCH 0/7] memcg: further decouple v1 code from v2 Shakeel Butt
` (3 preceding siblings ...)
2024-08-15 5:04 ` [PATCH 4/7] memcg: move v1 events and statistics code to v1 file Shakeel Butt
@ 2024-08-15 5:04 ` Shakeel Butt
2024-08-15 5:04 ` [PATCH 6/7] memcg: allocate v1 event percpu only on v1 deployment Shakeel Butt
2024-08-15 5:04 ` [PATCH 7/7] memcg: make PGPGIN and PGPGOUT v1 only Shakeel Butt
6 siblings, 0 replies; 9+ messages in thread
From: Shakeel Butt @ 2024-08-15 5:04 UTC (permalink / raw)
To: Andrew Morton
Cc: Johannes Weiner, Michal Hocko, Roman Gushchin, Muchun Song,
T . J . Mercier, linux-mm, linux-kernel, Meta kernel team,
cgroups
The functions memcg1_charge_statistics() and memcg1_check_events() are
never used outside of v1 source file. So, make them static.
Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>
---
mm/memcontrol-v1.c | 7 +++++--
mm/memcontrol-v1.h | 6 ------
2 files changed, 5 insertions(+), 8 deletions(-)
diff --git a/mm/memcontrol-v1.c b/mm/memcontrol-v1.c
index ffb7246b3f35..0589d08c1599 100644
--- a/mm/memcontrol-v1.c
+++ b/mm/memcontrol-v1.c
@@ -742,6 +742,9 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma,
return folio_file_page(folio, index);
}
+static void memcg1_check_events(struct mem_cgroup *memcg, int nid);
+static void memcg1_charge_statistics(struct mem_cgroup *memcg, int nr_pages);
+
/**
* mem_cgroup_move_account - move account of the folio
* @folio: The folio.
@@ -1439,7 +1442,7 @@ static void mem_cgroup_threshold(struct mem_cgroup *memcg)
}
}
-void memcg1_charge_statistics(struct mem_cgroup *memcg, int nr_pages)
+static void memcg1_charge_statistics(struct mem_cgroup *memcg, int nr_pages)
{
/* pagein of a big page is an event. So, ignore page size */
if (nr_pages > 0)
@@ -1484,7 +1487,7 @@ static bool memcg1_event_ratelimit(struct mem_cgroup *memcg,
* Check events in order.
*
*/
-void memcg1_check_events(struct mem_cgroup *memcg, int nid)
+static void memcg1_check_events(struct mem_cgroup *memcg, int nid)
{
if (IS_ENABLED(CONFIG_PREEMPT_RT))
return;
diff --git a/mm/memcontrol-v1.h b/mm/memcontrol-v1.h
index 376d021a2bf4..0a9f3f9c2362 100644
--- a/mm/memcontrol-v1.h
+++ b/mm/memcontrol-v1.h
@@ -115,9 +115,6 @@ bool memcg1_oom_prepare(struct mem_cgroup *memcg, bool *locked);
void memcg1_oom_finish(struct mem_cgroup *memcg, bool locked);
void memcg1_oom_recover(struct mem_cgroup *memcg);
-void memcg1_charge_statistics(struct mem_cgroup *memcg, int nr_pages);
-void memcg1_check_events(struct mem_cgroup *memcg, int nid);
-
void memcg1_commit_charge(struct folio *folio, struct mem_cgroup *memcg);
void memcg1_swapout(struct folio *folio, struct mem_cgroup *memcg);
void memcg1_uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout,
@@ -152,9 +149,6 @@ static inline bool memcg1_oom_prepare(struct mem_cgroup *memcg, bool *locked) {
static inline void memcg1_oom_finish(struct mem_cgroup *memcg, bool locked) {}
static inline void memcg1_oom_recover(struct mem_cgroup *memcg) {}
-static inline void memcg1_charge_statistics(struct mem_cgroup *memcg, int nr_pages) {}
-static inline void memcg1_check_events(struct mem_cgroup *memcg, int nid) {}
-
static inline void memcg1_commit_charge(struct folio *folio,
struct mem_cgroup *memcg) {}
--
2.43.5
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH 6/7] memcg: allocate v1 event percpu only on v1 deployment
2024-08-15 5:04 [PATCH 0/7] memcg: further decouple v1 code from v2 Shakeel Butt
` (4 preceding siblings ...)
2024-08-15 5:04 ` [PATCH 5/7] memcg: make v1 only functions static Shakeel Butt
@ 2024-08-15 5:04 ` Shakeel Butt
2024-08-15 5:04 ` [PATCH 7/7] memcg: make PGPGIN and PGPGOUT v1 only Shakeel Butt
6 siblings, 0 replies; 9+ messages in thread
From: Shakeel Butt @ 2024-08-15 5:04 UTC (permalink / raw)
To: Andrew Morton
Cc: Johannes Weiner, Michal Hocko, Roman Gushchin, Muchun Song,
T . J . Mercier, linux-mm, linux-kernel, Meta kernel team,
cgroups
Currently memcg->events_percpu gets allocated on v2 deployments. Let's
move the allocation to v1 only codebase. This is not needed in v2.
Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>
---
include/linux/memcontrol.h | 3 ++-
mm/memcontrol-v1.c | 19 +++++++++++++++++++
mm/memcontrol-v1.h | 26 +++++++-------------------
3 files changed, 28 insertions(+), 20 deletions(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index e21a1541adeb..1f86d01d3b97 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -255,7 +255,6 @@ struct mem_cgroup {
struct list_head objcg_list;
struct memcg_vmstats_percpu __percpu *vmstats_percpu;
- struct memcg1_events_percpu __percpu *events_percpu;
#ifdef CONFIG_CGROUP_WRITEBACK
struct list_head cgwb_list;
@@ -277,6 +276,8 @@ struct mem_cgroup {
struct page_counter kmem; /* v1 only */
struct page_counter tcpmem; /* v1 only */
+ struct memcg1_events_percpu __percpu *events_percpu;
+
unsigned long soft_limit;
/* protected by memcg_oom_lock */
diff --git a/mm/memcontrol-v1.c b/mm/memcontrol-v1.c
index 0589d08c1599..81d8819f13cd 100644
--- a/mm/memcontrol-v1.c
+++ b/mm/memcontrol-v1.c
@@ -1442,6 +1442,12 @@ static void mem_cgroup_threshold(struct mem_cgroup *memcg)
}
}
+/* Cgroup1: threshold notifications & softlimit tree updates */
+struct memcg1_events_percpu {
+ unsigned long nr_page_events;
+ unsigned long targets[MEM_CGROUP_NTARGETS];
+};
+
static void memcg1_charge_statistics(struct mem_cgroup *memcg, int nr_pages)
{
/* pagein of a big page is an event. So, ignore page size */
@@ -3049,6 +3055,19 @@ bool memcg1_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages,
return false;
}
+bool memcg1_alloc_events(struct mem_cgroup *memcg)
+{
+ memcg->events_percpu = alloc_percpu_gfp(struct memcg1_events_percpu,
+ GFP_KERNEL_ACCOUNT);
+ return !!memcg->events_percpu;
+}
+
+void memcg1_free_events(struct mem_cgroup *memcg)
+{
+ if (memcg->events_percpu)
+ free_percpu(memcg->events_percpu);
+}
+
static int __init memcg1_init(void)
{
int node;
diff --git a/mm/memcontrol-v1.h b/mm/memcontrol-v1.h
index 0a9f3f9c2362..3bb8b3030e61 100644
--- a/mm/memcontrol-v1.h
+++ b/mm/memcontrol-v1.h
@@ -55,12 +55,6 @@ enum mem_cgroup_events_target {
MEM_CGROUP_NTARGETS,
};
-/* Cgroup1: threshold notifications & softlimit tree updates */
-struct memcg1_events_percpu {
- unsigned long nr_page_events;
- unsigned long targets[MEM_CGROUP_NTARGETS];
-};
-
unsigned long mem_cgroup_usage(struct mem_cgroup *memcg, bool swap);
void drain_all_stock(struct mem_cgroup *root_memcg);
@@ -72,21 +66,12 @@ unsigned long memcg_page_state_output(struct mem_cgroup *memcg, int item);
unsigned long memcg_page_state_local_output(struct mem_cgroup *memcg, int item);
int memory_stat_show(struct seq_file *m, void *v);
-static inline bool memcg1_alloc_events(struct mem_cgroup *memcg)
-{
- memcg->events_percpu = alloc_percpu_gfp(struct memcg1_events_percpu,
- GFP_KERNEL_ACCOUNT);
- return !!memcg->events_percpu;
-}
-
-static inline void memcg1_free_events(struct mem_cgroup *memcg)
-{
- if (memcg->events_percpu)
- free_percpu(memcg->events_percpu);
-}
-
/* Cgroup v1-specific declarations */
#ifdef CONFIG_MEMCG_V1
+
+bool memcg1_alloc_events(struct mem_cgroup *memcg);
+void memcg1_free_events(struct mem_cgroup *memcg);
+
void memcg1_memcg_init(struct mem_cgroup *memcg);
void memcg1_remove_from_trees(struct mem_cgroup *memcg);
@@ -139,6 +124,9 @@ extern struct cftype mem_cgroup_legacy_files[];
#else /* CONFIG_MEMCG_V1 */
+static inline bool memcg1_alloc_events(struct mem_cgroup *memcg) { return true; }
+static inline void memcg1_free_events(struct mem_cgroup *memcg) {}
+
static inline void memcg1_memcg_init(struct mem_cgroup *memcg) {}
static inline void memcg1_remove_from_trees(struct mem_cgroup *memcg) {}
static inline void memcg1_soft_limit_reset(struct mem_cgroup *memcg) {}
--
2.43.5
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH 7/7] memcg: make PGPGIN and PGPGOUT v1 only
2024-08-15 5:04 [PATCH 0/7] memcg: further decouple v1 code from v2 Shakeel Butt
` (5 preceding siblings ...)
2024-08-15 5:04 ` [PATCH 6/7] memcg: allocate v1 event percpu only on v1 deployment Shakeel Butt
@ 2024-08-15 5:04 ` Shakeel Butt
6 siblings, 0 replies; 9+ messages in thread
From: Shakeel Butt @ 2024-08-15 5:04 UTC (permalink / raw)
To: Andrew Morton
Cc: Johannes Weiner, Michal Hocko, Roman Gushchin, Muchun Song,
T . J . Mercier, linux-mm, linux-kernel, Meta kernel team,
cgroups
Currently PGPGIN and PGPGOUT are used and exposed in the memcg v1 only
code. So, let's put them under CONFIG_MEMCG_V1.
Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>
---
mm/memcontrol.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c4b06f26ccfd..9932074c617a 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -411,8 +411,10 @@ unsigned long lruvec_page_state_local(struct lruvec *lruvec,
/* Subset of vm_event_item to report for memcg event stats */
static const unsigned int memcg_vm_event_stat[] = {
+#ifdef CONFIG_MEMCG_V1
PGPGIN,
PGPGOUT,
+#endif
PGSCAN_KSWAPD,
PGSCAN_DIRECT,
PGSCAN_KHUGEPAGED,
@@ -1461,10 +1463,11 @@ static void memcg_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
memcg_events(memcg, PGSTEAL_KHUGEPAGED));
for (i = 0; i < ARRAY_SIZE(memcg_vm_event_stat); i++) {
+#ifdef CONFIG_MEMCG_V1
if (memcg_vm_event_stat[i] == PGPGIN ||
memcg_vm_event_stat[i] == PGPGOUT)
continue;
-
+#endif
seq_buf_printf(s, "%s %lu\n",
vm_event_name(memcg_vm_event_stat[i]),
memcg_events(memcg, memcg_vm_event_stat[i]));
--
2.43.5
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 1/7] memcg: move v1 only percpu stats in separate struct
2024-08-15 5:04 ` [PATCH 1/7] memcg: move v1 only percpu stats in separate struct Shakeel Butt
@ 2024-08-15 19:34 ` Roman Gushchin
0 siblings, 0 replies; 9+ messages in thread
From: Roman Gushchin @ 2024-08-15 19:34 UTC (permalink / raw)
To: Shakeel Butt
Cc: Andrew Morton, Johannes Weiner, Michal Hocko, Muchun Song,
T . J . Mercier, linux-mm, linux-kernel, Meta kernel team,
cgroups
On Wed, Aug 14, 2024 at 10:04:47PM -0700, Shakeel Butt wrote:
> At the moment struct memcg_vmstats_percpu contains two v1-only fields
> which consume memory even when CONFIG_MEMCG_V1 is not enabled. In
> addition, there are v1-only functions that access them; these live in
> the main memcontrol source file and cannot be moved to the v1-only
> source file due to these fields. Let's move these fields into their own struct. Later
> patches will move the functions accessing them to v1 source file and
> only allocate these fields when CONFIG_MEMCG_V1 is enabled.
>
> Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>
> ---
> include/linux/memcontrol.h | 2 ++
> mm/memcontrol-v1.h | 19 +++++++++++++++++++
> mm/memcontrol.c | 18 +++++++++---------
> 3 files changed, 30 insertions(+), 9 deletions(-)
>
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index 90ecd2dbca06..e21a1541adeb 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -70,6 +70,7 @@ struct mem_cgroup_id {
> };
>
> struct memcg_vmstats_percpu;
> +struct memcg1_events_percpu;
> struct memcg_vmstats;
> struct lruvec_stats_percpu;
> struct lruvec_stats;
> @@ -254,6 +255,7 @@ struct mem_cgroup {
> struct list_head objcg_list;
>
> struct memcg_vmstats_percpu __percpu *vmstats_percpu;
> + struct memcg1_events_percpu __percpu *events_percpu;
It wasn't really obvious until the patch [6/7] why it's not
under CONFIG_MEMCG_V1, but otherwise the series looks great to me.
Acked-by: Roman Gushchin <roman.gushchin@linux.dev>
for the whole series.
Thank you!
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2024-08-15 19:34 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-08-15 5:04 [PATCH 0/7] memcg: further decouple v1 code from v2 Shakeel Butt
2024-08-15 5:04 ` [PATCH 1/7] memcg: move v1 only percpu stats in separate struct Shakeel Butt
2024-08-15 19:34 ` Roman Gushchin
2024-08-15 5:04 ` [PATCH 2/7] memcg: move mem_cgroup_event_ratelimit to v1 code Shakeel Butt
2024-08-15 5:04 ` [PATCH 3/7] memcg: move mem_cgroup_charge_statistics " Shakeel Butt
2024-08-15 5:04 ` [PATCH 4/7] memcg: move v1 events and statistics code to v1 file Shakeel Butt
2024-08-15 5:04 ` [PATCH 5/7] memcg: make v1 only functions static Shakeel Butt
2024-08-15 5:04 ` [PATCH 6/7] memcg: allocate v1 event percpu only on v1 deployment Shakeel Butt
2024-08-15 5:04 ` [PATCH 7/7] memcg: make PGPGIN and PGPGOUT v1 only Shakeel Butt
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox