[PATCH v1 3/3] mm/memcontrol: Add drain_remote_stock(), avoid drain_stock on isolated cpus

linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed

From: Leonardo Bras <leobras@redhat.com>
To: Ingo Molnar <mingo@redhat.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Juri Lelli <juri.lelli@redhat.com>,
	Vincent Guittot <vincent.guittot@linaro.org>,
	Dietmar Eggemann <dietmar.eggemann@arm.com>,
	Steven Rostedt <rostedt@goodmis.org>,
	Ben Segall <bsegall@google.com>, Mel Gorman <mgorman@suse.de>,
	Daniel Bristot de Oliveira <bristot@redhat.com>,
	Valentin Schneider <vschneid@redhat.com>,
	Johannes Weiner <hannes@cmpxchg.org>,
	Michal Hocko <mhocko@kernel.org>,
	Roman Gushchin <roman.gushchin@linux.dev>,
	Shakeel Butt <shakeelb@google.com>,
	Muchun Song <songmuchun@bytedance.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	Frederic Weisbecker <frederic@kernel.org>,
	Leonardo Bras <leobras@redhat.com>, Phil Auld <pauld@redhat.com>,
	Marcelo Tosatti <mtosatti@redhat.com>
Cc: linux-kernel@vger.kernel.org, cgroups@vger.kernel.org,
	linux-mm@kvack.org
Subject: [PATCH v1 3/3] mm/memcontrol: Add drain_remote_stock(), avoid drain_stock on isolated cpus
Date: Tue,  1 Nov 2022 23:02:43 -0300	[thread overview]
Message-ID: <20221102020243.522358-4-leobras@redhat.com> (raw)
In-Reply-To: <20221102020243.522358-1-leobras@redhat.com>

When drain_all_stock() is called, some CPUs will be required to have their
per-CPU caches drained. This currently happens by scheduling a call to
drain_local_stock() to run in each affected CPU.

This, as a consequence, may end up scheduling work to CPUs that are
isolated, and therefore should have as little interruption as possible.

In order to avoid this, make drain_all_stock() able to detect isolated CPUs
and schedule draining the perCPU stock to happen in another non-isolated
CPU.

But since the current implementation only allows the drain to happen in
local CPU, implement a function to drain stock on a remote CPU:
drain_remote_stock().

Given both drain_local_stock() and drain_remote_stock() do almost the same
work, implement a inline drain_stock_helper() that is called by both.

Also, since drain_stock() will be able to run on a remote CPU, protect
memcg_hotplug_cpu_dead() with stock_lock.

Signed-off-by: Leonardo Bras <leobras@redhat.com>
---
 mm/memcontrol.c | 47 ++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 38 insertions(+), 9 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index add46da2e6df1..7ad6e4f4b79ef 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -30,6 +30,7 @@
 #include <linux/cgroup.h>
 #include <linux/pagewalk.h>
 #include <linux/sched/mm.h>
+#include <linux/sched/isolation.h>
 #include <linux/shmem_fs.h>
 #include <linux/hugetlb.h>
 #include <linux/pagemap.h>
@@ -2263,7 +2264,7 @@ static void drain_stock(struct memcg_stock_pcp *stock)
 	stock->cached = NULL;
 }
 
-static void drain_local_stock(struct work_struct *dummy)
+static inline void drain_stock_helper(int cpu)
 {
 	struct memcg_stock_pcp *stock;
 	struct obj_cgroup *old = NULL;
@@ -2271,10 +2272,9 @@ static void drain_local_stock(struct work_struct *dummy)
 
 	/*
 	 * The only protection from cpu hotplug (memcg_hotplug_cpu_dead) vs.
-	 * drain_stock races is that we always operate on local CPU stock
-	 * here with IRQ disabled
+	 * drain_stock races is stock_lock, a percpu spinlock.
 	 */
-	stock = this_cpu_ptr(&memcg_stock);
+	stock = per_cpu_ptr(&memcg_stock, cpu);
 	spin_lock_irqsave(&stock->stock_lock, flags);
 
 	old = drain_obj_stock(stock);
@@ -2286,6 +2286,16 @@ static void drain_local_stock(struct work_struct *dummy)
 		obj_cgroup_put(old);
 }
 
+static void drain_remote_stock(struct work_struct *work)
+{
+	drain_stock_helper(atomic_long_read(&work->data));
+}
+
+static void drain_local_stock(struct work_struct *dummy)
+{
+	drain_stock_helper(smp_processor_id());
+}
+
 /*
  * Cache charges(val) to local per_cpu area.
  * This will be consumed by consume_stock() function, later.
@@ -2352,10 +2362,16 @@ static void drain_all_stock(struct mem_cgroup *root_memcg)
 
 		if (flush &&
 		    !test_and_set_bit(FLUSHING_CACHED_CHARGE, &stock->flags)) {
-			if (cpu == curcpu)
+			if (cpu == curcpu) {
 				drain_local_stock(&stock->work);
-			else
+			} else if (housekeeping_cpu(cpu, HK_TYPE_WQ)) {
 				schedule_work_on(cpu, &stock->work);
+			} else {
+				int hkcpu = housekeeping_any_cpu_from(HK_TYPE_WQ, cpu);
+
+				atomic_long_set(&stock->work.data, cpu);
+				schedule_work_on(hkcpu, &stock->work);
+			}
 		}
 	}
 	migrate_enable();
@@ -2367,7 +2383,9 @@ static int memcg_hotplug_cpu_dead(unsigned int cpu)
 	struct memcg_stock_pcp *stock;
 
 	stock = &per_cpu(memcg_stock, cpu);
+	spin_lock(&stock->stock_lock);
 	drain_stock(stock);
+	spin_unlock(&stock->stock_lock);
 
 	return 0;
 }
@@ -7272,9 +7290,20 @@ static int __init mem_cgroup_init(void)
 	cpuhp_setup_state_nocalls(CPUHP_MM_MEMCQ_DEAD, "mm/memctrl:dead", NULL,
 				  memcg_hotplug_cpu_dead);
 
-	for_each_possible_cpu(cpu)
-		INIT_WORK(&per_cpu_ptr(&memcg_stock, cpu)->work,
-			  drain_local_stock);
+	/*
+	 * CPUs that are isolated should not spend cpu time for stock draining,
+	 * so allow them to export this task to the nearest housekeeping enabled
+	 * cpu available.
+	 */
+	for_each_possible_cpu(cpu) {
+		if (housekeeping_cpu(cpu, HK_TYPE_WQ)) {
+			INIT_WORK(&per_cpu_ptr(&memcg_stock, cpu)->work,
+				  drain_local_stock);
+		} else {
+			INIT_WORK(&per_cpu_ptr(&memcg_stock, cpu)->work,
+				  drain_remote_stock);
+		}
+	}
 
 	for_each_node(node) {
 		struct mem_cgroup_tree_per_node *rtpn;
-- 
2.38.1

next prev parent reply	other threads:[~2022-11-02  2:03 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-11-02  2:02 [PATCH v1 0/3] Avoid scheduling cache draining to " Leonardo Bras
2022-11-02  2:02 ` [PATCH v1 1/3] sched/isolation: Add housekeepíng_any_cpu_from() Leonardo Bras
2022-11-02  2:02 ` [PATCH v1 2/3] mm/memcontrol: Change stock_lock type from local_lock_t to spinlock_t Leonardo Bras
2022-11-02  2:02 ` Leonardo Bras [this message]
2022-11-02  8:53 ` [PATCH v1 0/3] Avoid scheduling cache draining to isolated cpus Michal Hocko
2022-11-03 14:59   ` Leonardo Brás
2022-11-03 15:31     ` Michal Hocko
2022-11-03 16:53       ` Leonardo Brás
2022-11-04  8:41         ` Michal Hocko
2022-11-05  1:45           ` Leonardo Brás
2022-11-07  8:10             ` Michal Hocko
2022-11-08 23:09               ` Leonardo Brás
2022-11-09  8:05                 ` Michal Hocko
2023-01-25  7:44                   ` Leonardo Brás

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20221102020243.522358-4-leobras@redhat.com \
    --to=leobras@redhat.com \
    --cc=akpm@linux-foundation.org \
    --cc=bristot@redhat.com \
    --cc=bsegall@google.com \
    --cc=cgroups@vger.kernel.org \
    --cc=dietmar.eggemann@arm.com \
    --cc=frederic@kernel.org \
    --cc=hannes@cmpxchg.org \
    --cc=juri.lelli@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mgorman@suse.de \
    --cc=mhocko@kernel.org \
    --cc=mingo@redhat.com \
    --cc=mtosatti@redhat.com \
    --cc=pauld@redhat.com \
    --cc=peterz@infradead.org \
    --cc=roman.gushchin@linux.dev \
    --cc=rostedt@goodmis.org \
    --cc=shakeelb@google.com \
    --cc=songmuchun@bytedance.com \
    --cc=vincent.guittot@linaro.org \
    --cc=vschneid@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox