linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Jesper Dangaard Brouer <hawk@kernel.org>
To: tj@kernel.org, cgroups@vger.kernel.org, yosryahmed@google.com,
	shakeel.butt@linux.dev
Cc: Jesper Dangaard Brouer <hawk@kernel.org>,
	hannes@cmpxchg.org, lizefan.x@bytedance.com, longman@redhat.com,
	kernel-team@cloudflare.com, linux-mm@kvack.org,
	linux-kernel@vger.kernel.org
Subject: [PATCH V7 2/2 RFC] cgroup/rstat: add tracepoint for ongoing flusher waits
Date: Thu, 11 Jul 2024 15:29:04 +0200	[thread overview]
Message-ID: <172070452878.2992819.10548676901200594081.stgit@firesoul> (raw)
In-Reply-To: <172070450139.2992819.13210624094367257881.stgit@firesoul>

I'll be using this tracepoint in production and will
report back on findings, e.g. measuring how often the
race for ongoing flusher happens. Then we can decide
if it is worth keeping/applying this patch.

Signed-off-by: Jesper Dangaard Brouer <hawk@kernel.org>
---
 include/trace/events/cgroup.h |   49 +++++++++++++++++++++++++++++++++++++++++
 kernel/cgroup/rstat.c         |   15 ++++++++++---
 2 files changed, 61 insertions(+), 3 deletions(-)

diff --git a/include/trace/events/cgroup.h b/include/trace/events/cgroup.h
index af2755bda6eb..c8d84e9a08dc 100644
--- a/include/trace/events/cgroup.h
+++ b/include/trace/events/cgroup.h
@@ -296,6 +296,55 @@ DEFINE_EVENT(cgroup_rstat, cgroup_rstat_cpu_unlock_fastpath,
 	TP_ARGS(cgrp, cpu, contended)
 );
 
+DECLARE_EVENT_CLASS(cgroup_ongoing,
+
+	TP_PROTO(struct cgroup *cgrp, struct cgroup *cgrp_ongoing, \
+		 long res, unsigned int race, ktime_t ts),
+
+	TP_ARGS(cgrp, cgrp_ongoing, res, race, ts),
+
+	TP_STRUCT__entry(
+		__field(	int,		root			)
+		__field(	int,		level			)
+		__field(	u64,		id			)
+		__field(	u64,		id_ongoing		)
+		__field(	ktime_t,	ts			)
+		__field(	long,		res			)
+		__field(	u64,		race			)
+	),
+
+	TP_fast_assign(
+		__entry->root = cgrp->root->hierarchy_id;
+		__entry->id = cgroup_id(cgrp);
+		__entry->level = cgrp->level;
+		__entry->id_ongoing = cgroup_id(cgrp_ongoing);
+		__entry->res = res;
+		__entry->race = race;
+		__entry->ts = ts;
+	),
+
+	TP_printk("root=%d id=%llu level=%d ongoing_flusher=%llu res=%ld race=%llu ts=%lld",
+		  __entry->root, __entry->id, __entry->level,
+		  __entry->id_ongoing, __entry->res, __entry->race, __entry->ts)
+);
+
+DEFINE_EVENT(cgroup_ongoing, cgroup_ongoing_flusher,
+
+	TP_PROTO(struct cgroup *cgrp, struct cgroup *cgrp_ongoing, \
+		 long res, unsigned int race, ktime_t ts),
+
+	TP_ARGS(cgrp, cgrp_ongoing, res, race, ts)
+);
+
+DEFINE_EVENT(cgroup_ongoing, cgroup_ongoing_flusher_wait,
+
+	TP_PROTO(struct cgroup *cgrp, struct cgroup *cgrp_ongoing, \
+		 long res, unsigned int race, ktime_t ts),
+
+	TP_ARGS(cgrp, cgrp_ongoing, res, race, ts)
+);
+
+
 #endif /* _TRACE_CGROUP_H */
 
 /* This part must be outside protection */
diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
index fe2a81a310bb..2d7af86e31c8 100644
--- a/kernel/cgroup/rstat.c
+++ b/kernel/cgroup/rstat.c
@@ -321,6 +321,7 @@ static inline void __cgroup_rstat_unlock(struct cgroup *cgrp, int cpu_in_loop)
 static bool cgroup_rstat_trylock_flusher(struct cgroup *cgrp)
 {
 	struct cgroup *ongoing;
+	unsigned int race = 0;
 	bool locked;
 
 	/* Check if ongoing flusher is already taking care of this, if
@@ -330,17 +331,25 @@ static bool cgroup_rstat_trylock_flusher(struct cgroup *cgrp)
 retry:
 	ongoing = READ_ONCE(cgrp_rstat_ongoing_flusher);
 	if (ongoing && cgroup_is_descendant(cgrp, ongoing)) {
-		wait_for_completion_interruptible_timeout(
+		ktime_t ts = ktime_get_mono_fast_ns();
+		long res = 0;
+
+		trace_cgroup_ongoing_flusher(cgrp, ongoing, 0, race, ts);
+
+		res = wait_for_completion_interruptible_timeout(
 			&ongoing->flush_done, MAX_WAIT);
-		/* TODO: Add tracepoint here */
+		trace_cgroup_ongoing_flusher_wait(cgrp, ongoing, res, race, ts);
+
 		return false;
 	}
 
 	locked = __cgroup_rstat_trylock(cgrp, -1);
 	if (!locked) {
 		/* Contended: Handle loosing race for ongoing flusher */
-		if (!ongoing && READ_ONCE(cgrp_rstat_ongoing_flusher))
+		if (!ongoing && READ_ONCE(cgrp_rstat_ongoing_flusher)) {
+			race++;
 			goto retry;
+		}
 
 		__cgroup_rstat_lock(cgrp, -1, false);
 	}




  reply	other threads:[~2024-07-11 13:29 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-07-11 13:28 [PATCH V7 1/2] cgroup/rstat: Avoid thundering herd problem by kswapd across NUMA nodes Jesper Dangaard Brouer
2024-07-11 13:29 ` Jesper Dangaard Brouer [this message]
2024-07-16  8:42 ` Jesper Dangaard Brouer
2024-07-17  0:35   ` Yosry Ahmed
2024-07-17  3:00     ` Waiman Long
2024-07-17 16:05       ` Yosry Ahmed
2024-07-17 16:36     ` Jesper Dangaard Brouer
2024-07-17 16:49       ` Yosry Ahmed
2024-07-18  8:12         ` Jesper Dangaard Brouer
2024-07-18 15:55           ` Yosry Ahmed
2024-07-19  0:40       ` Shakeel Butt
2024-07-19  3:11         ` Yosry Ahmed
2024-07-19 23:01           ` Shakeel Butt
2024-07-19  7:54         ` Jesper Dangaard Brouer
2024-07-19 22:47           ` Shakeel Butt
2024-07-20  4:52             ` Yosry Ahmed
     [not found]               ` <CAJD7tkaypFa3Nk0jh_ZYJX8YB0i7h9VY2YFXMg7GKzSS+f8H5g@mail.gmail.com>
2024-07-20 15:05                 ` Jesper Dangaard Brouer
2024-07-22 20:02               ` Shakeel Butt
2024-07-22 20:12                 ` Yosry Ahmed
2024-07-22 21:32                   ` Shakeel Butt
2024-07-22 22:58                     ` Shakeel Butt
2024-07-23  6:24                       ` Yosry Ahmed
2024-07-17  0:30 ` Yosry Ahmed
2024-07-17  7:32   ` Jesper Dangaard Brouer
2024-07-17 16:31     ` Yosry Ahmed
2024-07-17 18:17       ` Jesper Dangaard Brouer
2024-07-17 18:43         ` Yosry Ahmed
2024-07-19 15:07   ` Jesper Dangaard Brouer

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=172070452878.2992819.10548676901200594081.stgit@firesoul \
    --to=hawk@kernel.org \
    --cc=cgroups@vger.kernel.org \
    --cc=hannes@cmpxchg.org \
    --cc=kernel-team@cloudflare.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=lizefan.x@bytedance.com \
    --cc=longman@redhat.com \
    --cc=shakeel.butt@linux.dev \
    --cc=tj@kernel.org \
    --cc=yosryahmed@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox