linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Joe Damato <jdamato@fastly.com>
To: x86@kernel.org, linux-mm@kvack.org,
	Dave Hansen <dave.hansen@linux.intel.com>,
	Andy Lutomirski <luto@kernel.org>,
	Peter Zijlstra <peterz@infradead.org>,
	Thomas Gleixner <tglx@linutronix.de>,
	Ingo Molnar <mingo@redhat.com>, Borislav Petkov <bp@alien8.de>,
	"H. Peter Anvin" <hpa@zytor.com>,
	Juri Lelli <juri.lelli@redhat.com>,
	Vincent Guittot <vincent.guittot@linaro.org>,
	Dietmar Eggemann <dietmar.eggemann@arm.com>,
	Steven Rostedt <rostedt@goodmis.org>,
	Ben Segall <bsegall@google.com>, Mel Gorman <mgorman@suse.de>,
	Daniel Bristot de Oliveira <bristot@redhat.com>,
	Valentin Schneider <vschneid@redhat.com>,
	linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org
Cc: Joe Damato <jdamato@fastly.com>
Subject: [RFC 1/1] mm: Add per-task struct tlb counters
Date: Tue, 13 Sep 2022 18:51:09 -0700	[thread overview]
Message-ID: <1663120270-2673-2-git-send-email-jdamato@fastly.com> (raw)
In-Reply-To: <1663120270-2673-1-git-send-email-jdamato@fastly.com>

TLB shootdowns are tracked globally, but on a busy system it can be
difficult to determine which tasks are generating or receiving them.

Add two counter fields to task_struct, with matching accumulators in
signal_struct:
	- nrtlbflush: number of TLB flush events received
	- ngtlbflush: number of TLB flush events generated

Expose those fields as two new trailing values in /proc/[pid]/stat
(ngtlbflush, then nrtlbflush) so that they can be analyzed alongside
similar metrics (e.g. min_flt and maj_flt).

Signed-off-by: Joe Damato <jdamato@fastly.com>
---
 arch/x86/mm/tlb.c            | 2 ++
 fs/proc/array.c              | 9 +++++++++
 include/linux/sched.h        | 6 ++++++
 include/linux/sched/signal.h | 1 +
 kernel/exit.c                | 6 ++++++
 kernel/fork.c                | 1 +
 6 files changed, 25 insertions(+)

diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index c1e31e9..58f7c59 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -745,6 +745,7 @@ static void flush_tlb_func(void *info)
 	if (!local) {
 		inc_irq_stat(irq_tlb_count);
 		count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
+		current->nrtlbflush++;
 
 		/* Can only happen on remote CPUs */
 		if (f->mm && f->mm != loaded_mm)
@@ -895,6 +896,7 @@ STATIC_NOPV void native_flush_tlb_multi(const struct cpumask *cpumask,
 	 * would not happen.
 	 */
 	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
+	current->ngtlbflush++;
 	if (info->end == TLB_FLUSH_ALL)
 		trace_tlb_flush(TLB_REMOTE_SEND_IPI, TLB_FLUSH_ALL);
 	else
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 49283b81..435afdc 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -469,6 +469,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 	unsigned long long start_time;
 	unsigned long cmin_flt = 0, cmaj_flt = 0;
 	unsigned long  min_flt = 0,  maj_flt = 0;
+	unsigned long ngtlbflush = 0, nrtlbflush = 0;
 	u64 cutime, cstime, utime, stime;
 	u64 cgtime, gtime;
 	unsigned long rsslim = 0;
@@ -530,11 +531,15 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 			do {
 				min_flt += t->min_flt;
 				maj_flt += t->maj_flt;
+				ngtlbflush += t->ngtlbflush;
+				nrtlbflush += t->nrtlbflush;
 				gtime += task_gtime(t);
 			} while_each_thread(task, t);
 
 			min_flt += sig->min_flt;
 			maj_flt += sig->maj_flt;
+			ngtlbflush += sig->ngtlbflush;
+			nrtlbflush += sig->nrtlbflush;
 			thread_group_cputime_adjusted(task, &utime, &stime);
 			gtime += sig->gtime;
 
@@ -554,6 +559,8 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 	if (!whole) {
 		min_flt = task->min_flt;
 		maj_flt = task->maj_flt;
+		nrtlbflush = task->nrtlbflush;
+		ngtlbflush = task->ngtlbflush;
 		task_cputime_adjusted(task, &utime, &stime);
 		gtime = task_gtime(task);
 	}
@@ -643,6 +650,8 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 	else
 		seq_puts(m, " 0");
 
+	seq_put_decimal_ull(m, " ", ngtlbflush);
+	seq_put_decimal_ull(m, " ", nrtlbflush);
 	seq_putc(m, '\n');
 	if (mm)
 		mmput(mm);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5cdf746..2a0d879 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1047,6 +1047,12 @@ struct task_struct {
 	unsigned long			min_flt;
 	unsigned long			maj_flt;
 
+	/* Number of TLB flushes generated by this task */
+	unsigned long			ngtlbflush;
+
+	/* Number of TLB flushes received by this task */
+	unsigned long			nrtlbflush;
+
 	/* Empty if CONFIG_POSIX_CPUTIMERS=n */
 	struct posix_cputimers		posix_cputimers;
 
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index 2009926..4e0b09c 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -189,6 +189,7 @@ struct signal_struct {
 	struct prev_cputime prev_cputime;
 	unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
 	unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt;
+	unsigned long ngtlbflush, nrtlbflush;
 	unsigned long inblock, oublock, cinblock, coublock;
 	unsigned long maxrss, cmaxrss;
 	struct task_io_accounting ioac;
diff --git a/kernel/exit.c b/kernel/exit.c
index 35e0a31..5a72755 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -141,6 +141,8 @@ static void __exit_signal(struct task_struct *tsk)
 	sig->gtime += task_gtime(tsk);
 	sig->min_flt += tsk->min_flt;
 	sig->maj_flt += tsk->maj_flt;
+	sig->ngtlbflush += tsk->ngtlbflush;
+	sig->nrtlbflush += tsk->nrtlbflush;
 	sig->nvcsw += tsk->nvcsw;
 	sig->nivcsw += tsk->nivcsw;
 	sig->inblock += task_io_get_inblock(tsk);
@@ -1095,6 +1097,10 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
 			p->min_flt + sig->min_flt + sig->cmin_flt;
 		psig->cmaj_flt +=
 			p->maj_flt + sig->maj_flt + sig->cmaj_flt;
+		psig->ngtlbflush +=
+			p->ngtlbflush + sig->ngtlbflush;
+		psig->nrtlbflush +=
+			p->nrtlbflush + sig->nrtlbflush;
 		psig->cnvcsw +=
 			p->nvcsw + sig->nvcsw + sig->cnvcsw;
 		psig->cnivcsw +=
diff --git a/kernel/fork.c b/kernel/fork.c
index b339918..5fa9f64 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1555,6 +1555,7 @@ static int copy_mm(unsigned long clone_flags, struct task_struct *tsk)
 	struct mm_struct *mm, *oldmm;
 
 	tsk->min_flt = tsk->maj_flt = 0;
+	tsk->ngtlbflush = tsk->nrtlbflush = 0;
 	tsk->nvcsw = tsk->nivcsw = 0;
 #ifdef CONFIG_DETECT_HUNG_TASK
 	tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw;
-- 
2.7.4



  reply	other threads:[~2022-09-14  1:53 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-09-14  1:51 [RFC 0/1] mm: Track per-task tlb events Joe Damato
2022-09-14  1:51 ` Joe Damato [this message]
2022-09-14  7:40   ` [RFC 1/1] mm: Add per-task struct tlb counters Dave Hansen
2022-09-14 11:58     ` Peter Zijlstra
2022-09-14 14:23       ` Joe Damato
2022-09-14 14:15     ` Joe Damato
2022-09-14 14:25       ` Joe Damato
2022-09-15  8:50       ` Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1663120270-2673-2-git-send-email-jdamato@fastly.com \
    --to=jdamato@fastly.com \
    --cc=bp@alien8.de \
    --cc=bristot@redhat.com \
    --cc=bsegall@google.com \
    --cc=dave.hansen@linux.intel.com \
    --cc=dietmar.eggemann@arm.com \
    --cc=hpa@zytor.com \
    --cc=juri.lelli@redhat.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=luto@kernel.org \
    --cc=mgorman@suse.de \
    --cc=mingo@redhat.com \
    --cc=peterz@infradead.org \
    --cc=rostedt@goodmis.org \
    --cc=tglx@linutronix.de \
    --cc=vincent.guittot@linaro.org \
    --cc=vschneid@redhat.com \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox