From: Dave Hansen <dave@sr71.net>
To: linux-kernel@vger.kernel.org
Cc: akpm@linux-foundation.org, ak@linux.intel.com,
kirill.shutemov@linux.intel.com, mgorman@suse.de,
alex.shi@linaro.org, x86@kernel.org, linux-mm@kvack.org,
Dave Hansen <dave@sr71.net>
Subject: [PATCH 7/7] big time hack: instrument flush times
Date: Wed, 05 Mar 2014 16:45:31 -0800 [thread overview]
Message-ID: <20140306004531.57EB13AA@viggo.jf.intel.com> (raw)
In-Reply-To: <20140306004519.BBD70A1A@viggo.jf.intel.com>
From: Dave Hansen <dave.hansen@linux.intel.com>
The tracepoint code is a _bit_ too much overhead, so use some
percpu counters to aggregate it instead. Yes, this is racy
and ugly beyond reason, but it was quick to code up.
I'm posting this here because it's interesting to have around,
and if other folks like it, maybe I can get it into shape to
merge into mainline.
---
b/arch/x86/mm/tlb.c | 112 ++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 112 insertions(+)
diff -puN arch/x86/mm/tlb.c~instrument-flush-times arch/x86/mm/tlb.c
--- a/arch/x86/mm/tlb.c~instrument-flush-times 2014-03-05 16:10:11.255122898 -0800
+++ b/arch/x86/mm/tlb.c 2014-03-05 16:10:11.258123035 -0800
@@ -97,6 +97,8 @@ EXPORT_SYMBOL_GPL(leave_mm);
* 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
* 2) Leave the mm if we are in the lazy tlb mode.
*/
+void inc_stat(u64 flush_size, u64 time);
+
static void flush_tlb_func(void *info)
{
struct flush_tlb_info *f = info;
@@ -109,17 +111,23 @@ static void flush_tlb_func(void *info)
count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
if (f->flush_end == TLB_FLUSH_ALL) {
+ u64 start_ns = sched_clock();
local_flush_tlb();
+ inc_stat(TLB_FLUSH_ALL, sched_clock() - start_ns);
trace_tlb_flush(TLB_REMOTE_SHOOTDOWN, TLB_FLUSH_ALL);
} else if (!f->flush_end)
__flush_tlb_single(f->flush_start);
else {
+ u64 start_ns;
unsigned long addr;
+ start_ns = sched_clock();
addr = f->flush_start;
while (addr < f->flush_end) {
__flush_tlb_single(addr);
addr += PAGE_SIZE;
}
+ inc_stat((f->flush_end - f->flush_start) / PAGE_SIZE,
+ sched_clock() - start_ns);
}
} else
leave_mm(smp_processor_id());
@@ -164,12 +172,112 @@ void flush_tlb_current_task(void)
preempt_enable();
}
+/*
+ * One histogram bucket: how many flushes of a given size were seen
+ * and the total sched_clock() time (ns) they consumed.
+ */
+struct one_tlb_stat {
+ u64 flushes;
+ u64 time;
+};
+
+/* Number of histogram buckets; bucket index == pages flushed.
+ * Bucket 0 is reserved for full flushes, the last bucket collects
+ * everything >= NR_TO_TRACK-1 pages (see inc_stat()). */
+#define NR_TO_TRACK 1024
+
+/* Per-cpu histogram of flush sizes vs. time spent flushing. */
+struct tlb_stats {
+ struct one_tlb_stat stats[NR_TO_TRACK];
+};
+
+DEFINE_PER_CPU(struct tlb_stats, tlb_stats);
+
+/*
+ * Account one TLB flush of 'flush_size' pages taking 'time' ns into
+ * this cpu's histogram.  Full flushes (TLB_FLUSH_ALL) land in bucket 0,
+ * oversized ranges are clamped into the last bucket.
+ *
+ * NOTE(review): updates are plain (non-atomic) read-modify-writes; the
+ * cover letter admits this is racy, which is acceptable for a stats hack
+ * but would need this_cpu ops or atomics for mainline.
+ */
+void inc_stat(u64 flush_size, u64 time)
+{
+ struct tlb_stats *thiscpu =
+ &per_cpu(tlb_stats, smp_processor_id());
+ struct one_tlb_stat *stat;
+
+ if (flush_size == TLB_FLUSH_ALL)
+ flush_size = 0;
+ if (flush_size >= NR_TO_TRACK)
+ flush_size = NR_TO_TRACK-1;
+
+ stat = &thiscpu->stats[flush_size];
+ stat->time += time;
+ stat->flushes++;
+}
+
+/* Formatting scratch buffer for the debugfs read path.
+ * NOTE(review): not static, and shared by all concurrent readers with
+ * no locking -- two simultaneous reads can interleave output. */
+char printbuf[80 * NR_TO_TRACK];
+/*
+ * debugfs read: sum each bucket across all online cpus and emit one
+ * line per non-empty bucket: "[label] <flushes> <total_ns>".
+ * Bucket 0 prints as [FULL] (full flushes), the last as [FBIG]
+ * (clamped oversized ranges).
+ */
+static ssize_t tlb_stat_read_file(struct file *file, char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ int cpu;
+ int flush_size;
+ unsigned int len = 0;
+
+ for (flush_size = 0; flush_size < NR_TO_TRACK; flush_size++) {
+ struct one_tlb_stat tot;
+ tot.flushes = 0;
+ tot.time = 0;
+
+ /* Racy-by-design snapshot: counters may move underneath us. */
+ for_each_online_cpu(cpu){
+ struct tlb_stats *thiscpu = &per_cpu(tlb_stats, cpu);
+ struct one_tlb_stat *stat;
+ stat = &thiscpu->stats[flush_size];
+ tot.flushes += stat->flushes;
+ tot.time += stat->time;
+ }
+ if (!tot.flushes)
+ continue;
+ if (flush_size == 0)
+ len += sprintf(&printbuf[len], "[FULL]");
+ else if (flush_size == NR_TO_TRACK-1)
+ len += sprintf(&printbuf[len], "[FBIG]");
+ else
+ len += sprintf(&printbuf[len], "[%d]", flush_size);
+
+ /* NOTE(review): %lld with u64 arguments -- should be %llu to
+ * avoid signed misprinting of very large counters. */
+ len += sprintf(&printbuf[len], " %lld %lld\n",
+ tot.flushes, tot.time);
+ }
+
+ return simple_read_from_buffer(user_buf, count, ppos, printbuf, len);
+}
+
+/*
+ * debugfs write: any write (contents ignored) zeroes every bucket on
+ * every online cpu, resetting the histogram.  The full write is
+ * reported as consumed.
+ */
+static ssize_t tlb_stat_write_file(struct file *file,
+ const char __user *user_buf, size_t count, loff_t *ppos)
+{
+ int cpu;
+ int flush_size;
+
+ for_each_online_cpu(cpu){
+ struct tlb_stats *thiscpu = &per_cpu(tlb_stats, cpu);
+ for (flush_size = 0; flush_size < NR_TO_TRACK; flush_size++) {
+ struct one_tlb_stat *stat;
+ stat = &thiscpu->stats[flush_size];
+ stat->time = 0;
+ stat->flushes = 0;
+ }
+ }
+ return count;
+}
+
+/* File operations for the tlb_flush_stats debugfs file:
+ * read dumps the histogram, write clears it. */
+static const struct file_operations fops_tlb_stat = {
+ .read = tlb_stat_read_file,
+ .write = tlb_stat_write_file,
+ .llseek = default_llseek,
+};
+
+/*
+ * Create <debugfs>/x86/tlb_flush_stats (owner read/write only) at
+ * late_initcall time.  Return value of debugfs_create_file is
+ * intentionally ignored -- stats are best-effort debug aid.
+ */
+static int __init create_tlb_stats(void)
+{
+ debugfs_create_file("tlb_flush_stats", S_IRUSR | S_IWUSR,
+ arch_debugfs_dir, NULL, &fops_tlb_stat);
+ return 0;
+}
+late_initcall(create_tlb_stats);
+
+
/* in units of pages */
unsigned long tlb_single_page_flush_ceiling = 33;
void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
unsigned long end, unsigned long vmflag)
{
+ u64 start_ns = 0;
+ u64 end_ns;
unsigned long addr;
/* do a global flush by default */
unsigned long base_pages_to_flush = TLB_FLUSH_ALL;
@@ -187,6 +295,7 @@ void flush_tlb_mm_range(struct mm_struct
base_pages_to_flush = (end - start) >> PAGE_SHIFT;
trace_tlb_flush(TLB_LOCAL_MM_SHOOTDOWN, base_pages_to_flush);
+ start_ns = sched_clock();
if (base_pages_to_flush > tlb_single_page_flush_ceiling) {
base_pages_to_flush = TLB_FLUSH_ALL;
count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
@@ -198,12 +307,15 @@ void flush_tlb_mm_range(struct mm_struct
__flush_tlb_single(addr);
}
}
+ end_ns = sched_clock();
trace_tlb_flush(TLB_LOCAL_MM_SHOOTDOWN_DONE, base_pages_to_flush);
out:
if (base_pages_to_flush == TLB_FLUSH_ALL) {
start = 0UL;
end = TLB_FLUSH_ALL;
}
+ if (start_ns)
+ inc_stat(base_pages_to_flush, end_ns - start_ns);
if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
flush_tlb_others(mm_cpumask(mm), mm, start, end);
preempt_enable();
_
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org">email@kvack.org</a>
next prev parent reply other threads:[~2014-03-06 0:45 UTC|newest]
Thread overview: 18+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-03-06 0:45 [PATCH 0/7] x86: rework tlb range flushing code Dave Hansen
2014-03-06 0:45 ` [PATCH 1/7] x86: mm: clean up tlb " Dave Hansen
2014-03-07 0:16 ` Davidlohr Bueso
2014-03-07 0:51 ` Davidlohr Bueso
2014-03-07 0:57 ` Eric Boxer
2014-03-06 0:45 ` [PATCH 2/7] x86: mm: rip out complicated, out-of-date, buggy TLB flushing Dave Hansen
2014-03-06 0:45 ` [PATCH 3/7] x86: mm: fix missed global TLB flush stat Dave Hansen
2014-03-06 0:45 ` [PATCH 4/7] x86: mm: trace tlb flushes Dave Hansen
2014-03-06 0:45 ` [PATCH 5/7] x86: mm: new tunable for single vs full TLB flush Dave Hansen
2014-03-07 1:37 ` Davidlohr Bueso
2014-03-07 17:19 ` Dave Hansen
2014-03-06 0:45 ` [PATCH 6/7] x86: mm: set TLB flush tunable to sane value Dave Hansen
2014-03-07 1:55 ` Davidlohr Bueso
2014-03-07 17:15 ` Dave Hansen
2014-03-08 0:28 ` Davidlohr Bueso
2014-03-06 0:45 ` Dave Hansen [this message]
2014-03-07 0:15 ` [PATCH 0/7] x86: rework tlb range flushing code Davidlohr Bueso
2014-03-10 17:11 Dave Hansen
2014-03-10 17:11 ` [PATCH 7/7] big time hack: instrument flush times Dave Hansen
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20140306004531.57EB13AA@viggo.jf.intel.com \
--to=dave@sr71.net \
--cc=ak@linux.intel.com \
--cc=akpm@linux-foundation.org \
--cc=alex.shi@linaro.org \
--cc=kirill.shutemov@linux.intel.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mgorman@suse.de \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox